2 * Copyright (c) 2015, 2016 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "Conntrack.h"
18 #include "IpFragment.h"
20 #include "PacketParser.h"
22 #include "Conntrack-nat.h"
25 #pragma warning(disable:4311)
27 #define WINDOWS_TICK 10000000
28 #define SEC_TO_UNIX_EPOCH 11644473600LL
29 #define SEC_TO_NANOSEC 1000000000LL
31 KSTART_ROUTINE OvsConntrackEntryCleaner
;
32 static PLIST_ENTRY ovsConntrackTable
;
33 static OVS_CT_THREAD_CTX ctThreadCtx
;
34 static PNDIS_RW_LOCK_EX ovsConntrackLockObj
;
35 extern POVS_SWITCH_CONTEXT gOvsSwitchContext
;
36 static UINT64 ctTotalEntries
;
38 static __inline NDIS_STATUS
OvsCtFlush(UINT16 zone
);
41 *----------------------------------------------------------------------------
43 * Initialize the components used by Connection Tracking
44 *----------------------------------------------------------------------------
47 OvsInitConntrack(POVS_SWITCH_CONTEXT context
)
50 HANDLE threadHandle
= NULL
;
53 /* Init the sync-lock */
54 ovsConntrackLockObj
= NdisAllocateRWLock(context
->NdisFilterHandle
);
55 if (ovsConntrackLockObj
== NULL
) {
56 return STATUS_INSUFFICIENT_RESOURCES
;
59 /* Init the Hash Buffer */
60 ovsConntrackTable
= OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY
)
63 if (ovsConntrackTable
== NULL
) {
64 NdisFreeRWLock(ovsConntrackLockObj
);
65 ovsConntrackLockObj
= NULL
;
66 return STATUS_INSUFFICIENT_RESOURCES
;
69 for (int i
= 0; i
< CT_HASH_TABLE_SIZE
; i
++) {
70 InitializeListHead(&ovsConntrackTable
[i
]);
73 /* Init CT Cleaner Thread */
74 KeInitializeEvent(&ctThreadCtx
.event
, NotificationEvent
, FALSE
);
75 status
= PsCreateSystemThread(&threadHandle
, SYNCHRONIZE
, NULL
, NULL
,
76 NULL
, OvsConntrackEntryCleaner
,
79 if (status
!= STATUS_SUCCESS
) {
80 NdisFreeRWLock(ovsConntrackLockObj
);
81 ovsConntrackLockObj
= NULL
;
83 OvsFreeMemoryWithTag(ovsConntrackTable
, OVS_CT_POOL_TAG
);
84 ovsConntrackTable
= NULL
;
89 ObReferenceObjectByHandle(threadHandle
, SYNCHRONIZE
, NULL
, KernelMode
,
90 &ctThreadCtx
.threadObject
, NULL
);
91 ZwClose(threadHandle
);
94 status
= OvsNatInit();
96 if (status
!= STATUS_SUCCESS
) {
97 OvsCleanupConntrack();
100 return STATUS_SUCCESS
;
104 *----------------------------------------------------------------------------
105 * OvsCleanupConntrack
106 * Cleanup memory and thread that were spawned for Connection tracking
107 *----------------------------------------------------------------------------
110 OvsCleanupConntrack(VOID
)
112 LOCK_STATE_EX lockState
;
113 NdisAcquireRWLockWrite(ovsConntrackLockObj
, &lockState
, 0);
114 ctThreadCtx
.exit
= 1;
115 KeSetEvent(&ctThreadCtx
.event
, 0, FALSE
);
116 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
118 KeWaitForSingleObject(ctThreadCtx
.threadObject
, Executive
,
119 KernelMode
, FALSE
, NULL
);
120 ObDereferenceObject(ctThreadCtx
.threadObject
);
122 /* Force flush all entries before removing */
125 if (ovsConntrackTable
) {
126 OvsFreeMemoryWithTag(ovsConntrackTable
, OVS_CT_POOL_TAG
);
127 ovsConntrackTable
= NULL
;
130 NdisFreeRWLock(ovsConntrackLockObj
);
131 ovsConntrackLockObj
= NULL
;
/*
 * OvsCtKeyReverse
 *     Operates in place on the conntrack key pointed to by 'key'. Within this
 *     file it is applied to entry->rev_key, to a local copy of ctx->key and to
 *     ctx->key itself.
 *     NOTE(review): only the declaration of the scratch endpoint 'tmp' is
 *     visible in this excerpt; presumably key->src and key->dst are exchanged
 *     through it -- confirm against the full source.
 */
136 OvsCtKeyReverse(OVS_CT_KEY
*key
)
138 struct ct_endpoint tmp
;
145 OvsCtUpdateFlowKey(struct OvsFlowKey
*key
,
149 struct ovs_key_ct_labels
*labels
)
151 key
->ct
.state
= state
| OVS_CS_F_TRACKED
;
155 NdisMoveMemory(&key
->ct
.labels
, labels
,
156 sizeof(struct ovs_key_ct_labels
));
158 memset(&key
->ct
.labels
, 0,
159 sizeof(struct ovs_key_ct_labels
));
164 OvsPostCtEventEntry(POVS_CT_ENTRY entry
, UINT8 type
)
166 OVS_CT_EVENT_ENTRY ctEventEntry
= {0};
167 NdisMoveMemory(&ctEventEntry
.entry
, entry
, sizeof(OVS_CT_ENTRY
));
168 ctEventEntry
.type
= type
;
169 OvsPostCtEvent(&ctEventEntry
);
173 OvsCtIncrementCounters(POVS_CT_ENTRY entry
, BOOLEAN reply
, PNET_BUFFER_LIST nbl
)
176 entry
->rev_key
.byteCount
+= OvsPacketLenNBL(nbl
);
177 entry
->rev_key
.packetCount
++;
179 entry
->key
.byteCount
+= OvsPacketLenNBL(nbl
);
180 entry
->key
.packetCount
++;
184 static __inline BOOLEAN
185 OvsCtAddEntry(POVS_CT_ENTRY entry
, OvsConntrackKeyLookupCtx
*ctx
,
186 PNAT_ACTION_INFO natInfo
, UINT64 now
)
188 NdisMoveMemory(&entry
->key
, &ctx
->key
, sizeof(OVS_CT_KEY
));
189 NdisMoveMemory(&entry
->rev_key
, &ctx
->key
, sizeof(OVS_CT_KEY
));
190 OvsCtKeyReverse(&entry
->rev_key
);
192 /* NatInfo is always initialized to be disabled, so that if NAT action
193 * fails, we will not end up deleting a non-existent NAT entry.
195 if (natInfo
== NULL
) {
196 entry
->natInfo
.natAction
= NAT_ACTION_NONE
;
198 if (OvsIsForwardNat(natInfo
->natAction
)) {
199 entry
->natInfo
= *natInfo
;
200 if (!OvsNatTranslateCtEntry(entry
)) {
203 ctx
->hash
= OvsHashCtKey(&entry
->key
);
205 entry
->natInfo
.natAction
= natInfo
->natAction
;
209 entry
->timestampStart
= now
;
210 InsertHeadList(&ovsConntrackTable
[ctx
->hash
& CT_HASH_TABLE_MASK
],
217 static __inline POVS_CT_ENTRY
218 OvsCtEntryCreate(OvsForwardingContext
*fwdCtx
,
221 OvsConntrackKeyLookupCtx
*ctx
,
223 PNAT_ACTION_INFO natInfo
,
226 BOOLEAN
*entryCreated
)
228 POVS_CT_ENTRY entry
= NULL
;
230 POVS_CT_ENTRY parentEntry
;
231 PNET_BUFFER_LIST curNbl
= fwdCtx
->curNbl
;
233 *entryCreated
= FALSE
;
234 state
|= OVS_CS_F_NEW
;
236 parentEntry
= OvsCtRelatedLookup(ctx
->key
, currentTime
);
237 if (parentEntry
!= NULL
) {
238 state
|= OVS_CS_F_RELATED
;
246 tcp
= OvsGetTcp(curNbl
, l4Offset
, &tcpStorage
);
247 if (!OvsConntrackValidateTcpPacket(tcp
)) {
248 state
= OVS_CS_F_INVALID
;
253 entry
= OvsConntrackCreateTcpEntry(tcp
, curNbl
, currentTime
);
261 icmp
= OvsGetIcmp(curNbl
, l4Offset
, &storage
);
262 if (!OvsConntrackValidateIcmpPacket(icmp
)) {
263 state
= OVS_CS_F_INVALID
;
268 entry
= OvsConntrackCreateIcmpEntry(currentTime
);
275 entry
= OvsConntrackCreateOtherEntry(currentTime
);
280 state
= OVS_CS_F_INVALID
;
284 if (state
!= OVS_CS_F_INVALID
&& commit
) {
286 entry
->parent
= parentEntry
;
287 if (OvsCtAddEntry(entry
, ctx
, natInfo
, currentTime
)) {
288 *entryCreated
= TRUE
;
290 /* Unable to add entry to the list */
291 OvsFreeMemoryWithTag(entry
, OVS_CT_POOL_TAG
);
292 state
= OVS_CS_F_INVALID
;
296 /* OvsAllocateMemoryWithTag returned NULL; treat as invalid */
297 state
= OVS_CS_F_INVALID
;
301 OvsCtUpdateFlowKey(key
, state
, ctx
->key
.zone
, 0, NULL
);
303 OvsCtIncrementCounters(entry
, ctx
->reply
, curNbl
);
308 static enum CT_UPDATE_RES
309 OvsCtUpdateEntry(OVS_CT_ENTRY
* entry
,
310 PNET_BUFFER_LIST nbl
,
321 tcp
= OvsGetTcp(nbl
, l4Offset
, &tcpStorage
);
323 return CT_UPDATE_INVALID
;
325 return OvsConntrackUpdateTcpEntry(entry
, tcp
, nbl
, reply
, now
);
328 return OvsConntrackUpdateIcmpEntry(entry
, reply
, now
);
330 return OvsConntrackUpdateOtherEntry(entry
, reply
, now
);
332 return CT_UPDATE_INVALID
;
337 OvsCtEntryDelete(POVS_CT_ENTRY entry
)
342 if (entry
->natInfo
.natAction
) {
343 OvsNatDeleteKey(&entry
->key
);
345 OvsPostCtEventEntry(entry
, OVS_EVENT_CT_DELETE
);
346 RemoveEntryList(&entry
->link
);
347 OvsFreeMemoryWithTag(entry
, OVS_CT_POOL_TAG
);
351 static __inline BOOLEAN
352 OvsCtEntryExpired(POVS_CT_ENTRY entry
)
355 NdisGetCurrentSystemTime((LARGE_INTEGER
*)¤tTime
);
356 return entry
->expiration
< currentTime
;
359 static __inline NDIS_STATUS
360 OvsDetectCtPacket(OvsForwardingContext
*fwdCtx
,
362 PNET_BUFFER_LIST
*newNbl
)
364 /* Only TCP, UDP and ICMP packets are supported; fragmented packets are handed to OvsProcessIpv4Fragment(). */
365 switch (ntohs(key
->l2
.dlType
)) {
367 if (key
->ipKey
.nwFrag
!= OVS_FRAG_TYPE_NONE
) {
368 return OvsProcessIpv4Fragment(fwdCtx
->switchContext
,
370 fwdCtx
->completionList
,
371 fwdCtx
->fwdDetail
->SourcePortId
,
372 key
->tunKey
.tunnelId
,
375 if (key
->ipKey
.nwProto
== IPPROTO_TCP
376 || key
->ipKey
.nwProto
== IPPROTO_UDP
377 || key
->ipKey
.nwProto
== IPPROTO_ICMP
) {
378 return NDIS_STATUS_SUCCESS
;
380 return NDIS_STATUS_NOT_SUPPORTED
;
382 return NDIS_STATUS_NOT_SUPPORTED
;
385 return NDIS_STATUS_NOT_SUPPORTED
;
389 OvsCtKeyAreSame(OVS_CT_KEY ctxKey
, OVS_CT_KEY entryKey
)
391 return ((NdisEqualMemory(&ctxKey
.src
, &entryKey
.src
,
392 sizeof(struct ct_endpoint
))) &&
393 (NdisEqualMemory(&ctxKey
.dst
, &entryKey
.dst
,
394 sizeof(struct ct_endpoint
))) &&
395 (ctxKey
.dl_type
== entryKey
.dl_type
) &&
396 (ctxKey
.nw_proto
== entryKey
.nw_proto
) &&
397 (ctxKey
.zone
== entryKey
.zone
));
401 OvsCtLookup(OvsConntrackKeyLookupCtx
*ctx
)
405 BOOLEAN reply
= FALSE
;
406 POVS_CT_ENTRY found
= NULL
;
408 /* Reverse NAT must be performed before OvsCtLookup, so here
409 * we simply need to flip the src and dst in key and compare
410 * they are equal. Note that flipped key is not equal to
411 * rev_key due to NAT effect.
413 OVS_CT_KEY revCtxKey
= ctx
->key
;
414 OvsCtKeyReverse(&revCtxKey
);
416 if (!ctTotalEntries
) {
420 LIST_FORALL(&ovsConntrackTable
[ctx
->hash
& CT_HASH_TABLE_MASK
], link
) {
421 entry
= CONTAINING_RECORD(link
, OVS_CT_ENTRY
, link
);
423 if (OvsCtKeyAreSame(ctx
->key
, entry
->key
)) {
429 if (OvsCtKeyAreSame(revCtxKey
, entry
->key
)) {
437 if (OvsCtEntryExpired(found
)) {
/*
 * OvsHashCtKey
 *     Computes the hash of a conntrack key; elsewhere in this file the result
 *     is masked with CT_HASH_TABLE_MASK to select a bucket of the conntrack
 *     table. The src and dst endpoints are each hashed with basis 0 and the
 *     two values are XOR-ed, after which one more OvsJhashBytes() call is made
 *     over data addressed relative to key->dst.
 *     NOTE(review): the trailing arguments of that last call and the return
 *     statement are not visible in this excerpt.
 */
449 OvsHashCtKey(const OVS_CT_KEY
*key
)
451 UINT32 hsrc
, hdst
, hash
;
/* Hash each endpoint on its own, both with a basis of 0. */
452 hsrc
= OvsJhashBytes((UINT32
*) &key
->src
, sizeof(key
->src
), 0);
453 hdst
= OvsJhashBytes((UINT32
*) &key
->dst
, sizeof(key
->dst
), 0);
454 hash
= hsrc
^ hdst
; /* XOR is order-independent, so a key and its src/dst-swapped form agree */
/* NOTE(review): the cast binds tighter than '+ 1', so the start pointer below
 * is one uint32_t past the beginning of key->dst, whereas the length
 * expression uses (&key->dst + 1), i.e. the first byte after key->dst --
 * confirm which was intended. */
/* NOTE(review): the length below is a difference of uint32_t pointers (an
 * element count), while the two calls above pass a byte size from sizeof --
 * confirm the unit OvsJhashBytes() expects. */
455 hash
= OvsJhashBytes((uint32_t *) &key
->dst
+ 1,
456 ((uint32_t *) (key
+ 1) -
457 (uint32_t *) (&key
->dst
+ 1)),
463 OvsReverseIcmpType(UINT8 type
)
466 case ICMP4_ECHO_REQUEST
:
467 return ICMP4_ECHO_REPLY
;
468 case ICMP4_ECHO_REPLY
:
469 return ICMP4_ECHO_REQUEST
;
470 case ICMP4_TIMESTAMP_REQUEST
:
471 return ICMP4_TIMESTAMP_REPLY
;
472 case ICMP4_TIMESTAMP_REPLY
:
473 return ICMP4_TIMESTAMP_REQUEST
;
474 case ICMP4_INFO_REQUEST
:
475 return ICMP4_INFO_REPLY
;
476 case ICMP4_INFO_REPLY
:
477 return ICMP4_INFO_REQUEST
;
483 static __inline NDIS_STATUS
484 OvsCtSetupLookupCtx(OvsFlowKey
*flowKey
,
486 OvsConntrackKeyLookupCtx
*ctx
,
487 PNET_BUFFER_LIST curNbl
,
490 const OVS_NAT_ENTRY
*natEntry
;
491 ctx
->key
.zone
= zone
;
492 ctx
->key
.dl_type
= flowKey
->l2
.dlType
;
493 ctx
->related
= FALSE
;
495 /* Extract L3 and L4*/
496 if (flowKey
->l2
.dlType
== htons(ETH_TYPE_IPV4
)) {
497 ctx
->key
.src
.addr
.ipv4
= flowKey
->ipKey
.nwSrc
;
498 ctx
->key
.dst
.addr
.ipv4
= flowKey
->ipKey
.nwDst
;
499 ctx
->key
.nw_proto
= flowKey
->ipKey
.nwProto
;
501 ctx
->key
.src
.port
= flowKey
->ipKey
.l4
.tpSrc
;
502 ctx
->key
.dst
.port
= flowKey
->ipKey
.l4
.tpDst
;
503 if (flowKey
->ipKey
.nwProto
== IPPROTO_ICMP
) {
506 icmp
= OvsGetIcmp(curNbl
, l4Offset
, &icmpStorage
);
509 /* Related bit is set when ICMP has an error */
510 /* XXX parse out the appropriate src and dst from inner pkt */
511 switch (icmp
->type
) {
512 case ICMP4_ECHO_REQUEST
:
513 case ICMP4_ECHO_REPLY
:
514 case ICMP4_TIMESTAMP_REQUEST
:
515 case ICMP4_TIMESTAMP_REPLY
:
516 case ICMP4_INFO_REQUEST
:
517 case ICMP4_INFO_REPLY
:
518 if (icmp
->code
!= 0) {
519 return NDIS_STATUS_INVALID_PACKET
;
521 /* Separate ICMP connection: identified using id */
522 ctx
->key
.dst
.icmp_id
= icmp
->fields
.echo
.id
;
523 ctx
->key
.src
.icmp_id
= icmp
->fields
.echo
.id
;
524 ctx
->key
.src
.icmp_type
= icmp
->type
;
525 ctx
->key
.dst
.icmp_type
= OvsReverseIcmpType(icmp
->type
);
527 case ICMP4_DEST_UNREACH
:
528 case ICMP4_TIME_EXCEEDED
:
529 case ICMP4_PARAM_PROB
:
530 case ICMP4_SOURCE_QUENCH
:
531 case ICMP4_REDIRECT
: {
532 /* XXX Handle inner packet */
537 ctx
->related
= FALSE
;
540 } else if (flowKey
->l2
.dlType
== htons(ETH_TYPE_IPV6
)) {
541 ctx
->key
.src
.addr
.ipv6
= flowKey
->ipv6Key
.ipv6Src
;
542 ctx
->key
.dst
.addr
.ipv6
= flowKey
->ipv6Key
.ipv6Dst
;
543 ctx
->key
.nw_proto
= flowKey
->ipv6Key
.nwProto
;
545 ctx
->key
.src
.port
= flowKey
->ipv6Key
.l4
.tpSrc
;
546 ctx
->key
.dst
.port
= flowKey
->ipv6Key
.l4
.tpDst
;
547 /* XXX Handle ICMPv6 errors*/
549 return NDIS_STATUS_INVALID_PACKET
;
552 natEntry
= OvsNatLookup(&ctx
->key
, TRUE
);
554 /* Translate address first for reverse NAT */
555 ctx
->key
= natEntry
->ctEntry
->key
;
556 OvsCtKeyReverse(&ctx
->key
);
559 ctx
->hash
= OvsHashCtKey(&ctx
->key
);
560 return NDIS_STATUS_SUCCESS
;
563 static __inline BOOLEAN
564 OvsDetectFtpPacket(OvsFlowKey
*key
) {
565 return (key
->ipKey
.nwProto
== IPPROTO_TCP
&&
566 (ntohs(key
->ipKey
.l4
.tpDst
) == IPPORT_FTP
||
567 ntohs(key
->ipKey
.l4
.tpSrc
) == IPPORT_FTP
));
571 *----------------------------------------------------------------------------
572 * OvsProcessConntrackEntry
573 * Check the TCP flags and set the ct_state of the entry
574 *----------------------------------------------------------------------------
576 static __inline POVS_CT_ENTRY
577 OvsProcessConntrackEntry(OvsForwardingContext
*fwdCtx
,
579 OvsConntrackKeyLookupCtx
*ctx
,
582 NAT_ACTION_INFO
*natInfo
,
585 BOOLEAN
*entryCreated
)
587 POVS_CT_ENTRY entry
= ctx
->entry
;
589 PNET_BUFFER_LIST curNbl
= fwdCtx
->curNbl
;
590 *entryCreated
= FALSE
;
592 /* If an entry was found, update the state based on TCP flags */
594 state
|= OVS_CS_F_RELATED
;
596 state
|= OVS_CS_F_REPLY_DIR
;
599 CT_UPDATE_RES result
;
600 result
= OvsCtUpdateEntry(entry
, curNbl
, key
->ipKey
.nwProto
,
601 l4Offset
, ctx
->reply
, currentTime
);
603 case CT_UPDATE_VALID
:
604 state
|= OVS_CS_F_ESTABLISHED
;
606 state
|= OVS_CS_F_REPLY_DIR
;
609 case CT_UPDATE_INVALID
:
610 state
|= OVS_CS_F_INVALID
;
613 //Delete and update the Conntrack
614 OvsCtEntryDelete(ctx
->entry
);
616 entry
= OvsCtEntryCreate(fwdCtx
, key
->ipKey
.nwProto
, l4Offset
,
617 ctx
, key
, natInfo
, commit
, currentTime
,
626 if (key
->ipKey
.nwProto
== IPPROTO_TCP
&& entry
) {
627 /* Update the related bit if there is a parent */
629 state
|= OVS_CS_F_RELATED
;
631 POVS_CT_ENTRY parentEntry
;
632 parentEntry
= OvsCtRelatedLookup(ctx
->key
, currentTime
);
633 entry
->parent
= parentEntry
;
634 if (parentEntry
!= NULL
) {
635 state
|= OVS_CS_F_RELATED
;
640 /* Copy mark and label from entry into flowKey. If actions specify
641 different mark and label, update the flowKey. */
643 OvsCtUpdateFlowKey(key
, state
, zone
, entry
->mark
, &entry
->labels
);
645 OvsCtUpdateFlowKey(key
, state
, zone
, 0, NULL
);
651 OvsConntrackSetMark(OvsFlowKey
*key
,
655 BOOLEAN
*markChanged
)
658 newMark
= value
| (entry
->mark
& ~(mask
));
659 if (entry
->mark
!= newMark
) {
660 entry
->mark
= newMark
;
661 key
->ct
.mark
= newMark
;
667 OvsConntrackSetLabels(OvsFlowKey
*key
,
669 struct ovs_key_ct_labels
*val
,
670 struct ovs_key_ct_labels
*mask
,
671 BOOLEAN
*labelChanged
)
673 ovs_u128 v
, m
, pktMdLabel
= {0};
674 memcpy(&v
, val
, sizeof v
);
675 memcpy(&m
, mask
, sizeof m
);
677 pktMdLabel
.u64
.lo
= v
.u64
.lo
| (pktMdLabel
.u64
.lo
& ~(m
.u64
.lo
));
678 pktMdLabel
.u64
.hi
= v
.u64
.hi
| (pktMdLabel
.u64
.hi
& ~(m
.u64
.hi
));
680 if (!NdisEqualMemory(&entry
->labels
, &pktMdLabel
,
681 sizeof(struct ovs_key_ct_labels
))) {
682 *labelChanged
= TRUE
;
684 NdisMoveMemory(&entry
->labels
, &pktMdLabel
,
685 sizeof(struct ovs_key_ct_labels
));
686 NdisMoveMemory(&key
->ct
.labels
, &pktMdLabel
,
687 sizeof(struct ovs_key_ct_labels
));
690 static __inline NDIS_STATUS
691 OvsCtExecute_(OvsForwardingContext
*fwdCtx
,
693 OVS_PACKET_HDR_INFO
*layers
,
700 PNAT_ACTION_INFO natInfo
,
701 BOOLEAN postUpdateEvent
)
703 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
704 BOOLEAN triggerUpdateEvent
= FALSE
;
705 POVS_CT_ENTRY entry
= NULL
;
706 PNET_BUFFER_LIST curNbl
= fwdCtx
->curNbl
;
707 OvsConntrackKeyLookupCtx ctx
= { 0 };
708 LOCK_STATE_EX lockState
;
710 NdisGetCurrentSystemTime((LARGE_INTEGER
*) ¤tTime
);
713 /* Retrieve the Conntrack Key related fields from packet */
714 OvsCtSetupLookupCtx(key
, zone
, &ctx
, curNbl
, layers
->l4Offset
);
716 NdisAcquireRWLockWrite(ovsConntrackLockObj
, &lockState
, 0);
718 /* Lookup Conntrack entries for a matching entry */
719 entry
= OvsCtLookup(&ctx
);
720 BOOLEAN entryCreated
= FALSE
;
722 /* Delete entry in reverse direction if 'force' is specified */
723 if (entry
&& force
&& ctx
.reply
) {
724 OvsCtEntryDelete(entry
);
728 if (!entry
&& commit
&& ctTotalEntries
>= CT_MAX_ENTRIES
) {
729 /* Don't proceed with processing if the max limit has been hit.
730 * This blocks only new entries from being created and doesn't
731 * affect existing connections.
733 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
734 OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries
);
735 return NDIS_STATUS_RESOURCES
;
738 /* Increment the counters soon after the lookup, since we set ct.state
739 * to OVS_CS_F_TRACKED after processing the ct entry.
741 if (entry
&& (!(key
->ct
.state
& OVS_CS_F_TRACKED
))) {
742 OvsCtIncrementCounters(entry
, ctx
.reply
, curNbl
);
746 /* If no matching entry was found, create one and add New state */
747 entry
= OvsCtEntryCreate(fwdCtx
, key
->ipKey
.nwProto
,
748 layers
->l4Offset
, &ctx
,
749 key
, natInfo
, commit
, currentTime
,
752 /* Process the entry and update CT flags */
753 entry
= OvsProcessConntrackEntry(fwdCtx
, layers
->l4Offset
, &ctx
, key
,
754 zone
, natInfo
, commit
, currentTime
,
759 * Note that natInfo is not the same as entry->natInfo here. natInfo
760 * is decided by action in the openflow rule, entry->natInfo is decided
761 * when the entry is created. In the reverse NAT case, natInfo is
762 * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
763 * NAT_ACTION_DST without NAT_ACTION_REVERSE
765 if (entry
&& natInfo
->natAction
!= NAT_ACTION_NONE
)
767 OvsNatPacket(fwdCtx
, entry
, entry
->natInfo
.natAction
,
772 OvsConntrackSetMark(key
, entry
, mark
->value
, mark
->mask
,
773 &triggerUpdateEvent
);
776 if (entry
&& labels
) {
777 OvsConntrackSetLabels(key
, entry
, &labels
->value
, &labels
->mask
,
778 &triggerUpdateEvent
);
781 if (entry
&& OvsDetectFtpPacket(key
)) {
782 /* FTP parser will always be loaded */
783 UNREFERENCED_PARAMETER(helper
);
785 status
= OvsCtHandleFtp(curNbl
, key
, layers
, currentTime
, entry
,
786 (ntohs(key
->ipKey
.l4
.tpDst
) == IPPORT_FTP
));
787 if (status
!= NDIS_STATUS_SUCCESS
) {
788 OVS_LOG_ERROR("Error while parsing the FTP packet");
792 /* Add original tuple information to flow Key */
793 if (entry
&& entry
->key
.dl_type
== ntohs(ETH_TYPE_IPV4
)) {
795 if (entry
->parent
!= NULL
) {
796 POVS_CT_ENTRY parent
= entry
->parent
;
797 ctKey
= &parent
->key
;
802 key
->ct
.tuple_ipv4
.ipv4_src
= ctKey
->src
.addr
.ipv4_aligned
;
803 key
->ct
.tuple_ipv4
.ipv4_dst
= ctKey
->dst
.addr
.ipv4_aligned
;
804 key
->ct
.tuple_ipv4
.ipv4_proto
= ctKey
->nw_proto
;
806 /* Orig tuple Port is overloaded to take in ICMP-Type & Code */
807 /* This mimics the behavior in lib/conntrack.c*/
808 key
->ct
.tuple_ipv4
.src_port
= ctKey
->nw_proto
!= IPPROTO_ICMP
?
810 htons(ctKey
->src
.icmp_type
);
811 key
->ct
.tuple_ipv4
.dst_port
= ctKey
->nw_proto
!= IPPROTO_ICMP
?
813 htons(ctKey
->src
.icmp_code
);
816 if (entryCreated
&& entry
) {
817 OvsPostCtEventEntry(entry
, OVS_EVENT_CT_NEW
);
819 if (postUpdateEvent
&& entry
&& !entryCreated
&& triggerUpdateEvent
) {
820 OvsPostCtEventEntry(entry
, OVS_EVENT_CT_UPDATE
);
823 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
829 *---------------------------------------------------------------------------
830 * OvsExecuteConntrackAction
831 * Executes Conntrack actions XXX - Add more
832 * For the IPv4 fragments, consume the original fragment NBL
833 *---------------------------------------------------------------------------
836 OvsExecuteConntrackAction(OvsForwardingContext
*fwdCtx
,
841 BOOLEAN commit
= FALSE
;
842 BOOLEAN force
= FALSE
;
843 BOOLEAN postUpdateEvent
= FALSE
;
845 UINT32 eventmask
= 0;
846 MD_MARK
*mark
= NULL
;
847 MD_LABELS
*labels
= NULL
;
849 NAT_ACTION_INFO natActionInfo
;
850 OVS_PACKET_HDR_INFO
*layers
= &fwdCtx
->layers
;
851 PNET_BUFFER_LIST newNbl
= NULL
;
854 memset(&natActionInfo
, 0, sizeof natActionInfo
);
855 status
= OvsDetectCtPacket(fwdCtx
, key
, &newNbl
);
856 if (status
!= NDIS_STATUS_SUCCESS
) {
860 /* XXX Convert this to NL_ATTR_FOR_EACH */
861 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_ZONE
);
863 zone
= NlAttrGetU16(ctAttr
);
865 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_COMMIT
);
869 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_MARK
);
871 mark
= NlAttrGet(ctAttr
);
873 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_LABELS
);
875 labels
= NlAttrGet(ctAttr
);
877 natActionInfo
.natAction
= NAT_ACTION_NONE
;
878 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_NAT
);
880 /* Parse nested NAT attributes. */
883 BOOLEAN hasMinIp
= FALSE
;
884 BOOLEAN hasMinPort
= FALSE
;
885 BOOLEAN hasMaxIp
= FALSE
;
886 BOOLEAN hasMaxPort
= FALSE
;
887 NL_NESTED_FOR_EACH_UNSAFE (natAttr
, left
, ctAttr
) {
888 enum ovs_nat_attr subtype
= NlAttrType(natAttr
);
890 case OVS_NAT_ATTR_SRC
:
891 case OVS_NAT_ATTR_DST
:
892 natActionInfo
.natAction
|=
893 ((subtype
== OVS_NAT_ATTR_SRC
)
894 ? NAT_ACTION_SRC
: NAT_ACTION_DST
);
896 case OVS_NAT_ATTR_IP_MIN
:
897 memcpy(&natActionInfo
.minAddr
,
898 NlAttrData(natAttr
), NlAttrGetSize(natAttr
));
901 case OVS_NAT_ATTR_IP_MAX
:
902 memcpy(&natActionInfo
.maxAddr
,
903 NlAttrData(natAttr
), NlAttrGetSize(natAttr
));
906 case OVS_NAT_ATTR_PROTO_MIN
:
907 natActionInfo
.minPort
= NlAttrGetU16(natAttr
);
910 case OVS_NAT_ATTR_PROTO_MAX
:
911 natActionInfo
.maxPort
= NlAttrGetU16(natAttr
);
914 case OVS_NAT_ATTR_PERSISTENT
:
915 case OVS_NAT_ATTR_PROTO_HASH
:
916 case OVS_NAT_ATTR_PROTO_RANDOM
:
920 if (natActionInfo
.natAction
== NAT_ACTION_NONE
) {
921 natActionInfo
.natAction
= NAT_ACTION_REVERSE
;
923 if (hasMinIp
&& !hasMaxIp
) {
924 memcpy(&natActionInfo
.maxAddr
,
925 &natActionInfo
.minAddr
,
926 sizeof(natActionInfo
.maxAddr
));
928 if (hasMinPort
&& !hasMaxPort
) {
929 natActionInfo
.maxPort
= natActionInfo
.minPort
;
931 if (hasMinPort
|| hasMaxPort
) {
932 if (natActionInfo
.natAction
& NAT_ACTION_SRC
) {
933 natActionInfo
.natAction
|= NAT_ACTION_SRC_PORT
;
934 } else if (natActionInfo
.natAction
& NAT_ACTION_DST
) {
935 natActionInfo
.natAction
|= NAT_ACTION_DST_PORT
;
939 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_HELPER
);
941 helper
= NlAttrGetString(ctAttr
);
942 if (helper
== NULL
) {
943 return NDIS_STATUS_INVALID_PARAMETER
;
945 if (strcmp("ftp", helper
) != 0) {
946 /* Only support FTP */
947 return NDIS_STATUS_NOT_SUPPORTED
;
950 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_FORCE_COMMIT
);
953 /* Force implicitly means commit */
956 ctAttr
= NlAttrFindNested(a
, OVS_CT_ATTR_EVENTMASK
);
958 eventmask
= NlAttrGetU32(ctAttr
);
959 /* Only mark and label updates are supported. */
960 if (eventmask
& (1 << IPCT_MARK
| 1 << IPCT_LABEL
))
961 postUpdateEvent
= TRUE
;
963 /* If newNbl is not allocated, use the current Nbl*/
964 status
= OvsCtExecute_(fwdCtx
, key
, layers
,
965 commit
, force
, zone
, mark
, labels
, helper
, &natActionInfo
,
971 *----------------------------------------------------------------------------
972 * OvsConntrackEntryCleaner
973 * Runs periodically and cleans up the connection tracker
974 *----------------------------------------------------------------------------
977 OvsConntrackEntryCleaner(PVOID data
)
980 POVS_CT_THREAD_CTX context
= (POVS_CT_THREAD_CTX
)data
;
981 PLIST_ENTRY link
, next
;
983 LOCK_STATE_EX lockState
;
984 BOOLEAN success
= TRUE
;
987 if (ovsConntrackLockObj
== NULL
) {
988 /* Lock has been freed by 'OvsCleanupConntrack()' */
991 NdisAcquireRWLockWrite(ovsConntrackLockObj
, &lockState
, 0);
993 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
997 /* Set the timeout for the thread and cleanup */
998 INT64 threadSleepTimeout
= -CT_CLEANUP_INTERVAL
;
1000 if (ctTotalEntries
) {
1001 for (int i
= 0; i
< CT_HASH_TABLE_SIZE
; i
++) {
1002 LIST_FORALL_SAFE(&ovsConntrackTable
[i
], link
, next
) {
1003 entry
= CONTAINING_RECORD(link
, OVS_CT_ENTRY
, link
);
1004 if (entry
&& OvsCtEntryExpired(entry
)) {
1005 OvsCtEntryDelete(entry
);
1010 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
1011 KeWaitForSingleObject(&context
->event
, Executive
, KernelMode
,
1012 FALSE
, (LARGE_INTEGER
*)&threadSleepTimeout
);
1015 PsTerminateSystemThread(STATUS_SUCCESS
);
1019 *----------------------------------------------------------------------------
1021 * Flushes out all Conntrack Entries that match the given zone
1022 *----------------------------------------------------------------------------
1024 static __inline NDIS_STATUS
1025 OvsCtFlush(UINT16 zone
)
1027 PLIST_ENTRY link
, next
;
1028 POVS_CT_ENTRY entry
;
1030 LOCK_STATE_EX lockState
;
1031 NdisAcquireRWLockWrite(ovsConntrackLockObj
, &lockState
, 0);
1033 if (ctTotalEntries
) {
1034 for (int i
= 0; i
< CT_HASH_TABLE_SIZE
; i
++) {
1035 LIST_FORALL_SAFE(&ovsConntrackTable
[i
], link
, next
) {
1036 entry
= CONTAINING_RECORD(link
, OVS_CT_ENTRY
, link
);
1037 /* A zone of 0 matches every entry; otherwise only entries in 'zone' match */
1038 if (!zone
|| zone
== entry
->key
.zone
)
1039 OvsCtEntryDelete(entry
);
1045 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
1046 return NDIS_STATUS_SUCCESS
;
1050 OvsCtDeleteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx
,
1053 POVS_MESSAGE msgIn
= (POVS_MESSAGE
)usrParamsCtx
->inputBuffer
;
1054 POVS_MESSAGE msgOut
= (POVS_MESSAGE
)usrParamsCtx
->outputBuffer
;
1055 PNL_MSG_HDR nlMsgHdr
= &(msgIn
->nlMsg
);
1056 PNL_ATTR ctAttrs
[__CTA_MAX
];
1057 UINT32 attrOffset
= NLMSG_HDRLEN
+ NF_GEN_MSG_HDRLEN
+ OVS_HDRLEN
;
1058 NL_ERROR nlError
= NL_ERROR_SUCCESS
;
1065 static const NL_POLICY ctZonePolicy
[] = {
1066 [CTA_ZONE
] = { .type
= NL_A_BE16
, .optional
= TRUE
},
1069 if ((NlAttrParse(nlMsgHdr
, attrOffset
, NlNfMsgAttrsLen(nlMsgHdr
),
1070 ctZonePolicy
, ARRAY_SIZE(ctZonePolicy
),
1071 ctAttrs
, ARRAY_SIZE(ctAttrs
)))
1073 OVS_LOG_ERROR("Zone attr parsing failed for msg: %p", nlMsgHdr
);
1074 status
= STATUS_INVALID_PARAMETER
;
1078 if (ctAttrs
[CTA_ZONE
]) {
1079 zone
= ntohs(NlAttrGetU16(ctAttrs
[CTA_ZONE
]));
1082 status
= OvsCtFlush(zone
);
1083 if (status
== STATUS_SUCCESS
) {
1084 nlmsgType
= (NFNL_SUBSYS_CTNETLINK
<< 8 | IPCTNL_MSG_CT_DELETE
);
1086 usrParamsCtx
->outputBuffer
,
1087 usrParamsCtx
->outputLength
);
1088 if (!NlFillOvsMsgForNfGenMsg(&nlBuf
, nlmsgType
, NLM_F_CREATE
,
1089 msgIn
->nlMsg
.nlmsgSeq
,
1090 msgIn
->nlMsg
.nlmsgPid
,
1092 msgIn
->nfGenMsg
.version
,
1094 status
= STATUS_INVALID_PARAMETER
;
1096 nlMsg
= (PNL_MSG_HDR
)NlBufAt(&nlBuf
, 0, 0);
1097 nlMsg
->nlmsgLen
= NlBufSize(&nlBuf
);
1098 *replyLen
= msgOut
->nlMsg
.nlmsgLen
;
1102 nlError
= NlMapStatusToNlErr(status
);
1103 if (nlError
!= NL_ERROR_SUCCESS
) {
1104 POVS_MESSAGE_ERROR msgError
= (POVS_MESSAGE_ERROR
)
1105 usrParamsCtx
->outputBuffer
;
1108 NlBuildErrorMsg(msgIn
, msgError
, nlError
, replyLen
);
1109 ASSERT(*replyLen
!= 0);
1110 status
= STATUS_SUCCESS
;
1116 static __inline NDIS_STATUS
1117 MapIpTupleToNl(PNL_BUFFER nlBuf
, OVS_CT_KEY
*key
)
1119 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
1122 offset
= NlMsgStartNested(nlBuf
, CTA_TUPLE_IP
);
1124 return NDIS_STATUS_FAILURE
;
1127 if (key
->dl_type
== ntohs(ETH_TYPE_IPV4
)) {
1128 if (!NlMsgPutTailU32(nlBuf
, CTA_IP_V4_SRC
, key
->src
.addr
.ipv4
)) {
1129 status
= NDIS_STATUS_FAILURE
;
1132 if (!NlMsgPutTailU32(nlBuf
, CTA_IP_V4_DST
, key
->dst
.addr
.ipv4
)) {
1133 status
= NDIS_STATUS_FAILURE
;
1136 } else if (key
->dl_type
== ntohs(ETH_TYPE_IPV6
)) {
1137 if (!NlMsgPutTailUnspec(nlBuf
, CTA_IP_V6_SRC
,
1138 (PCHAR
)(&key
->src
.addr
.ipv6
),
1139 sizeof(key
->src
.addr
.ipv6
))) {
1140 status
= NDIS_STATUS_FAILURE
;
1143 if (!NlMsgPutTailUnspec(nlBuf
, CTA_IP_V6_DST
,
1144 (PCHAR
)(&key
->dst
.addr
.ipv6
),
1145 sizeof(key
->dst
.addr
.ipv6
))) {
1146 status
= NDIS_STATUS_FAILURE
;
1152 NlMsgEndNested(nlBuf
, offset
);
1156 static __inline NDIS_STATUS
1157 MapProtoTupleToNl(PNL_BUFFER nlBuf
, OVS_CT_KEY
*key
)
1159 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
1162 offset
= NlMsgStartNested(nlBuf
, CTA_TUPLE_PROTO
);
1164 return NDIS_STATUS_FAILURE
;
1167 if (!NlMsgPutTailU8(nlBuf
, CTA_PROTO_NUM
, key
->nw_proto
)) {
1168 status
= NDIS_STATUS_FAILURE
;
1172 if (key
->dl_type
== ntohs(ETH_TYPE_IPV4
)
1173 || key
->dl_type
== ntohs(ETH_TYPE_IPV6
)) {
1174 /* ICMPv6 Type, Code and ID are currently not tracked and are reported as 0 */
1175 if (key
->nw_proto
== IPPROTO_ICMP
) {
1176 if (!NlMsgPutTailU16(nlBuf
, CTA_PROTO_ICMP_ID
,
1177 htons(key
->src
.icmp_id
))) {
1178 status
= NDIS_STATUS_FAILURE
;
1181 if (!NlMsgPutTailU8(nlBuf
, CTA_PROTO_ICMP_TYPE
,
1182 key
->src
.icmp_type
)) {
1183 status
= NDIS_STATUS_FAILURE
;
1186 if (!NlMsgPutTailU8(nlBuf
, CTA_PROTO_ICMP_CODE
,
1187 key
->src
.icmp_code
)) {
1188 status
= NDIS_STATUS_FAILURE
;
1191 } else if (key
->nw_proto
== IPPROTO_ICMPV6
) {
1192 if (!NlMsgPutTailU16(nlBuf
, CTA_PROTO_ICMPV6_ID
, 0)) {
1193 status
= NDIS_STATUS_FAILURE
;
1196 if (!NlMsgPutTailU8(nlBuf
, CTA_PROTO_ICMPV6_TYPE
, 0)) {
1197 status
= NDIS_STATUS_FAILURE
;
1200 if (!NlMsgPutTailU8(nlBuf
, CTA_PROTO_ICMPV6_CODE
, 0)) {
1201 status
= NDIS_STATUS_FAILURE
;
1204 } else if (key
->nw_proto
== IPPROTO_TCP
1205 || key
->nw_proto
== IPPROTO_UDP
) {
1206 if (!NlMsgPutTailU16(nlBuf
, CTA_PROTO_SRC_PORT
,
1208 status
= NDIS_STATUS_FAILURE
;
1211 if (!NlMsgPutTailU16(nlBuf
, CTA_PROTO_DST_PORT
,
1213 status
= NDIS_STATUS_FAILURE
;
1220 NlMsgEndNested(nlBuf
, offset
);
1224 static __inline NDIS_STATUS
1225 MapCtKeyTupleToNl(PNL_BUFFER nlBuf
,
1229 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
1232 offset
= NlMsgStartNested(nlBuf
, tupleType
);
1234 return NDIS_STATUS_FAILURE
;
1237 status
= MapIpTupleToNl(nlBuf
, key
);
1238 if (status
!= NDIS_STATUS_SUCCESS
) {
1242 status
= MapProtoTupleToNl(nlBuf
, key
);
1243 if (status
!= NDIS_STATUS_SUCCESS
) {
1248 NlMsgEndNested(nlBuf
, offset
);
1252 static __inline NDIS_STATUS
1253 MapCtCounterToNl(PNL_BUFFER nlBuf
,
1257 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
1260 offset
= NlMsgStartNested(nlBuf
, counterType
);
1262 return NDIS_STATUS_FAILURE
;
1265 if (!NlMsgPutTailU64(nlBuf
, CTA_COUNTERS_PACKETS
,
1266 htonll(key
->packetCount
))) {
1267 status
= NDIS_STATUS_FAILURE
;
1271 if (!NlMsgPutTailU64(nlBuf
, CTA_COUNTERS_BYTES
,
1272 htonll(key
->byteCount
))) {
1273 status
= NDIS_STATUS_FAILURE
;
1278 NlMsgEndNested(nlBuf
, offset
);
1282 /* Userspace expects system time to be Unix timestamp in Nano Seconds */
1283 static __inline
unsigned
1284 WindowsTickToUnixSeconds(long long windowsTicks
)
1287 * Windows epoch starts 1601-01-01T00:00:00Z. It's 11644473600 seconds
1288 * before the UNIX/Linux epoch (1970-01-01T00:00:00Z). Windows ticks are
1289 * in 100 nanoseconds
1291 return (unsigned)((windowsTicks
/ WINDOWS_TICK
1292 - SEC_TO_UNIX_EPOCH
));
/*
 * OvsCreateNlMsgFromCtEntry --
 *     Serializes the conntrack entry 'entry' as a netfilter-style netlink
 *     message into 'outBuffer' (capacity 'outBufLen').
 *
 *     Header:
 *       - nfgenFamily is AF_INET or AF_INET6 depending on
 *         entry->key.dl_type, and stays 0 for any other type.
 *       - OVS_EVENT_CT_NEW and OVS_EVENT_CT_UPDATE both map to
 *         IPCTNL_MSG_CT_NEW, OVS_EVENT_CT_DELETE maps to
 *         IPCTNL_MSG_CT_DELETE; either is OR-ed with
 *         NFNL_SUBSYS_CTNETLINK << 8.
 *       - nlmsgFlags starts as NLM_F_CREATE; per the in-body comment it is
 *         unset for update events so userspace can tell NEW from UPDATE.
 *
 *     Attributes, in the order they are appended:
 *       CTA_TUPLE_ORIG, CTA_TUPLE_REPLY        from entry->key, entry->rev_key
 *       CTA_COUNTERS_ORIG, CTA_COUNTERS_REPLY  from entry->key, entry->rev_key
 *       CTA_ZONE       only when entry->key.zone is non-zero
 *       CTA_MARK       htonl(entry->mark)
 *       CTA_LABELS     guarded by the test of entry->labels.ct_labels
 *       CTA_TIMEOUT    only while entry->expiration is later than the
 *                      current system time; the value is the remaining
 *                      time divided by CT_INTERVAL_SEC
 *       CTA_PROTOINFO  only for IPPROTO_TCP entries
 *       CTA_STATUS     always 0 (required but not implemented)
 *       CTA_ID         the entry pointer cast to UINT32
 *       CTA_TIMESTAMP  only when entry->timestampStart is non-zero; holds
 *                      CTA_TIMESTAMP_START as whole Unix seconds scaled by
 *                      SEC_TO_NANOSEC
 *
 *     On completion nlmsgLen in the message header is set to NlBufSize()
 *     and STATUS_SUCCESS is returned.
 *
 *     Error returns visible in this listing: STATUS_INVALID_PARAMETER,
 *     STATUS_INVALID_BUFFER_SIZE, STATUS_UNSUCCESSFUL and
 *     NDIS_STATUS_FAILURE.
 *
 *     NOTE(review): this listing omits some original lines (return type,
 *     most parameters, several local declarations and several guard
 *     conditions). Confirm the conditions attached to the early returns
 *     and to CTA_MARK against the complete file.
 *     NOTE(review): if labels.ct_labels is an array member, the test that
 *     guards CTA_LABELS is always true - confirm the intended check.
 */
1296 OvsCreateNlMsgFromCtEntry(POVS_CT_ENTRY entry
,
1310 UINT64 currentTime
, expiration
;
1312 UINT16 nlmsgFlags
= NLM_F_CREATE
;
/* NOTE(review): the argument below looks mis-encoded in this listing;
 * presumably it is the address of currentTime - confirm against the
 * complete file. */
1313 NdisGetCurrentSystemTime((LARGE_INTEGER
*)¤tTime
);
1314 UINT8 nfgenFamily
= 0;
1315 if (entry
->key
.dl_type
== htons(ETH_TYPE_IPV4
)) {
1316 nfgenFamily
= AF_INET
;
1317 } else if (entry
->key
.dl_type
== htons(ETH_TYPE_IPV6
)) {
1318 nfgenFamily
= AF_INET6
;
1321 NlBufInit(&nlBuf
, outBuffer
, outBufLen
);
1322 /* Mimic netfilter */
1323 if (eventType
== OVS_EVENT_CT_NEW
|| eventType
== OVS_EVENT_CT_UPDATE
) {
1324 nlmsgType
= (UINT16
) (NFNL_SUBSYS_CTNETLINK
<< 8 | IPCTNL_MSG_CT_NEW
);
1325 } else if (eventType
== OVS_EVENT_CT_DELETE
) {
1326 nlmsgType
= (UINT16
) (NFNL_SUBSYS_CTNETLINK
<< 8 | IPCTNL_MSG_CT_DELETE
);
1328 return STATUS_INVALID_PARAMETER
;
1331 if (eventType
== OVS_EVENT_CT_UPDATE
) {
1332 /* In netlink-conntrack.c IPCTNL_MSG_CT_NEW msg type is used to
1333 * differentiate between OVS_EVENT_CT_NEW and OVS_EVENT_CT_UPDATE
1334 * events based on nlmsgFlags, unset it to notify an update event.
1338 ok
= NlFillOvsMsgForNfGenMsg(&nlBuf
, nlmsgType
, nlmsgFlags
,
1339 nlmsgSeq
, nlmsgPid
, nfgenFamily
,
1340 nfGenVersion
, dpIfIndex
);
1342 return STATUS_INVALID_BUFFER_SIZE
;
1345 status
= MapCtKeyTupleToNl(&nlBuf
, CTA_TUPLE_ORIG
, &entry
->key
);
1346 if (status
!= NDIS_STATUS_SUCCESS
) {
1347 return STATUS_UNSUCCESSFUL
;
1350 status
= MapCtKeyTupleToNl(&nlBuf
, CTA_TUPLE_REPLY
, &entry
->rev_key
);
1351 if (status
!= NDIS_STATUS_SUCCESS
) {
1352 return STATUS_UNSUCCESSFUL
;
1355 status
= MapCtCounterToNl(&nlBuf
, CTA_COUNTERS_ORIG
, &entry
->key
);
1356 if (status
!= NDIS_STATUS_SUCCESS
) {
1357 return STATUS_UNSUCCESSFUL
;
1360 status
= MapCtCounterToNl(&nlBuf
, CTA_COUNTERS_REPLY
, &entry
->rev_key
);
1361 if (status
!= NDIS_STATUS_SUCCESS
) {
1362 return STATUS_UNSUCCESSFUL
;
1365 if (entry
->key
.zone
) {
1366 if (!NlMsgPutTailU16(&nlBuf
, CTA_ZONE
, htons(entry
->key
.zone
))) {
1367 return STATUS_INVALID_BUFFER_SIZE
;
1372 if (!NlMsgPutTailU32(&nlBuf
, CTA_MARK
, htonl(entry
->mark
))) {
1373 return STATUS_INVALID_BUFFER_SIZE
;
1377 if (entry
->labels
.ct_labels
) {
1378 ok
= NlMsgPutTailUnspec(&nlBuf
, CTA_LABELS
,
1379 (PCHAR
)(&entry
->labels
),
1380 sizeof(entry
->labels
));
1382 return STATUS_INVALID_BUFFER_SIZE
;
1386 if (entry
->expiration
> currentTime
) {
1387 expiration
= entry
->expiration
- currentTime
;
1388 timeout
= (UINT32
) (expiration
/ CT_INTERVAL_SEC
);
1389 if (!NlMsgPutTailU32(&nlBuf
, CTA_TIMEOUT
, htonl(timeout
))) {
1390 return STATUS_INVALID_BUFFER_SIZE
;
1394 if (entry
->key
.nw_proto
== IPPROTO_TCP
) {
1395 /* Add ProtoInfo for TCP */
1397 offset
= NlMsgStartNested(&nlBuf
, CTA_PROTOINFO
);
/* NOTE(review): this early return uses an NDIS_STATUS_* constant while the
 * other error returns of this function use STATUS_* constants - confirm
 * which status family the callers expect. */
1399 return NDIS_STATUS_FAILURE
;
1402 status
= OvsCtMapTcpProtoInfoToNl(&nlBuf
, entry
);
1403 NlMsgEndNested(&nlBuf
, offset
);
1404 if (status
!= NDIS_STATUS_SUCCESS
) {
1405 return STATUS_UNSUCCESSFUL
;
1409 /* CTA_STATUS is required but not implemented. Default to 0 */
1410 if (!NlMsgPutTailU32(&nlBuf
, CTA_STATUS
, 0)) {
1411 return STATUS_INVALID_BUFFER_SIZE
;
1414 /* Mimic netfilter - nf_conntrack_netlink.c:
1416 * int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) {
1417 * NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
1422 if(!NlMsgPutTailU32(&nlBuf
, CTA_ID
, htonl((UINT32
) entry
))) {
1423 return STATUS_INVALID_BUFFER_SIZE
;
1426 if (entry
->timestampStart
) {
1428 offset
= NlMsgStartNested(&nlBuf
, CTA_TIMESTAMP
);
1430 return NDIS_STATUS_FAILURE
;
1433 start
= WindowsTickToUnixSeconds(entry
->timestampStart
);
1434 start
= start
* SEC_TO_NANOSEC
;
1435 if (!NlMsgPutTailU64(&nlBuf
, CTA_TIMESTAMP_START
, htonll(start
))) {
1436 NlMsgEndNested(&nlBuf
, offset
);
1437 return STATUS_INVALID_BUFFER_SIZE
;
1440 NlMsgEndNested(&nlBuf
, offset
);
1443 nlMsg
= (PNL_MSG_HDR
)NlBufAt(&nlBuf
, 0, 0);
1444 nlMsg
->nlmsgLen
= NlBufSize(&nlBuf
);
1446 return STATUS_SUCCESS
;
/*
 * Review notes for OvsCtDumpCmdHandler, based on the statements visible in
 * this listing. Some original lines are absent (rest of the signature,
 * local declarations, the statements that advance the position counter
 * and leave the loops), so confirm details against the complete file.
 *
 *   - OVS_WRITE_DEV_OP: OvsSetupDumpStart() is called and STATUS_SUCCESS is
 *     returned.
 *   - Otherwise devOp is asserted to be OVS_READ_DEV_OP;
 *     STATUS_INVALID_DEVICE_STATE is returned when
 *     instance->dumpState.ovsMsg is NULL.
 *   - instance->dumpState.index[0] and index[1] are read as the bucket to
 *     start from and the position inside that bucket, and are written back
 *     from 'i' and 'outIndex' before the lock is released.
 *   - The table is walked while holding ovsConntrackLockObj through
 *     NdisAcquireRWLockRead(), and only when ctTotalEntries is non-zero.
 *   - An entry reached with outIndex >= inIndex is serialized by
 *     OvsCreateNlMsgFromCtEntry() into usrParamsCtx->outputBuffer; when that
 *     call fails the lock is released and STATUS_UNSUCCESSFUL is returned.
 *   - If 'i' ends below CT_HASH_TABLE_SIZE, *replyLen is taken from the
 *     nlmsgLen of the message in the output buffer. FreeUserDumpState() is
 *     called on what the accompanying comment describes as the "dump done"
 *     path.
 *   - The final return is STATUS_SUCCESS.
 */
1450 *----------------------------------------------------------------------------
1451 * OvsCtDumpCmdHandler --
1452 * Handler for IPCTNL_MSG_CT_GET command.
1454 * XXX - Try to consolidate dump handler patterns around dumpState usage
1455 * The following dumpHandler is similar to one vport.c uses
1456 *----------------------------------------------------------------------------
1459 OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx
,
1463 /* Setup Dump Start if it's OVS_WRITE_DEV_OP and return */
1464 if (usrParamsCtx
->devOp
== OVS_WRITE_DEV_OP
) {
1466 OvsSetupDumpStart(usrParamsCtx
);
1467 return STATUS_SUCCESS
;
1470 POVS_OPEN_INSTANCE instance
=
1471 (POVS_OPEN_INSTANCE
)usrParamsCtx
->ovsInstance
;
1474 ASSERT(usrParamsCtx
->devOp
== OVS_READ_DEV_OP
);
1475 if (instance
->dumpState
.ovsMsg
== NULL
) {
1477 return STATUS_INVALID_DEVICE_STATE
;
1480 /* Output buffer has been validated while validating read dev op. */
1481 ASSERT(usrParamsCtx
->outputBuffer
!= NULL
);
1482 msgIn
= instance
->dumpState
.ovsMsg
;
/* Resume position saved by the previous read: bucket, then index in it. */
1483 UINT32 inBucket
= instance
->dumpState
.index
[0];
1484 UINT32 inIndex
= instance
->dumpState
.index
[1];
/* 'i' starts at CT_HASH_TABLE_SIZE, the value later read as "nothing found". */
1485 UINT32 i
= CT_HASH_TABLE_SIZE
;
1486 UINT32 outIndex
= 0;
1488 LOCK_STATE_EX lockState
;
/* The table walk below runs with the conntrack lock held for reading. */
1489 NdisAcquireRWLockRead(ovsConntrackLockObj
, &lockState
, 0);
1491 if (ctTotalEntries
) {
1492 for (i
= inBucket
; i
< CT_HASH_TABLE_SIZE
; i
++) {
1493 PLIST_ENTRY head
, link
;
1494 head
= &ovsConntrackTable
[i
];
1495 POVS_CT_ENTRY entry
= NULL
;
1498 LIST_FORALL(head
, link
) {
1500 * if one or more dumps were previously done on this same
1501 * bucket, inIndex will be > 0, so we'll need to reply with
1502 * the inIndex + 1 ct-entry from the bucket.
1504 if (outIndex
>= inIndex
) {
1505 entry
= CONTAINING_RECORD(link
, OVS_CT_ENTRY
, link
);
1507 rc
= OvsCreateNlMsgFromCtEntry(entry
,
1508 usrParamsCtx
->outputBuffer
,
1509 usrParamsCtx
->outputLength
,
1511 msgIn
->nlMsg
.nlmsgSeq
,
1512 msgIn
->nlMsg
.nlmsgPid
,
1513 msgIn
->nfGenMsg
.version
,
1516 if (rc
!= NDIS_STATUS_SUCCESS
) {
1517 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
1518 return STATUS_UNSUCCESSFUL
;
1533 * if no ct-entry was found above, check the next bucket, beginning
1534 * with the first (i.e. index 0) elem from within that bucket
/* Save the cursor for the next read before dropping the lock. */
1539 instance
->dumpState
.index
[0] = i
;
1540 instance
->dumpState
.index
[1] = outIndex
;
1541 NdisReleaseRWLock(ovsConntrackLockObj
, &lockState
);
1543 /* if i < CT_HASH_TABLE_SIZE => entry was found */
1544 if (i
< CT_HASH_TABLE_SIZE
) {
1545 POVS_MESSAGE msgOut
= (POVS_MESSAGE
)usrParamsCtx
->outputBuffer
;
1546 *replyLen
= msgOut
->nlMsg
.nlmsgLen
;
1548 /* if i >= CT_HASH_TABLE_SIZE => entry was not found => dump done */
1550 FreeUserDumpState(instance
);
1553 return STATUS_SUCCESS
;
1556 #pragma warning(pop)