]> git.proxmox.com Git - mirror_ovs.git/blame - datapath-windows/ovsext/Conntrack.c
datapath-windows: Add a global level RW lock for NAT
[mirror_ovs.git] / datapath-windows / ovsext / Conntrack.c
CommitLineData
792d377d
SV
1/*
2 * Copyright (c) 2015, 2016 VMware, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
792d377d 17#include "Conntrack.h"
45bde017 18#include "IpFragment.h"
792d377d
SV
19#include "Jhash.h"
20#include "PacketParser.h"
d7e6f8e6 21#include "Event.h"
b7a6b3a7 22#include "Conntrack-nat.h"
792d377d 23
b389a124
AS
24#pragma warning(push)
25#pragma warning(disable:4311)
26
b50d56a7
SV
27#define WINDOWS_TICK 10000000
28#define SEC_TO_UNIX_EPOCH 11644473600LL
29#define SEC_TO_NANOSEC 1000000000LL
30
b7a6b3a7 31KSTART_ROUTINE OvsConntrackEntryCleaner;
792d377d
SV
32static PLIST_ENTRY ovsConntrackTable;
33static OVS_CT_THREAD_CTX ctThreadCtx;
34static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
dea38804 35static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
d33f0efa 36extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
c4d9f931 37static LONG ctTotalEntries;
792d377d 38
43369532
AK
39static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple);
40static __inline NDIS_STATUS
41MapNlToCtTuple(POVS_MESSAGE msgIn, PNL_ATTR attr,
42 struct ovs_key_ct_tuple_ipv4 *ct_tuple);
792d377d
SV
43/*
44 *----------------------------------------------------------------------------
45 * OvsInitConntrack
46 * Initialize the components used by Connection Tracking
47 *----------------------------------------------------------------------------
48 */
49NTSTATUS
50OvsInitConntrack(POVS_SWITCH_CONTEXT context)
51{
52 NTSTATUS status;
53 HANDLE threadHandle = NULL;
d33f0efa 54 ctTotalEntries = 0;
792d377d
SV
55
56 /* Init the sync-lock */
57 ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
58 if (ovsConntrackLockObj == NULL) {
59 return STATUS_INSUFFICIENT_RESOURCES;
60 }
61
dea38804
AK
62 ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
63 if (ovsCtNatLockObj == NULL) {
64 NdisFreeRWLock(ovsConntrackLockObj);
65 ovsConntrackLockObj = NULL;
66 return STATUS_INSUFFICIENT_RESOURCES;
67 }
68
792d377d
SV
69 /* Init the Hash Buffer */
70 ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
71 * CT_HASH_TABLE_SIZE,
72 OVS_CT_POOL_TAG);
73 if (ovsConntrackTable == NULL) {
74 NdisFreeRWLock(ovsConntrackLockObj);
75 ovsConntrackLockObj = NULL;
dea38804
AK
76 NdisFreeRWLock(ovsCtNatLockObj);
77 ovsCtNatLockObj = NULL;
792d377d
SV
78 return STATUS_INSUFFICIENT_RESOURCES;
79 }
80
81 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
82 InitializeListHead(&ovsConntrackTable[i]);
83 }
84
85 /* Init CT Cleaner Thread */
86 KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
87 status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
b7a6b3a7 88 NULL, OvsConntrackEntryCleaner,
792d377d
SV
89 &ctThreadCtx);
90
91 if (status != STATUS_SUCCESS) {
92 NdisFreeRWLock(ovsConntrackLockObj);
93 ovsConntrackLockObj = NULL;
94
dea38804
AK
95 NdisFreeRWLock(ovsCtNatLockObj);
96 ovsCtNatLockObj = NULL;
97
792d377d
SV
98 OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
99 ovsConntrackTable = NULL;
100
101 return status;
102 }
103
104 ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
105 &ctThreadCtx.threadObject, NULL);
106 ZwClose(threadHandle);
107 threadHandle = NULL;
b7a6b3a7
YL
108
109 status = OvsNatInit();
110
111 if (status != STATUS_SUCCESS) {
112 OvsCleanupConntrack();
113 return status;
114 }
792d377d
SV
115 return STATUS_SUCCESS;
116}
117
118/*
119 *----------------------------------------------------------------------------
120 * OvsCleanupConntrack
121 * Cleanup memory and thread that were spawned for Connection tracking
122 *----------------------------------------------------------------------------
123 */
124VOID
125OvsCleanupConntrack(VOID)
126{
dea38804 127 LOCK_STATE_EX lockState, lockStateNat;
792d377d
SV
128 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
129 ctThreadCtx.exit = 1;
130 KeSetEvent(&ctThreadCtx.event, 0, FALSE);
131 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
132
133 KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
134 KernelMode, FALSE, NULL);
135 ObDereferenceObject(ctThreadCtx.threadObject);
136
50da3c73 137 /* Force flush all entries before removing */
43369532 138 OvsCtFlush(0, NULL);
50da3c73 139
792d377d
SV
140 if (ovsConntrackTable) {
141 OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
142 ovsConntrackTable = NULL;
143 }
144
145 NdisFreeRWLock(ovsConntrackLockObj);
146 ovsConntrackLockObj = NULL;
dea38804 147 NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
b7a6b3a7 148 OvsNatCleanup();
dea38804
AK
149 NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
150 NdisFreeRWLock(ovsCtNatLockObj);
151 ovsCtNatLockObj = NULL;
792d377d
SV
152}
153
154static __inline VOID
155OvsCtKeyReverse(OVS_CT_KEY *key)
156{
157 struct ct_endpoint tmp;
158 tmp = key->src;
159 key->src = key->dst;
160 key->dst = tmp;
161}
162
163static __inline VOID
164OvsCtUpdateFlowKey(struct OvsFlowKey *key,
165 UINT32 state,
166 UINT16 zone,
167 UINT32 mark,
168 struct ovs_key_ct_labels *labels)
169{
170 key->ct.state = state | OVS_CS_F_TRACKED;
171 key->ct.zone = zone;
172 key->ct.mark = mark;
173 if (labels) {
174 NdisMoveMemory(&key->ct.labels, labels,
175 sizeof(struct ovs_key_ct_labels));
176 } else {
177 memset(&key->ct.labels, 0,
178 sizeof(struct ovs_key_ct_labels));
179 }
180}
181
d7e6f8e6
SV
182static __inline VOID
183OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
184{
185 OVS_CT_EVENT_ENTRY ctEventEntry = {0};
186 NdisMoveMemory(&ctEventEntry.entry, entry, sizeof(OVS_CT_ENTRY));
187 ctEventEntry.type = type;
188 OvsPostCtEvent(&ctEventEntry);
189}
190
5ff83483
AK
191static __inline VOID
192OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
193{
194 if (reply) {
195 entry->rev_key.byteCount+= OvsPacketLenNBL(nbl);
196 entry->rev_key.packetCount++;
197 } else {
198 entry->key.byteCount += OvsPacketLenNBL(nbl);
199 entry->key.packetCount++;
200 }
201}
202
b7a6b3a7
YL
203static __inline BOOLEAN
204OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx,
205 PNAT_ACTION_INFO natInfo, UINT64 now)
5b37c6ae 206{
b7a6b3a7
YL
207 NdisMoveMemory(&entry->key, &ctx->key, sizeof(OVS_CT_KEY));
208 NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY));
5b37c6ae 209 OvsCtKeyReverse(&entry->rev_key);
b7a6b3a7
YL
210
211 /* NatInfo is always initialized to be disabled, so that if NAT action
212 * fails, we will not end up deleting an non-existent NAT entry.
213 */
214 if (natInfo == NULL) {
215 entry->natInfo.natAction = NAT_ACTION_NONE;
216 } else {
dea38804
AK
217 LOCK_STATE_EX lockStateNat;
218 NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
b7a6b3a7
YL
219 if (OvsIsForwardNat(natInfo->natAction)) {
220 entry->natInfo = *natInfo;
221 if (!OvsNatTranslateCtEntry(entry)) {
dea38804 222 NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
b7a6b3a7
YL
223 return FALSE;
224 }
225 ctx->hash = OvsHashCtKey(&entry->key);
226 } else {
227 entry->natInfo.natAction = natInfo->natAction;
228 }
dea38804 229 NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
b7a6b3a7
YL
230 }
231
b50d56a7 232 entry->timestampStart = now;
5b37c6ae
SV
233 InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
234 &entry->link);
b841a441 235
c4d9f931 236 InterlockedIncrement((LONG volatile *)&ctTotalEntries);
b7a6b3a7 237 return TRUE;
5b37c6ae
SV
238}
239
792d377d 240static __inline POVS_CT_ENTRY
b7a6b3a7 241OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
5b37c6ae
SV
242 UINT8 ipProto,
243 UINT32 l4Offset,
792d377d
SV
244 OvsConntrackKeyLookupCtx *ctx,
245 OvsFlowKey *key,
b7a6b3a7 246 PNAT_ACTION_INFO natInfo,
792d377d 247 BOOLEAN commit,
b841a441
SV
248 UINT64 currentTime,
249 BOOLEAN *entryCreated)
792d377d
SV
250{
251 POVS_CT_ENTRY entry = NULL;
252 UINT32 state = 0;
b7c5dbee 253 POVS_CT_ENTRY parentEntry;
b7a6b3a7 254 PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
e68988b8 255
b7c5dbee
SV
256 *entryCreated = FALSE;
257 state |= OVS_CS_F_NEW;
813f5f0a 258
b7c5dbee
SV
259 switch (ipProto) {
260 case IPPROTO_TCP:
261 {
262 TCPHdr tcpStorage;
263 const TCPHdr *tcp;
264 tcp = OvsGetTcp(curNbl, l4Offset, &tcpStorage);
265 if (!OvsConntrackValidateTcpPacket(tcp)) {
266 state = OVS_CS_F_INVALID;
b7a6b3a7 267 break;
5b37c6ae 268 }
6e83dfd9 269
b7c5dbee
SV
270 if (commit) {
271 entry = OvsConntrackCreateTcpEntry(tcp, curNbl, currentTime);
272 }
273 break;
274 }
275 case IPPROTO_ICMP:
276 {
277 ICMPHdr storage;
278 const ICMPHdr *icmp;
279 icmp = OvsGetIcmp(curNbl, l4Offset, &storage);
280 if (!OvsConntrackValidateIcmpPacket(icmp)) {
281 state = OVS_CS_F_INVALID;
b7a6b3a7 282 break;
6e83dfd9 283 }
b7c5dbee
SV
284
285 if (commit) {
286 entry = OvsConntrackCreateIcmpEntry(currentTime);
287 }
288 break;
289 }
290 case IPPROTO_UDP:
291 {
292 if (commit) {
293 entry = OvsConntrackCreateOtherEntry(currentTime);
294 }
295 break;
296 }
297 default:
298 state = OVS_CS_F_INVALID;
299 break;
300 }
301
c4d9f931
AK
302 parentEntry = OvsCtRelatedLookup(ctx->key, currentTime);
303 if (parentEntry != NULL && state != OVS_CS_F_INVALID) {
304 state |= OVS_CS_F_RELATED;
305 }
306
b7c5dbee
SV
307 if (state != OVS_CS_F_INVALID && commit) {
308 if (entry) {
309 entry->parent = parentEntry;
310 if (OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
b841a441 311 *entryCreated = TRUE;
b7c5dbee
SV
312 } else {
313 /* Unable to add entry to the list */
314 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
315 state = OVS_CS_F_INVALID;
316 entry = NULL;
5b37c6ae 317 }
b7c5dbee
SV
318 } else {
319 /* OvsAllocateMemoryWithTag returned NULL; treat as invalid */
320 state = OVS_CS_F_INVALID;
6e83dfd9 321 }
792d377d
SV
322 }
323
324 OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
5ff83483
AK
325 if (entry) {
326 OvsCtIncrementCounters(entry, ctx->reply, curNbl);
327 }
792d377d
SV
328 return entry;
329}
330
5b37c6ae
SV
331static enum CT_UPDATE_RES
332OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
b7a6b3a7
YL
333 PNET_BUFFER_LIST nbl,
334 UINT8 ipProto,
335 UINT32 l4Offset,
336 BOOLEAN reply,
337 UINT64 now)
5b37c6ae 338{
c4d9f931 339 CT_UPDATE_RES status;
f32b745b
SV
340 switch (ipProto) {
341 case IPPROTO_TCP:
5b37c6ae 342 {
f32b745b
SV
343 TCPHdr tcpStorage;
344 const TCPHdr *tcp;
345 tcp = OvsGetTcp(nbl, l4Offset, &tcpStorage);
346 if (!tcp) {
c4d9f931
AK
347 status = CT_UPDATE_INVALID;
348 break;
f32b745b 349 }
c4d9f931
AK
350 status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
351 break;
f32b745b
SV
352 }
353 case IPPROTO_ICMP:
c4d9f931
AK
354 status = OvsConntrackUpdateIcmpEntry(entry, reply, now);
355 break;
f32b745b 356 case IPPROTO_UDP:
c4d9f931
AK
357 status = OvsConntrackUpdateOtherEntry(entry, reply, now);
358 break;
f32b745b 359 default:
c4d9f931
AK
360 status = CT_UPDATE_INVALID;
361 break;
b7a6b3a7 362 }
c4d9f931 363 return status;
792d377d
SV
364}
365
366static __inline BOOLEAN
367OvsCtEntryExpired(POVS_CT_ENTRY entry)
368{
792d377d
SV
369 UINT64 currentTime;
370 NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
371 return entry->expiration < currentTime;
372}
373
c4d9f931
AK
374static __inline VOID
375OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
376{
377 if (entry == NULL) {
378 return;
379 }
380 if (forceDelete || OvsCtEntryExpired(entry)) {
381 if (entry->natInfo.natAction) {
dea38804
AK
382 LOCK_STATE_EX lockStateNat;
383 NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
c4d9f931 384 OvsNatDeleteKey(&entry->key);
dea38804 385 NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
c4d9f931
AK
386 }
387 OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
388 RemoveEntryList(&entry->link);
389 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
390 InterlockedDecrement((LONG volatile*)&ctTotalEntries);
391 return;
392 }
393}
394
792d377d 395static __inline NDIS_STATUS
45bde017
AK
396OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
397 OvsFlowKey *key,
398 PNET_BUFFER_LIST *newNbl)
792d377d
SV
399{
400 /* Currently we support only Unfragmented TCP packets */
401 switch (ntohs(key->l2.dlType)) {
402 case ETH_TYPE_IPV4:
403 if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) {
45bde017
AK
404 return OvsProcessIpv4Fragment(fwdCtx->switchContext,
405 &fwdCtx->curNbl,
406 fwdCtx->completionList,
407 fwdCtx->fwdDetail->SourcePortId,
408 key->tunKey.tunnelId,
409 newNbl);
792d377d 410 }
5b37c6ae
SV
411 if (key->ipKey.nwProto == IPPROTO_TCP
412 || key->ipKey.nwProto == IPPROTO_UDP
413 || key->ipKey.nwProto == IPPROTO_ICMP) {
414 return NDIS_STATUS_SUCCESS;
792d377d 415 }
5b37c6ae 416 return NDIS_STATUS_NOT_SUPPORTED;
792d377d
SV
417 case ETH_TYPE_IPV6:
418 return NDIS_STATUS_NOT_SUPPORTED;
419 }
420
421 return NDIS_STATUS_NOT_SUPPORTED;
422}
423
b7a6b3a7 424BOOLEAN
792d377d
SV
425OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
426{
de8f7050
AK
427 return ((NdisEqualMemory(&ctxKey.src, &entryKey.src,
428 sizeof(struct ct_endpoint))) &&
429 (NdisEqualMemory(&ctxKey.dst, &entryKey.dst,
430 sizeof(struct ct_endpoint))) &&
431 (ctxKey.dl_type == entryKey.dl_type) &&
432 (ctxKey.nw_proto == entryKey.nw_proto) &&
433 (ctxKey.zone == entryKey.zone));
792d377d
SV
434}
435
b7a6b3a7 436POVS_CT_ENTRY
792d377d
SV
437OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
438{
439 PLIST_ENTRY link;
440 POVS_CT_ENTRY entry;
441 BOOLEAN reply = FALSE;
442 POVS_CT_ENTRY found = NULL;
662975e4
SV
443
444 /* Reverse NAT must be performed before OvsCtLookup, so here
445 * we simply need to flip the src and dst in key and compare
446 * they are equal. Note that flipped key is not equal to
447 * rev_key due to NAT effect.
448 */
449 OVS_CT_KEY revCtxKey = ctx->key;
450 OvsCtKeyReverse(&revCtxKey);
792d377d 451
6e83dfd9 452 if (!ctTotalEntries) {
d33f0efa
SV
453 return found;
454 }
455
792d377d
SV
456 LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
457 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
458
662975e4 459 if (OvsCtKeyAreSame(ctx->key, entry->key)) {
792d377d
SV
460 found = entry;
461 reply = FALSE;
792d377d
SV
462 }
463
c4d9f931 464 if (!found && OvsCtKeyAreSame(revCtxKey, entry->key)) {
792d377d
SV
465 found = entry;
466 reply = TRUE;
792d377d 467 }
792d377d 468
c4d9f931
AK
469 if (found) {
470 if (OvsCtEntryExpired(found)) {
471 found = NULL;
472 } else {
473 ctx->reply = reply;
474 }
475 break;
792d377d
SV
476 }
477 }
478
479 ctx->entry = found;
480 return found;
481}
482
b7a6b3a7
YL
483UINT32
484OvsHashCtKey(const OVS_CT_KEY *key)
792d377d 485{
b7a6b3a7
YL
486 UINT32 hsrc, hdst, hash;
487 hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0);
488 hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0);
5b37c6ae 489 hash = hsrc ^ hdst; /* TO identify reverse traffic */
b7a6b3a7
YL
490 hash = OvsJhashBytes((uint32_t *) &key->dst + 1,
491 ((uint32_t *) (key + 1) -
492 (uint32_t *) (&key->dst + 1)),
5b37c6ae 493 hash);
b7a6b3a7 494 return hash;
5b37c6ae 495}
792d377d 496
6e83dfd9
SV
497static UINT8
498OvsReverseIcmpType(UINT8 type)
499{
500 switch (type) {
501 case ICMP4_ECHO_REQUEST:
502 return ICMP4_ECHO_REPLY;
503 case ICMP4_ECHO_REPLY:
504 return ICMP4_ECHO_REQUEST;
505 case ICMP4_TIMESTAMP_REQUEST:
506 return ICMP4_TIMESTAMP_REPLY;
507 case ICMP4_TIMESTAMP_REPLY:
508 return ICMP4_TIMESTAMP_REQUEST;
509 case ICMP4_INFO_REQUEST:
510 return ICMP4_INFO_REPLY;
511 case ICMP4_INFO_REPLY:
512 return ICMP4_INFO_REQUEST;
513 default:
514 return 0;
515 }
516}
517
5b37c6ae
SV
518static __inline NDIS_STATUS
519OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
520 UINT16 zone,
521 OvsConntrackKeyLookupCtx *ctx,
522 PNET_BUFFER_LIST curNbl,
523 UINT32 l4Offset)
524{
b7a6b3a7 525 const OVS_NAT_ENTRY *natEntry;
792d377d
SV
526 ctx->key.zone = zone;
527 ctx->key.dl_type = flowKey->l2.dlType;
5b37c6ae 528 ctx->related = FALSE;
792d377d 529
5b37c6ae 530 /* Extract L3 and L4*/
792d377d
SV
531 if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
532 ctx->key.src.addr.ipv4 = flowKey->ipKey.nwSrc;
533 ctx->key.dst.addr.ipv4 = flowKey->ipKey.nwDst;
534 ctx->key.nw_proto = flowKey->ipKey.nwProto;
535
536 ctx->key.src.port = flowKey->ipKey.l4.tpSrc;
537 ctx->key.dst.port = flowKey->ipKey.l4.tpDst;
5b37c6ae
SV
538 if (flowKey->ipKey.nwProto == IPPROTO_ICMP) {
539 ICMPHdr icmpStorage;
540 const ICMPHdr *icmp;
541 icmp = OvsGetIcmp(curNbl, l4Offset, &icmpStorage);
542 ASSERT(icmp);
5b37c6ae
SV
543
544 /* Related bit is set when ICMP has an error */
545 /* XXX parse out the appropriate src and dst from inner pkt */
546 switch (icmp->type) {
f32b745b
SV
547 case ICMP4_ECHO_REQUEST:
548 case ICMP4_ECHO_REPLY:
549 case ICMP4_TIMESTAMP_REQUEST:
550 case ICMP4_TIMESTAMP_REPLY:
551 case ICMP4_INFO_REQUEST:
552 case ICMP4_INFO_REPLY:
553 if (icmp->code != 0) {
554 return NDIS_STATUS_INVALID_PACKET;
555 }
556 /* Separate ICMP connection: identified using id */
557 ctx->key.dst.icmp_id = icmp->fields.echo.id;
558 ctx->key.src.icmp_id = icmp->fields.echo.id;
559 ctx->key.src.icmp_type = icmp->type;
560 ctx->key.dst.icmp_type = OvsReverseIcmpType(icmp->type);
561 break;
562 case ICMP4_DEST_UNREACH:
563 case ICMP4_TIME_EXCEEDED:
564 case ICMP4_PARAM_PROB:
565 case ICMP4_SOURCE_QUENCH:
566 case ICMP4_REDIRECT: {
567 /* XXX Handle inner packet */
568 ctx->related = TRUE;
569 break;
570 }
571 default:
572 ctx->related = FALSE;
5b37c6ae
SV
573 }
574 }
792d377d
SV
575 } else if (flowKey->l2.dlType == htons(ETH_TYPE_IPV6)) {
576 ctx->key.src.addr.ipv6 = flowKey->ipv6Key.ipv6Src;
577 ctx->key.dst.addr.ipv6 = flowKey->ipv6Key.ipv6Dst;
578 ctx->key.nw_proto = flowKey->ipv6Key.nwProto;
579
580 ctx->key.src.port = flowKey->ipv6Key.l4.tpSrc;
581 ctx->key.dst.port = flowKey->ipv6Key.l4.tpDst;
5b37c6ae
SV
582 /* XXX Handle ICMPv6 errors*/
583 } else {
584 return NDIS_STATUS_INVALID_PACKET;
792d377d
SV
585 }
586
dea38804
AK
587 LOCK_STATE_EX lockStateNat;
588 NdisAcquireRWLockRead(ovsCtNatLockObj, &lockStateNat, 0);
b7a6b3a7 589 natEntry = OvsNatLookup(&ctx->key, TRUE);
dea38804 590 NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
b7a6b3a7
YL
591 if (natEntry) {
592 /* Translate address first for reverse NAT */
593 ctx->key = natEntry->ctEntry->key;
594 OvsCtKeyReverse(&ctx->key);
595 }
596
597 ctx->hash = OvsHashCtKey(&ctx->key);
5b37c6ae 598 return NDIS_STATUS_SUCCESS;
792d377d
SV
599}
600
e68988b8
SV
601static __inline BOOLEAN
602OvsDetectFtpPacket(OvsFlowKey *key) {
603 return (key->ipKey.nwProto == IPPROTO_TCP &&
604 (ntohs(key->ipKey.l4.tpDst) == IPPORT_FTP ||
605 ntohs(key->ipKey.l4.tpSrc) == IPPORT_FTP));
606}
607
792d377d
SV
608/*
609 *----------------------------------------------------------------------------
610 * OvsProcessConntrackEntry
611 * Check the TCP flags and set the ct_state of the entry
612 *----------------------------------------------------------------------------
613 */
614static __inline POVS_CT_ENTRY
b7a6b3a7 615OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
5b37c6ae 616 UINT32 l4Offset,
792d377d
SV
617 OvsConntrackKeyLookupCtx *ctx,
618 OvsFlowKey *key,
619 UINT16 zone,
b7a6b3a7 620 NAT_ACTION_INFO *natInfo,
792d377d 621 BOOLEAN commit,
b841a441
SV
622 UINT64 currentTime,
623 BOOLEAN *entryCreated)
792d377d
SV
624{
625 POVS_CT_ENTRY entry = ctx->entry;
626 UINT32 state = 0;
b7a6b3a7 627 PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
b841a441 628 *entryCreated = FALSE;
792d377d
SV
629
630 /* If an entry was found, update the state based on TCP flags */
631 if (ctx->related) {
632 state |= OVS_CS_F_RELATED;
633 if (ctx->reply) {
88e62998 634 state |= OVS_CS_F_REPLY_DIR;
792d377d
SV
635 }
636 } else {
637 CT_UPDATE_RES result;
5b37c6ae
SV
638 result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto,
639 l4Offset, ctx->reply, currentTime);
792d377d
SV
640 switch (result) {
641 case CT_UPDATE_VALID:
642 state |= OVS_CS_F_ESTABLISHED;
643 if (ctx->reply) {
644 state |= OVS_CS_F_REPLY_DIR;
645 }
646 break;
647 case CT_UPDATE_INVALID:
648 state |= OVS_CS_F_INVALID;
649 break;
650 case CT_UPDATE_NEW:
651 //Delete and update the Conntrack
c4d9f931 652 OvsCtEntryDelete(ctx->entry, TRUE);
792d377d 653 ctx->entry = NULL;
b7a6b3a7
YL
654 entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
655 ctx, key, natInfo, commit, currentTime,
b841a441 656 entryCreated);
b50d56a7
SV
657 if (!entry) {
658 return NULL;
659 }
792d377d
SV
660 break;
661 }
662 }
e68988b8
SV
663
664 if (key->ipKey.nwProto == IPPROTO_TCP && entry) {
665 /* Update the related bit if there is a parent */
666 if (entry->parent) {
667 state |= OVS_CS_F_RELATED;
668 } else {
669 POVS_CT_ENTRY parentEntry;
670 parentEntry = OvsCtRelatedLookup(ctx->key, currentTime);
813f5f0a 671 entry->parent = parentEntry;
e68988b8 672 if (parentEntry != NULL) {
e68988b8
SV
673 state |= OVS_CS_F_RELATED;
674 }
675 }
676 }
677
792d377d
SV
678 /* Copy mark and label from entry into flowKey. If actions specify
679 different mark and label, update the flowKey. */
5b37c6ae
SV
680 if (entry != NULL) {
681 OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);
682 } else {
683 OvsCtUpdateFlowKey(key, state, zone, 0, NULL);
684 }
792d377d
SV
685 return entry;
686}
687
688static __inline VOID
689OvsConntrackSetMark(OvsFlowKey *key,
690 POVS_CT_ENTRY entry,
691 UINT32 value,
6c6204b6
AK
692 UINT32 mask,
693 BOOLEAN *markChanged)
792d377d
SV
694{
695 UINT32 newMark;
696 newMark = value | (entry->mark & ~(mask));
697 if (entry->mark != newMark) {
698 entry->mark = newMark;
699 key->ct.mark = newMark;
6c6204b6 700 *markChanged = TRUE;
792d377d
SV
701 }
702}
703
704static __inline void
705OvsConntrackSetLabels(OvsFlowKey *key,
706 POVS_CT_ENTRY entry,
707 struct ovs_key_ct_labels *val,
6c6204b6
AK
708 struct ovs_key_ct_labels *mask,
709 BOOLEAN *labelChanged)
792d377d 710{
1ec0750e 711 ovs_u128 v, m, pktMdLabel = {0};
792d377d
SV
712 memcpy(&v, val, sizeof v);
713 memcpy(&m, mask, sizeof m);
714
715 pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo));
716 pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi));
717
6c6204b6
AK
718 if (!NdisEqualMemory(&entry->labels, &pktMdLabel,
719 sizeof(struct ovs_key_ct_labels))) {
720 *labelChanged = TRUE;
721 }
792d377d
SV
722 NdisMoveMemory(&entry->labels, &pktMdLabel,
723 sizeof(struct ovs_key_ct_labels));
724 NdisMoveMemory(&key->ct.labels, &pktMdLabel,
725 sizeof(struct ovs_key_ct_labels));
726}
727
c4d9f931
AK
728static void
729OvsCtSetMarkLabel(OvsFlowKey *key,
730 POVS_CT_ENTRY entry,
731 MD_MARK *mark,
732 MD_LABELS *labels,
733 BOOLEAN *triggerUpdateEvent)
734{
735 if (mark) {
736 OvsConntrackSetMark(key, entry, mark->value, mark->mask,
737 triggerUpdateEvent);
738 }
739
740 if (labels) {
741 OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask,
742 triggerUpdateEvent);
743 }
744}
745
746static __inline void
747OvsCtUpdateTuple(OvsFlowKey *key, OVS_CT_KEY *ctKey)
748{
749 key->ct.tuple_ipv4.ipv4_src = ctKey->src.addr.ipv4_aligned;
750 key->ct.tuple_ipv4.ipv4_dst = ctKey->dst.addr.ipv4_aligned;
751 key->ct.tuple_ipv4.ipv4_proto = ctKey->nw_proto;
752
753 /* Orig tuple Port is overloaded to take in ICMP-Type & Code */
754 /* This mimics the behavior in lib/conntrack.c*/
755 key->ct.tuple_ipv4.src_port = ctKey->nw_proto != IPPROTO_ICMP ?
756 ctKey->src.port :
757 htons(ctKey->src.icmp_type);
758 key->ct.tuple_ipv4.dst_port = ctKey->nw_proto != IPPROTO_ICMP ?
759 ctKey->dst.port :
760 htons(ctKey->src.icmp_code);
761}
762
792d377d 763static __inline NDIS_STATUS
b7a6b3a7 764OvsCtExecute_(OvsForwardingContext *fwdCtx,
792d377d
SV
765 OvsFlowKey *key,
766 OVS_PACKET_HDR_INFO *layers,
767 BOOLEAN commit,
69bd2449 768 BOOLEAN force,
792d377d
SV
769 UINT16 zone,
770 MD_MARK *mark,
e68988b8 771 MD_LABELS *labels,
1ef6b404 772 PCHAR helper,
6c6204b6
AK
773 PNAT_ACTION_INFO natInfo,
774 BOOLEAN postUpdateEvent)
792d377d
SV
775{
776 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
6c6204b6 777 BOOLEAN triggerUpdateEvent = FALSE;
792d377d 778 POVS_CT_ENTRY entry = NULL;
b7a6b3a7 779 PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
792d377d 780 OvsConntrackKeyLookupCtx ctx = { 0 };
792d377d 781 LOCK_STATE_EX lockState;
5b37c6ae 782 UINT64 currentTime;
792d377d
SV
783 NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
784
1ef6b404 785
792d377d 786 /* Retrieve the Conntrack Key related fields from packet */
5b37c6ae 787 OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset);
792d377d
SV
788
789 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
790
791 /* Lookup Conntrack entries for a matching entry */
792 entry = OvsCtLookup(&ctx);
b841a441 793 BOOLEAN entryCreated = FALSE;
69bd2449
SV
794
795 /* Delete entry in reverse direction if 'force' is specified */
796 if (entry && force && ctx.reply) {
c4d9f931 797 OvsCtEntryDelete(entry, TRUE);
69bd2449
SV
798 entry = NULL;
799 }
800
c3a90b46
SV
801 if (!entry && commit && ctTotalEntries >= CT_MAX_ENTRIES) {
802 /* Don't proceed with processing if the max limit has been hit.
803 * This blocks only new entries from being created and doesn't
804 * affect existing connections.
805 */
806 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
807 OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);
808 return NDIS_STATUS_RESOURCES;
809 }
810
5ff83483
AK
811 /* Increment the counters soon after the lookup, since we set ct.state
812 * to OVS_CS_F_TRACKED after processing the ct entry.
813 */
814 if (entry && (!(key->ct.state & OVS_CS_F_TRACKED))) {
815 OvsCtIncrementCounters(entry, ctx.reply, curNbl);
816 }
817
792d377d
SV
818 if (!entry) {
819 /* If no matching entry was found, create one and add New state */
b7a6b3a7 820 entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
5b37c6ae 821 layers->l4Offset, &ctx,
b7a6b3a7 822 key, natInfo, commit, currentTime,
b841a441 823 &entryCreated);
792d377d
SV
824 } else {
825 /* Process the entry and update CT flags */
b7a6b3a7
YL
826 entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
827 zone, natInfo, commit, currentTime,
b841a441 828 &entryCreated);
792d377d
SV
829 }
830
c4d9f931
AK
831 if (entry == NULL) {
832 return status;
833 }
b7a6b3a7
YL
834 /*
835 * Note that natInfo is not the same as entry->natInfo here. natInfo
836 * is decided by action in the openflow rule, entry->natInfo is decided
837 * when the entry is created. In the reverse NAT case, natInfo is
838 * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
839 * NAT_ACTION_DST without NAT_ACTION_REVERSE
840 */
c4d9f931 841 if (natInfo->natAction != NAT_ACTION_NONE)
b7a6b3a7 842 {
dea38804
AK
843 LOCK_STATE_EX lockStateNat;
844 NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
b7a6b3a7
YL
845 OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
846 key, ctx.reply);
dea38804 847 NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
b7a6b3a7
YL
848 }
849
c4d9f931 850 OvsCtSetMarkLabel(key, entry, mark, labels, &triggerUpdateEvent);
792d377d 851
c4d9f931 852 if (OvsDetectFtpPacket(key)) {
e68988b8
SV
853 /* FTP parser will always be loaded */
854 UNREFERENCED_PARAMETER(helper);
855
856 status = OvsCtHandleFtp(curNbl, key, layers, currentTime, entry,
857 (ntohs(key->ipKey.l4.tpDst) == IPPORT_FTP));
858 if (status != NDIS_STATUS_SUCCESS) {
859 OVS_LOG_ERROR("Error while parsing the FTP packet");
860 }
861 }
862
813f5f0a 863 /* Add original tuple information to flow Key */
c4d9f931 864 if (entry->key.dl_type == ntohs(ETH_TYPE_IPV4)) {
813f5f0a
SV
865 if (entry->parent != NULL) {
866 POVS_CT_ENTRY parent = entry->parent;
c4d9f931 867 OvsCtUpdateTuple(key, &parent->key);
813f5f0a 868 } else {
c4d9f931 869 OvsCtUpdateTuple(key, &entry->key);
813f5f0a 870 }
813f5f0a
SV
871 }
872
c4d9f931 873 if (entryCreated) {
b841a441
SV
874 OvsPostCtEventEntry(entry, OVS_EVENT_CT_NEW);
875 }
c4d9f931 876 if (postUpdateEvent && !entryCreated && triggerUpdateEvent) {
6c6204b6
AK
877 OvsPostCtEventEntry(entry, OVS_EVENT_CT_UPDATE);
878 }
b841a441 879
792d377d
SV
880 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
881
882 return status;
883}
884
885/*
886 *---------------------------------------------------------------------------
887 * OvsExecuteConntrackAction
888 * Executes Conntrack actions XXX - Add more
45bde017 889 * For the Ipv4 fragments, consume the orginal fragment NBL
792d377d
SV
890 *---------------------------------------------------------------------------
891 */
892NDIS_STATUS
ae584afe 893OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
792d377d
SV
894 OvsFlowKey *key,
895 const PNL_ATTR a)
896{
897 PNL_ATTR ctAttr;
898 BOOLEAN commit = FALSE;
69bd2449 899 BOOLEAN force = FALSE;
6c6204b6 900 BOOLEAN postUpdateEvent = FALSE;
792d377d 901 UINT16 zone = 0;
6c6204b6 902 UINT32 eventmask = 0;
792d377d
SV
903 MD_MARK *mark = NULL;
904 MD_LABELS *labels = NULL;
e68988b8 905 PCHAR helper = NULL;
1ef6b404 906 NAT_ACTION_INFO natActionInfo;
ae584afe 907 OVS_PACKET_HDR_INFO *layers = &fwdCtx->layers;
45bde017 908 PNET_BUFFER_LIST newNbl = NULL;
792d377d
SV
909 NDIS_STATUS status;
910
1ef6b404 911 memset(&natActionInfo, 0, sizeof natActionInfo);
45bde017 912 status = OvsDetectCtPacket(fwdCtx, key, &newNbl);
792d377d
SV
913 if (status != NDIS_STATUS_SUCCESS) {
914 return status;
915 }
916
69bd2449 917 /* XXX Convert this to NL_ATTR_FOR_EACH */
792d377d
SV
918 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE);
919 if (ctAttr) {
920 zone = NlAttrGetU16(ctAttr);
921 }
922 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT);
923 if (ctAttr) {
924 commit = TRUE;
925 }
926 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK);
927 if (ctAttr) {
928 mark = NlAttrGet(ctAttr);
929 }
930 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS);
931 if (ctAttr) {
932 labels = NlAttrGet(ctAttr);
933 }
1ef6b404
AK
934 natActionInfo.natAction = NAT_ACTION_NONE;
935 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT);
936 if (ctAttr) {
937 /* Pares Nested NAT attributes. */
938 PNL_ATTR natAttr;
939 unsigned int left;
940 BOOLEAN hasMinIp = FALSE;
941 BOOLEAN hasMinPort = FALSE;
942 BOOLEAN hasMaxIp = FALSE;
943 BOOLEAN hasMaxPort = FALSE;
944 NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
b7a6b3a7
YL
945 enum ovs_nat_attr subtype = NlAttrType(natAttr);
946 switch(subtype) {
1ef6b404
AK
947 case OVS_NAT_ATTR_SRC:
948 case OVS_NAT_ATTR_DST:
949 natActionInfo.natAction |=
b7a6b3a7 950 ((subtype == OVS_NAT_ATTR_SRC)
1ef6b404
AK
951 ? NAT_ACTION_SRC : NAT_ACTION_DST);
952 break;
953 case OVS_NAT_ATTR_IP_MIN:
1ef6b404 954 memcpy(&natActionInfo.minAddr,
e8808c22 955 NlAttrData(natAttr), NlAttrGetSize(natAttr));
1ef6b404
AK
956 hasMinIp = TRUE;
957 break;
958 case OVS_NAT_ATTR_IP_MAX:
1ef6b404 959 memcpy(&natActionInfo.maxAddr,
e8808c22 960 NlAttrData(natAttr), NlAttrGetSize(natAttr));
1ef6b404
AK
961 hasMaxIp = TRUE;
962 break;
963 case OVS_NAT_ATTR_PROTO_MIN:
964 natActionInfo.minPort = NlAttrGetU16(natAttr);
965 hasMinPort = TRUE;
966 break;
967 case OVS_NAT_ATTR_PROTO_MAX:
968 natActionInfo.maxPort = NlAttrGetU16(natAttr);
969 hasMaxPort = TRUE;
970 break;
971 case OVS_NAT_ATTR_PERSISTENT:
972 case OVS_NAT_ATTR_PROTO_HASH:
973 case OVS_NAT_ATTR_PROTO_RANDOM:
974 break;
975 }
976 }
977 if (natActionInfo.natAction == NAT_ACTION_NONE) {
978 natActionInfo.natAction = NAT_ACTION_REVERSE;
979 }
980 if (hasMinIp && !hasMaxIp) {
981 memcpy(&natActionInfo.maxAddr,
982 &natActionInfo.minAddr,
983 sizeof(natActionInfo.maxAddr));
984 }
985 if (hasMinPort && !hasMaxPort) {
986 natActionInfo.maxPort = natActionInfo.minPort;
987 }
988 if (hasMinPort || hasMaxPort) {
989 if (natActionInfo.natAction & NAT_ACTION_SRC) {
990 natActionInfo.natAction |= NAT_ACTION_SRC_PORT;
991 } else if (natActionInfo.natAction & NAT_ACTION_DST) {
992 natActionInfo.natAction |= NAT_ACTION_DST_PORT;
993 }
994 }
995 }
e68988b8
SV
996 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_HELPER);
997 if (ctAttr) {
998 helper = NlAttrGetString(ctAttr);
999 if (helper == NULL) {
1000 return NDIS_STATUS_INVALID_PARAMETER;
1001 }
1002 if (strcmp("ftp", helper) != 0) {
1003 /* Only support FTP */
1004 return NDIS_STATUS_NOT_SUPPORTED;
1005 }
1006 }
69bd2449
SV
1007 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_FORCE_COMMIT);
1008 if (ctAttr) {
1009 force = TRUE;
1010 /* Force implicitly means commit */
1011 commit = TRUE;
1012 }
6c6204b6
AK
1013 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_EVENTMASK);
1014 if (ctAttr) {
1015 eventmask = NlAttrGetU32(ctAttr);
1016 /* Only mark and label updates are supported. */
1017 if (eventmask & (1 << IPCT_MARK | 1 << IPCT_LABEL))
1018 postUpdateEvent = TRUE;
1019 }
45bde017 1020 /* If newNbl is not allocated, use the current Nbl*/
b7a6b3a7 1021 status = OvsCtExecute_(fwdCtx, key, layers,
6c6204b6
AK
1022 commit, force, zone, mark, labels, helper, &natActionInfo,
1023 postUpdateEvent);
792d377d
SV
1024 return status;
1025}
1026
1027/*
1028 *----------------------------------------------------------------------------
b7a6b3a7 1029 * OvsConntrackEntryCleaner
792d377d
SV
1030 * Runs periodically and cleans up the connection tracker
1031 *----------------------------------------------------------------------------
1032 */
1033VOID
b7a6b3a7 1034OvsConntrackEntryCleaner(PVOID data)
792d377d
SV
1035{
1036
1037 POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
1038 PLIST_ENTRY link, next;
1039 POVS_CT_ENTRY entry;
880b52e6 1040 LOCK_STATE_EX lockState;
792d377d
SV
1041 BOOLEAN success = TRUE;
1042
1043 while (success) {
880b52e6
SR
1044 if (ovsConntrackLockObj == NULL) {
1045 /* Lock has been freed by 'OvsCleanupConntrack()' */
1046 break;
1047 }
792d377d
SV
1048 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
1049 if (context->exit) {
1050 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1051 break;
1052 }
1053
1054 /* Set the timeout for the thread and cleanup */
b7a6b3a7 1055 INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL;
792d377d 1056
d33f0efa
SV
1057 if (ctTotalEntries) {
1058 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
1059 LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
1060 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
c4d9f931 1061 OvsCtEntryDelete(entry, FALSE);
792d377d
SV
1062 }
1063 }
1064 }
792d377d
SV
1065 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1066 KeWaitForSingleObject(&context->event, Executive, KernelMode,
1067 FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
1068 }
1069
1070 PsTerminateSystemThread(STATUS_SUCCESS);
1071}
434972b7
SV
1072
1073/*
1074 *----------------------------------------------------------------------------
1075 * OvsCtFlush
43369532 1076 * Flushes out all Conntrack Entries that match any of the arguments
434972b7
SV
1077 *----------------------------------------------------------------------------
1078 */
1079static __inline NDIS_STATUS
43369532 1080OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
434972b7
SV
1081{
1082 PLIST_ENTRY link, next;
1083 POVS_CT_ENTRY entry;
1084
dea38804 1085 LOCK_STATE_EX lockState, lockStateNat;
434972b7
SV
1086 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
1087
d33f0efa 1088 if (ctTotalEntries) {
c4d9f931 1089 for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
d33f0efa
SV
1090 LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
1091 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
43369532
AK
1092 if (tuple) {
1093 if (tuple->ipv4_proto != IPPROTO_ICMP &&
1094 tuple->ipv4_src == entry->key.src.addr.ipv4_aligned &&
1095 tuple->ipv4_dst == entry->key.dst.addr.ipv4_aligned &&
1096 tuple->ipv4_proto == entry->key.nw_proto &&
1097 tuple->src_port == entry->key.src.port &&
1098 tuple->dst_port == entry->key.dst.port &&
1099 (zone ? entry->key.zone == zone: TRUE)) {
c4d9f931 1100 OvsCtEntryDelete(entry, TRUE);
43369532
AK
1101 } else if (tuple->ipv4_src == entry->key.src.addr.ipv4_aligned &&
1102 tuple->ipv4_dst == entry->key.dst.addr.ipv4_aligned &&
1103 tuple->ipv4_proto == entry->key.nw_proto &&
1104 tuple->src_port == entry->key.src.icmp_type &&
1105 tuple->dst_port == entry->key.src.icmp_code &&
1106 (zone ? entry->key.zone == zone: TRUE)) {
c4d9f931 1107 OvsCtEntryDelete(entry, TRUE);
43369532
AK
1108 }
1109 } else if (!zone || zone == entry->key.zone) {
c4d9f931 1110 OvsCtEntryDelete(entry, TRUE);
43369532 1111 }
d33f0efa 1112 }
434972b7
SV
1113 }
1114 }
1115
dea38804 1116 NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
b7a6b3a7 1117 OvsNatFlush(zone);
dea38804 1118 NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
434972b7
SV
1119 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1120 return NDIS_STATUS_SUCCESS;
1121}
1122
1123NTSTATUS
1124OvsCtDeleteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1125 UINT32 *replyLen)
1126{
1127 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1128 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1129 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
1130 PNL_ATTR ctAttrs[__CTA_MAX];
1131 UINT32 attrOffset = NLMSG_HDRLEN + NF_GEN_MSG_HDRLEN + OVS_HDRLEN;
1132 NL_ERROR nlError = NL_ERROR_SUCCESS;
1133 NTSTATUS status;
1134 UINT16 zone = 0;
43369532 1135 struct ovs_key_ct_tuple_ipv4 *ct_tuple = NULL;
434972b7
SV
1136 NL_BUFFER nlBuf;
1137 UINT16 nlmsgType;
1138 PNL_MSG_HDR nlMsg;
1139
43369532
AK
1140 static const NL_POLICY ctAttrPolicy[] = {
1141 [CTA_TUPLE_ORIG] = {.type = NL_A_NESTED, .optional = TRUE},
1142 [CTA_ZONE] = {.type = NL_A_BE16, .optional = TRUE },
434972b7
SV
1143 };
1144
1145 if ((NlAttrParse(nlMsgHdr, attrOffset, NlNfMsgAttrsLen(nlMsgHdr),
43369532 1146 ctAttrPolicy, ARRAY_SIZE(ctAttrPolicy),
434972b7
SV
1147 ctAttrs, ARRAY_SIZE(ctAttrs)))
1148 != TRUE) {
43369532 1149 OVS_LOG_ERROR("Ct attr parsing failed for msg: %p", nlMsgHdr);
434972b7
SV
1150 status = STATUS_INVALID_PARAMETER;
1151 goto done;
1152 }
1153
1154 if (ctAttrs[CTA_ZONE]) {
159cc1f4 1155 zone = ntohs(NlAttrGetU16(ctAttrs[CTA_ZONE]));
434972b7
SV
1156 }
1157
43369532
AK
1158 if (ctAttrs[CTA_TUPLE_ORIG]) {
1159 ct_tuple = OvsAllocateMemoryWithTag(sizeof(struct ovs_key_ct_tuple_ipv4),
1160 OVS_CT_POOL_TAG);
1161 if (ct_tuple == NULL) {
1162 status = STATUS_INSUFFICIENT_RESOURCES;
1163 goto done;
1164 }
1165 /* Parse ct tuple. */
1166 status = MapNlToCtTuple(msgIn, ctAttrs[CTA_TUPLE_ORIG], ct_tuple);
1167 if (status != STATUS_SUCCESS) {
1168 goto done;
1169 }
1170 }
1171
1172 status = OvsCtFlush(zone, ct_tuple);
434972b7
SV
1173 if (status == STATUS_SUCCESS) {
1174 nlmsgType = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_DELETE);
1175 NlBufInit(&nlBuf,
1176 usrParamsCtx->outputBuffer,
1177 usrParamsCtx->outputLength);
34a2010c
AS
1178 if (!NlFillOvsMsgForNfGenMsg(&nlBuf, nlmsgType, NLM_F_CREATE,
1179 msgIn->nlMsg.nlmsgSeq,
1180 msgIn->nlMsg.nlmsgPid,
1181 AF_UNSPEC,
1182 msgIn->nfGenMsg.version,
1183 0)) {
1184 status = STATUS_INVALID_PARAMETER;
1185 }
434972b7
SV
1186 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1187 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1188 *replyLen = msgOut->nlMsg.nlmsgLen;
1189 }
1190
1191done:
43369532
AK
1192 if (ct_tuple) {
1193 OvsFreeMemoryWithTag(ct_tuple, OVS_CT_POOL_TAG);
1194 }
1195
434972b7
SV
1196 nlError = NlMapStatusToNlErr(status);
1197 if (nlError != NL_ERROR_SUCCESS) {
1198 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
1199 usrParamsCtx->outputBuffer;
1200
1201 ASSERT(msgError);
1202 NlBuildErrorMsg(msgIn, msgError, nlError, replyLen);
1203 ASSERT(*replyLen != 0);
1204 status = STATUS_SUCCESS;
1205 }
1206
1207 return status;
1208}
b50d56a7 1209
43369532
AK
1210static __inline NDIS_STATUS
1211MapNlToCtTuple(POVS_MESSAGE msgIn, PNL_ATTR ctAttr,
1212 struct ovs_key_ct_tuple_ipv4 *ct_tuple) {
1213
1214 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
1215 PNL_ATTR ctTupleAttrs[__CTA_MAX];
1216 UINT32 attrOffset;
1217 static const NL_POLICY ctTuplePolicy[] = {
1218 [CTA_TUPLE_IP] = {.type = NL_A_NESTED, .optional = FALSE },
1219 [CTA_TUPLE_PROTO] = {.type = NL_A_NESTED, .optional = FALSE},
1220 };
1221
1222 static const NL_POLICY ctTupleIpPolicy[] = {
1223 [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = TRUE },
1224 [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = TRUE },
1225 };
1226
1227 static const NL_POLICY ctTupleProtoPolicy[] = {
1228 [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = FALSE },
1229 [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = TRUE },
1230 [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = TRUE },
1231 [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = TRUE },
1232 [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = TRUE },
1233 };
1234
1235 if (!ctAttr) {
1236 return STATUS_INVALID_PARAMETER;
1237 }
1238
1239 attrOffset = (UINT32)((PCHAR) ctAttr - (PCHAR)nlMsgHdr);
1240 if ((NlAttrParseNested(nlMsgHdr, attrOffset, NlAttrLen(ctAttr),
1241 ctTuplePolicy, ARRAY_SIZE(ctTuplePolicy),
1242 ctTupleAttrs, ARRAY_SIZE(ctTupleAttrs)))
1243 != TRUE) {
1244 OVS_LOG_ERROR("CTA_TUPLE attr parsing failed for msg: %p", nlMsgHdr);
1245 return STATUS_INVALID_PARAMETER;
1246 }
1247
1248 if (ctTupleAttrs[CTA_TUPLE_IP]) {
1249 PNL_ATTR ctTupleIpAttrs[__CTA_MAX];
1250 attrOffset = (UINT32)((PCHAR) ctTupleAttrs[CTA_TUPLE_IP] - (PCHAR)nlMsgHdr);
1251 if ((NlAttrParseNested(nlMsgHdr, attrOffset, NlAttrLen(ctTupleAttrs[CTA_TUPLE_IP]),
1252 ctTupleIpPolicy, ARRAY_SIZE(ctTupleIpPolicy),
1253 ctTupleIpAttrs, ARRAY_SIZE(ctTupleIpAttrs)))
1254 != TRUE) {
1255 OVS_LOG_ERROR("CTA_TUPLE_IP attr parsing failed for msg: %p", nlMsgHdr);
1256 return STATUS_INVALID_PARAMETER;
1257 }
1258
1259 if (ctTupleIpAttrs[CTA_IP_V4_SRC] && ctTupleIpAttrs[CTA_IP_V4_DST]) {
1260 ct_tuple->ipv4_src = NlAttrGetU32(ctTupleIpAttrs[CTA_IP_V4_SRC]);
1261 ct_tuple->ipv4_dst = NlAttrGetU32(ctTupleIpAttrs[CTA_IP_V4_DST]);
1262 }
1263 }
1264
1265 if (ctTupleAttrs[CTA_TUPLE_PROTO]) {
1266 PNL_ATTR ctTupleProtoAttrs[__CTA_MAX];
1267 attrOffset = (UINT32)((PCHAR) ctTupleAttrs[CTA_TUPLE_PROTO] - (PCHAR)nlMsgHdr);
1268 if ((NlAttrParseNested(nlMsgHdr, attrOffset, NlAttrLen(ctTupleAttrs[CTA_TUPLE_PROTO]),
1269 ctTupleProtoPolicy, ARRAY_SIZE(ctTupleProtoPolicy),
1270 ctTupleProtoAttrs, ARRAY_SIZE(ctTupleProtoAttrs)))
1271 != TRUE) {
1272 OVS_LOG_ERROR("CTA_TUPLE_PROTO attr parsing failed for msg: %p", nlMsgHdr);
1273 return STATUS_INVALID_PARAMETER;
1274 }
1275
1276 if (ctTupleProtoAttrs[CTA_PROTO_NUM]) {
1277 ct_tuple->ipv4_proto = NlAttrGetU8 (ctTupleProtoAttrs[CTA_PROTO_NUM]);
1278 if (ctTupleProtoAttrs[CTA_PROTO_SRC_PORT] && ctTupleProtoAttrs[CTA_PROTO_DST_PORT]) {
1279 ct_tuple->src_port = NlAttrGetU16(ctTupleProtoAttrs[CTA_PROTO_SRC_PORT]);
1280 ct_tuple->dst_port = NlAttrGetU16(ctTupleProtoAttrs[CTA_PROTO_DST_PORT]);
1281 } else if (ctTupleProtoAttrs[CTA_PROTO_ICMP_TYPE] &&
1282 ctTupleProtoAttrs[CTA_PROTO_ICMP_CODE] ) {
1283 ct_tuple->src_port = NlAttrGetU8(ctTupleProtoAttrs[CTA_PROTO_ICMP_TYPE]);
1284 ct_tuple->dst_port = NlAttrGetU8(ctTupleProtoAttrs[CTA_PROTO_ICMP_CODE]);
1285 }
1286
1287 }
1288 }
1289
1290 return NDIS_STATUS_SUCCESS;
1291}
1292
b50d56a7
SV
1293static __inline NDIS_STATUS
1294MapIpTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
1295{
1296 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1297 UINT32 offset = 0;
1298
1299 offset = NlMsgStartNested(nlBuf, CTA_TUPLE_IP);
1300 if (!offset) {
1301 return NDIS_STATUS_FAILURE;
1302 }
1303
1304 if (key->dl_type == ntohs(ETH_TYPE_IPV4)) {
1305 if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_SRC, key->src.addr.ipv4)) {
1306 status = NDIS_STATUS_FAILURE;
1307 goto done;
1308 }
1309 if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_DST, key->dst.addr.ipv4)) {
1310 status = NDIS_STATUS_FAILURE;
1311 goto done;
1312 }
1313 } else if (key->dl_type == ntohs(ETH_TYPE_IPV6)) {
1314 if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_SRC,
1315 (PCHAR)(&key->src.addr.ipv6),
1316 sizeof(key->src.addr.ipv6))) {
1317 status = NDIS_STATUS_FAILURE;
1318 goto done;
1319 }
1320 if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_DST,
1321 (PCHAR)(&key->dst.addr.ipv6),
1322 sizeof(key->dst.addr.ipv6))) {
1323 status = NDIS_STATUS_FAILURE;
1324 goto done;
1325 }
1326 }
1327
1328done:
1329 NlMsgEndNested(nlBuf, offset);
1330 return status;
1331}
1332
1333static __inline NDIS_STATUS
1334MapProtoTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
1335{
1336 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1337 UINT32 offset = 0;
1338
1339 offset = NlMsgStartNested(nlBuf, CTA_TUPLE_PROTO);
1340 if (!offset) {
1341 return NDIS_STATUS_FAILURE;
1342 }
1343
1344 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_NUM, key->nw_proto)) {
1345 status = NDIS_STATUS_FAILURE;
1346 goto done;
1347 }
1348
1349 if (key->dl_type == ntohs(ETH_TYPE_IPV4)
1350 || key->dl_type == ntohs(ETH_TYPE_IPV6)) {
1351 /* ICMP and ICMPv6 Type, Code and ID are currently not tracked */
1352 if (key->nw_proto == IPPROTO_ICMP) {
6e83dfd9
SV
1353 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMP_ID,
1354 htons(key->src.icmp_id))) {
b50d56a7
SV
1355 status = NDIS_STATUS_FAILURE;
1356 goto done;
1357 }
6e83dfd9
SV
1358 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_TYPE,
1359 key->src.icmp_type)) {
b50d56a7
SV
1360 status = NDIS_STATUS_FAILURE;
1361 goto done;
1362 }
6e83dfd9
SV
1363 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_CODE,
1364 key->src.icmp_code)) {
b50d56a7
SV
1365 status = NDIS_STATUS_FAILURE;
1366 goto done;
1367 }
1368 } else if (key->nw_proto == IPPROTO_ICMPV6) {
1369 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMPV6_ID, 0)) {
1370 status = NDIS_STATUS_FAILURE;
1371 goto done;
1372 }
1373 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_TYPE, 0)) {
1374 status = NDIS_STATUS_FAILURE;
1375 goto done;
1376 }
1377 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_CODE, 0)) {
1378 status = NDIS_STATUS_FAILURE;
1379 goto done;
1380 }
1381 } else if (key->nw_proto == IPPROTO_TCP
1382 || key->nw_proto == IPPROTO_UDP) {
1383 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_SRC_PORT,
1384 key->src.port)) {
1385 status = NDIS_STATUS_FAILURE;
1386 goto done;
1387 }
1388 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_DST_PORT,
1389 key->dst.port)) {
1390 status = NDIS_STATUS_FAILURE;
1391 goto done;
1392 }
1393 }
1394 }
1395
1396done:
1397 NlMsgEndNested(nlBuf, offset);
1398 return status;
1399}
1400
1401static __inline NDIS_STATUS
1402MapCtKeyTupleToNl(PNL_BUFFER nlBuf,
1403 UINT16 tupleType,
1404 OVS_CT_KEY *key)
1405{
1406 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1407 UINT32 offset = 0;
1408
1409 offset = NlMsgStartNested(nlBuf, tupleType);
1410 if (!offset) {
1411 return NDIS_STATUS_FAILURE;
1412 }
1413
1414 status = MapIpTupleToNl(nlBuf, key);
1415 if (status != NDIS_STATUS_SUCCESS) {
1416 goto done;
1417 }
1418
1419 status = MapProtoTupleToNl(nlBuf, key);
1420 if (status != NDIS_STATUS_SUCCESS) {
1421 goto done;
1422 }
1423
1424done:
1425 NlMsgEndNested(nlBuf, offset);
1426 return status;
1427}
1428
1429static __inline NDIS_STATUS
1430MapCtCounterToNl(PNL_BUFFER nlBuf,
1431 UINT16 counterType,
1432 OVS_CT_KEY *key)
1433{
1434 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1435 UINT32 offset = 0;
1436
1437 offset = NlMsgStartNested(nlBuf, counterType);
1438 if (!offset) {
1439 return NDIS_STATUS_FAILURE;
1440 }
1441
1442 if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_PACKETS,
1443 htonll(key->packetCount))) {
1444 status = NDIS_STATUS_FAILURE;
1445 goto done;
1446 }
1447
1448 if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_BYTES,
1449 htonll(key->byteCount))) {
1450 status = NDIS_STATUS_FAILURE;
1451 goto done;
1452 }
1453
1454done:
1455 NlMsgEndNested(nlBuf, offset);
1456 return status;
1457}
1458
1459/* Userspace expects system time to be Unix timestamp in Nano Seconds */
1460static __inline unsigned
1461WindowsTickToUnixSeconds(long long windowsTicks)
1462{
1463 /*
1464 * Windows epoch starts 1601-01-01T00:00:00Z. It's 11644473600 seconds
1465 * before the UNIX/Linux epoch (1970-01-01T00:00:00Z). Windows ticks are
1466 * in 100 nanoseconds
1467 */
1468 return (unsigned)((windowsTicks / WINDOWS_TICK
1469 - SEC_TO_UNIX_EPOCH));
1470}
1471
9afd6746 1472NTSTATUS
b50d56a7 1473OvsCreateNlMsgFromCtEntry(POVS_CT_ENTRY entry,
b50d56a7
SV
1474 PVOID outBuffer,
1475 UINT32 outBufLen,
9afd6746
SV
1476 UINT8 eventType,
1477 UINT32 nlmsgSeq,
1478 UINT32 nlmsgPid,
1479 UINT8 nfGenVersion,
1480 UINT32 dpIfIndex)
b50d56a7
SV
1481{
1482 NL_BUFFER nlBuf;
1483 BOOLEAN ok;
1484 PNL_MSG_HDR nlMsg;
1485 UINT32 timeout;
1486 NDIS_STATUS status;
1487 UINT64 currentTime, expiration;
9afd6746 1488 UINT16 nlmsgType;
6c6204b6 1489 UINT16 nlmsgFlags = NLM_F_CREATE;
b50d56a7
SV
1490 NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
1491 UINT8 nfgenFamily = 0;
c4d9f931 1492
b50d56a7
SV
1493 if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
1494 nfgenFamily = AF_INET;
1495 } else if (entry->key.dl_type == htons(ETH_TYPE_IPV6)) {
1496 nfgenFamily = AF_INET6;
1497 }
1498
1499 NlBufInit(&nlBuf, outBuffer, outBufLen);
1500 /* Mimic netfilter */
6c6204b6 1501 if (eventType == OVS_EVENT_CT_NEW || eventType == OVS_EVENT_CT_UPDATE) {
9afd6746
SV
1502 nlmsgType = (UINT16) (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
1503 } else if (eventType == OVS_EVENT_CT_DELETE) {
1504 nlmsgType = (UINT16) (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_DELETE);
1505 } else {
1506 return STATUS_INVALID_PARAMETER;
1507 }
1508
6c6204b6
AK
1509 if (eventType == OVS_EVENT_CT_UPDATE) {
1510 /* In netlink-conntrack.c IPCTNL_MSG_CT_NEW msg type is used to
1511 * differentiate between OVS_EVENT_CT_NEW and OVS_EVENT_CT_UPDATE
1512 * events based on nlmsgFlags, unset it to notify an update event.
1513 */
1514 nlmsgFlags = 0;
1515 }
1516 ok = NlFillOvsMsgForNfGenMsg(&nlBuf, nlmsgType, nlmsgFlags,
9afd6746
SV
1517 nlmsgSeq, nlmsgPid, nfgenFamily,
1518 nfGenVersion, dpIfIndex);
b50d56a7
SV
1519 if (!ok) {
1520 return STATUS_INVALID_BUFFER_SIZE;
1521 }
1522
1523 status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_ORIG, &entry->key);
1524 if (status != NDIS_STATUS_SUCCESS) {
1525 return STATUS_UNSUCCESSFUL;
1526 }
1527
1528 status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_REPLY, &entry->rev_key);
1529 if (status != NDIS_STATUS_SUCCESS) {
1530 return STATUS_UNSUCCESSFUL;
1531 }
1532
1533 status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_ORIG, &entry->key);
1534 if (status != NDIS_STATUS_SUCCESS) {
1535 return STATUS_UNSUCCESSFUL;
1536 }
1537
1538 status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_REPLY, &entry->rev_key);
1539 if (status != NDIS_STATUS_SUCCESS) {
1540 return STATUS_UNSUCCESSFUL;
1541 }
1542
1543 if (entry->key.zone) {
1544 if (!NlMsgPutTailU16(&nlBuf, CTA_ZONE, htons(entry->key.zone))) {
1545 return STATUS_INVALID_BUFFER_SIZE;
1546 }
1547 }
1548
1549 if (entry->mark) {
1550 if (!NlMsgPutTailU32(&nlBuf, CTA_MARK, htonl(entry->mark))) {
1551 return STATUS_INVALID_BUFFER_SIZE;
1552 }
1553 }
1554
1555 if (entry->labels.ct_labels) {
1556 ok = NlMsgPutTailUnspec(&nlBuf, CTA_LABELS,
1557 (PCHAR)(&entry->labels),
1558 sizeof(entry->labels));
1559 if (!ok) {
1560 return STATUS_INVALID_BUFFER_SIZE;
1561 }
1562 }
1563
1564 if (entry->expiration > currentTime) {
1565 expiration = entry->expiration - currentTime;
1566 timeout = (UINT32) (expiration / CT_INTERVAL_SEC);
1567 if (!NlMsgPutTailU32(&nlBuf, CTA_TIMEOUT, htonl(timeout))) {
1568 return STATUS_INVALID_BUFFER_SIZE;
1569 }
1570 }
1571
1572 if (entry->key.nw_proto == IPPROTO_TCP) {
1573 /* Add ProtoInfo for TCP */
1574 UINT32 offset;
1575 offset = NlMsgStartNested(&nlBuf, CTA_PROTOINFO);
1576 if (!offset) {
1577 return NDIS_STATUS_FAILURE;
1578 }
1579
1580 status = OvsCtMapTcpProtoInfoToNl(&nlBuf, entry);
1581 NlMsgEndNested(&nlBuf, offset);
1582 if (status != NDIS_STATUS_SUCCESS) {
1583 return STATUS_UNSUCCESSFUL;
1584 }
1585 }
1586
1587 /* CTA_STATUS is required but not implemented. Default to 0 */
1588 if (!NlMsgPutTailU32(&nlBuf, CTA_STATUS, 0)) {
1589 return STATUS_INVALID_BUFFER_SIZE;
1590 }
1591
1592 /* Mimic netfilter - nf_conntrack_netlink.c:
1593 *
1594 * int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) {
1595 * NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
1596 * return 0;
1597 * }
1598 *
1599 */
1600 if(!NlMsgPutTailU32(&nlBuf, CTA_ID, htonl((UINT32) entry))) {
1601 return STATUS_INVALID_BUFFER_SIZE;
1602 }
1603
1604 if (entry->timestampStart) {
1605 UINT32 offset;
1606 offset = NlMsgStartNested(&nlBuf, CTA_TIMESTAMP);
1607 if (!offset) {
1608 return NDIS_STATUS_FAILURE;
1609 }
1610 UINT64 start;
1611 start = WindowsTickToUnixSeconds(entry->timestampStart);
1612 start = start * SEC_TO_NANOSEC;
1613 if (!NlMsgPutTailU64(&nlBuf, CTA_TIMESTAMP_START, htonll(start))) {
1614 NlMsgEndNested(&nlBuf, offset);
1615 return STATUS_INVALID_BUFFER_SIZE;
1616 }
1617
1618 NlMsgEndNested(&nlBuf, offset);
1619 }
1620
1621 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1622 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1623
1624 return STATUS_SUCCESS;
1625}
1626
1627/*
1628 *----------------------------------------------------------------------------
1629 * OvsCtDumpCmdHandler --
1630 * Handler for IPCTNL_MSG_CT_GET command.
1631 *
1632 * XXX - Try to consolidate dump handler patterns around dumpState usage
1633 * The following dumpHandler is similar to one vport.c uses
1634 *----------------------------------------------------------------------------
1635*/
1636NTSTATUS
1637OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1638 UINT32 *replyLen)
1639{
1640 NTSTATUS rc;
1641 /* Setup Dump Start if it's OVS_WRITE_DEV_OP and return */
1642 if (usrParamsCtx->devOp == OVS_WRITE_DEV_OP) {
1643 *replyLen = 0;
1644 OvsSetupDumpStart(usrParamsCtx);
1645 return STATUS_SUCCESS;
1646 }
1647
1648 POVS_OPEN_INSTANCE instance =
1649 (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1650 POVS_MESSAGE msgIn;
b50d56a7
SV
1651
1652 ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1653 if (instance->dumpState.ovsMsg == NULL) {
1654 ASSERT(FALSE);
1655 return STATUS_INVALID_DEVICE_STATE;
1656 }
1657
1658 /* Output buffer has been validated while validating read dev op. */
1659 ASSERT(usrParamsCtx->outputBuffer != NULL);
1660 msgIn = instance->dumpState.ovsMsg;
1661 UINT32 inBucket = instance->dumpState.index[0];
1662 UINT32 inIndex = instance->dumpState.index[1];
d33f0efa 1663 UINT32 i = CT_HASH_TABLE_SIZE;
b50d56a7
SV
1664 UINT32 outIndex = 0;
1665
1666 LOCK_STATE_EX lockState;
1667 NdisAcquireRWLockRead(ovsConntrackLockObj, &lockState, 0);
1668
d33f0efa
SV
1669 if (ctTotalEntries) {
1670 for (i = inBucket; i < CT_HASH_TABLE_SIZE; i++) {
1671 PLIST_ENTRY head, link;
1672 head = &ovsConntrackTable[i];
1673 POVS_CT_ENTRY entry = NULL;
1674
1675 outIndex = 0;
1676 LIST_FORALL(head, link) {
1677 /*
1678 * if one or more dumps were previously done on this same
1679 * bucket, inIndex will be > 0, so we'll need to reply with
1680 * the inIndex + 1 ct-entry from the bucket.
1681 */
1682 if (outIndex >= inIndex) {
1683 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
1684
9afd6746 1685 rc = OvsCreateNlMsgFromCtEntry(entry,
d33f0efa
SV
1686 usrParamsCtx->outputBuffer,
1687 usrParamsCtx->outputLength,
9afd6746
SV
1688 OVS_EVENT_CT_NEW,
1689 msgIn->nlMsg.nlmsgSeq,
1690 msgIn->nlMsg.nlmsgPid,
1691 msgIn->nfGenMsg.version,
d33f0efa
SV
1692 0);
1693
1694 if (rc != NDIS_STATUS_SUCCESS) {
1695 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1696 return STATUS_UNSUCCESSFUL;
1697 }
1698
1699 ++outIndex;
1700 break;
b50d56a7
SV
1701 }
1702
1703 ++outIndex;
b50d56a7
SV
1704 }
1705
d33f0efa
SV
1706 if (entry) {
1707 break;
1708 }
b50d56a7 1709
d33f0efa
SV
1710 /*
1711 * if no ct-entry was found above, check the next bucket, beginning
1712 * with the first (i.e. index 0) elem from within that bucket
1713 */
1714 inIndex = 0;
b50d56a7 1715 }
b50d56a7 1716 }
b50d56a7
SV
1717 instance->dumpState.index[0] = i;
1718 instance->dumpState.index[1] = outIndex;
1719 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1720
1721 /* if i < CT_HASH_TABLE_SIZE => entry was found */
1722 if (i < CT_HASH_TABLE_SIZE) {
1723 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1724 *replyLen = msgOut->nlMsg.nlmsgLen;
1725 } else {
1726 /* if i >= CT_HASH_TABLE_SIZE => entry was not found => dump done */
1727 *replyLen = 0;
1728 FreeUserDumpState(instance);
1729 }
1730
1731 return STATUS_SUCCESS;
1732}
b389a124
AS
1733
1734#pragma warning(pop)