2 * Copyright (c) 2008, 2009, 2010, 2011, 2013, 2014 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <arpa/inet.h>
23 #include "byte-order.h"
24 #include "collectors.h"
27 #include "lib/netflow.h"
30 #include "ofproto/netflow.h"
32 #include "poll-loop.h"
33 #include "socket-util.h"
36 #include "openvswitch/vlog.h"
/* Declares "netflow" as the logging module for this file.  Presumably the
 * VLOG_WARN_RL() call in netflow_expire__() logs under this name; the macro
 * itself comes from openvswitch/vlog.h -- confirm there. */
38 VLOG_DEFINE_THIS_MODULE(netflow
);
/* Members of the NetFlow exporter object that the functions in this file
 * reach through 'struct netflow *nf'.
 * NOTE(review): the enclosing "struct netflow {" and "};" lines are not
 * visible in this extract. */
41 uint8_t engine_type
; /* Value of engine_type to use. */
42 uint8_t engine_id
; /* Value of engine_id to use. */
43 long long int boot_time
; /* time_msec() value taken when the object was
   * allocated (netflow_create()); record and header
   * times are exported relative to it. */
44 struct collectors
*collectors
; /* NetFlow collectors. */
45 bool add_id_to_iface
; /* Put the 7 least significant bits of
   * 'engine_id' into the most significant
   * bits of the interface fields. */
48 uint32_t netflow_cnt
; /* Flow sequence number for NetFlow; incremented
   * once per generated record. */
49 struct ofpbuf packet
; /* NetFlow packet being accumulated. */
50 long long int active_timeout
; /* Timeout for flows that are still active.
   * netflow_set_options() multiplies the configured
   * value by 1000 before storing it here, and it is
   * compared against time_msec() values. */
51 long long int next_timeout
; /* Next scheduled active timeout (time_msec()
   * based). */
52 long long int reconfig_time
; /* When we reconfigured the timeouts. */
54 struct hmap flows
; /* Contains 'netflow_flows'. */
56 struct ovs_refcount ref_cnt
; /* Reference count: raised by netflow_ref(), dropped
   * by netflow_unref(). */
/* Members of one tracked flow record ('struct netflow_flow').  Records live
 * in the 'flows' map of a netflow object and are identified by in_port,
 * nw_src, nw_dst, nw_tos, nw_proto, tp_src and tp_dst, which are the fields
 * compared by netflow_flow_lookup() and hashed by netflow_flow_hash().
 * NOTE(review): the enclosing "struct netflow_flow {" and "};" lines are not
 * visible in this extract. */
60 struct hmap_node hmap_node
; /* Node in the owning netflow's 'flows' map. */
62 long long int last_expired
; /* Time this flow last timed out. */
63 long long int created
; /* Start of the current reporting period: set from
   * stats->used when the record is allocated and again
   * when it is restarted after an output interface
   * change. */
65 ofp_port_t output_iface
; /* Output interface index. */
66 uint16_t tcp_flags
; /* Bitwise-OR of all TCP flags seen. */
68 ofp_port_t in_port
; /* Input port. */
69 ovs_be32 nw_src
; /* IPv4 source address. */
70 ovs_be32 nw_dst
; /* IPv4 destination address. */
71 uint8_t nw_tos
; /* IP ToS (including DSCP and ECN). */
72 uint8_t nw_proto
; /* IP protocol. */
73 ovs_be16 tp_src
; /* TCP/UDP/SCTP source port.  For ICMP,
   * gen_netflow_rec() reads the ICMP type from it. */
74 ovs_be16 tp_dst
; /* TCP/UDP/SCTP destination port.  For ICMP,
   * gen_netflow_rec() reads the ICMP code from it. */
76 uint64_t packet_count
; /* Packets from subrules. */
77 uint64_t byte_count
; /* Bytes from subrules. */
78 long long int used
; /* Last-used time (0 if never used). */
/* File-wide mutex.  It is taken by netflow_flow_update(), netflow_flow_clear(),
 * netflow_run(), netflow_wait() and netflow_set_options(); the helpers
 * annotated OVS_REQUIRES(mutex) expect the caller to hold it. */
81 static struct ovs_mutex mutex
= OVS_MUTEX_INITIALIZER
;
/* Number of netflow objects: incremented where an object is allocated and
 * decremented in netflow_unref() when the last reference goes away. */
82 static atomic_count netflow_count
= ATOMIC_COUNT_INIT(0);
/* Forward declarations of the file-local helpers defined further down.
 * NOTE(review): parts of these prototypes (the second parameter of
 * netflow_flow_lookup() and some terminators/annotations) are missing from
 * this extract. */
84 static struct netflow_flow
*netflow_flow_lookup(const struct netflow
*,
87 static uint32_t netflow_flow_hash(const struct flow
*);
88 static void netflow_expire__(struct netflow
*, struct netflow_flow
*)
90 static void netflow_run__(struct netflow
*) OVS_REQUIRES(mutex
);
/* Un-wildcards in 'wc' the fields of 'flow' that NetFlow records are built
 * from.  The function starts by testing whether 'flow' is IPv4
 * (dl_type == ETH_TYPE_IP).  The statements that follow set the nw_proto,
 * nw_src and nw_dst masks to all-ones, hand the transport ports to
 * flow_unwildcard_tp_ports(), and OR the DSCP bits into the nw_tos mask.
 *
 * NOTE(review): the body of the non-IPv4 branch is not visible in this
 * extract; presumably it returns without touching 'wc' -- confirm. */
93 netflow_mask_wc(struct flow
*flow
, struct flow_wildcards
*wc
)
95 if (flow
->dl_type
!= htons(ETH_TYPE_IP
)) {
/* Exact-match the IP protocol and both IPv4 addresses. */
98 memset(&wc
->masks
.nw_proto
, 0xff, sizeof wc
->masks
.nw_proto
);
99 memset(&wc
->masks
.nw_src
, 0xff, sizeof wc
->masks
.nw_src
);
100 memset(&wc
->masks
.nw_dst
, 0xff, sizeof wc
->masks
.nw_dst
);
101 flow_unwildcard_tp_ports(flow
, wc
);
/* Only the DSCP part of the ToS byte is added to the mask; it is the same
 * part that gen_netflow_rec() exports (nw_tos & IP_DSCP_MASK). */
102 wc
->masks
.nw_tos
|= IP_DSCP_MASK
;
/* Appends one NetFlow v5 record describing 'nf_flow' to the packet being
 * accumulated in 'nf->packet', using the caller-supplied 32-bit
 * 'packet_count' and 'byte_count'.
 *
 * When the packet buffer is empty a v5 header is written first.  Every call
 * then bumps the header's record count by one and stores the current flow
 * sequence number in the header (post-incrementing nf->netflow_cnt).
 *
 * All multi-byte record fields are stored in network byte order; times are
 * exported relative to nf->boot_time.
 *
 * NOTE(review): several lines are missing from this extract (the declaration
 * of 'now', the "else" lines of the two if/else pairs, the tail of the
 * used_time expression, and the body of the final "30 records" branch), so
 * the description above covers only what is visible. */
106 gen_netflow_rec(struct netflow
*nf
, struct netflow_flow
*nf_flow
,
107 uint32_t packet_count
, uint32_t byte_count
)
110 struct netflow_v5_header
*nf_hdr
;
111 struct netflow_v5_record
*nf_rec
;
/* Empty buffer: start a new packet by writing a zeroed v5 header and
 * filling it in. */
113 if (!nf
->packet
.size
) {
116 time_wall_timespec(&now
);
118 nf_hdr
= ofpbuf_put_zeros(&nf
->packet
, sizeof *nf_hdr
);
119 nf_hdr
->version
= htons(NETFLOW_V5_VERSION
);
120 nf_hdr
->count
= htons(0);
121 nf_hdr
->sysuptime
= htonl(time_msec() - nf
->boot_time
);
122 nf_hdr
->unix_secs
= htonl(now
.tv_sec
);
123 nf_hdr
->unix_nsecs
= htonl(now
.tv_nsec
);
124 nf_hdr
->engine_type
= nf
->engine_type
;
125 nf_hdr
->engine_id
= nf
->engine_id
;
126 nf_hdr
->sampling_interval
= htons(0);
/* The header sits at the start of the accumulated packet; account for the
 * record that is about to be appended. */
129 nf_hdr
= nf
->packet
.data
;
130 nf_hdr
->count
= htons(ntohs(nf_hdr
->count
) + 1);
131 nf_hdr
->flow_seq
= htonl(nf
->netflow_cnt
++);
133 nf_rec
= ofpbuf_put_zeros(&nf
->packet
, sizeof *nf_rec
);
134 nf_rec
->src_addr
= nf_flow
->nw_src
;
135 nf_rec
->dst_addr
= nf_flow
->nw_dst
;
136 nf_rec
->nexthop
= htonl(0);
/* With add_id_to_iface set, the low 7 bits of engine_id occupy the top 7
 * bits of each 16-bit interface field and the port number is truncated to
 * the remaining 9 bits. */
137 if (nf
->add_id_to_iface
) {
138 uint16_t iface
= (nf
->engine_id
& 0x7f) << 9;
139 nf_rec
->input
= htons(iface
| (ofp_to_u16(nf_flow
->in_port
) & 0x1ff));
140 nf_rec
->output
= htons(iface
141 | (ofp_to_u16(nf_flow
->output_iface
) & 0x1ff));
/* Otherwise the interface fields carry the plain 16-bit port numbers. */
143 nf_rec
->input
= htons(ofp_to_u16(nf_flow
->in_port
));
144 nf_rec
->output
= htons(ofp_to_u16(nf_flow
->output_iface
));
146 nf_rec
->packet_count
= htonl(packet_count
);
147 nf_rec
->byte_count
= htonl(byte_count
);
148 nf_rec
->init_time
= htonl(nf_flow
->created
- nf
->boot_time
);
/* MAX() keeps the exported last-used time from preceding 'created'. */
149 nf_rec
->used_time
= htonl(MAX(nf_flow
->created
, nf_flow
->used
)
151 if (nf_flow
->nw_proto
== IPPROTO_ICMP
) {
152 /* In NetFlow, the ICMP type and code are concatenated and
153 * placed in the 'dst_port' field. */
154 uint8_t type
= ntohs(nf_flow
->tp_src
);
155 uint8_t code
= ntohs(nf_flow
->tp_dst
);
156 nf_rec
->src_port
= htons(0);
157 nf_rec
->dst_port
= htons((type
<< 8) | code
);
/* Non-ICMP: the ports are already in network byte order. */
159 nf_rec
->src_port
= nf_flow
->tp_src
;
160 nf_rec
->dst_port
= nf_flow
->tp_dst
;
/* Only the low 8 bits of the 16-bit accumulated TCP flags are exported. */
162 nf_rec
->tcp_flags
= (uint8_t) nf_flow
->tcp_flags
;
163 nf_rec
->ip_proto
= nf_flow
->nw_proto
;
164 nf_rec
->ip_tos
= nf_flow
->nw_tos
& IP_DSCP_MASK
;
166 /* NetFlow messages are limited to 30 records. */
167 if (ntohs(nf_hdr
->count
) >= 30) {
/* Folds the statistics in 'stats' for 'flow' into the NetFlow record that
 * 'nf' keeps for that flow.
 *
 * The function first tests whether 'flow' is IPv4.  Under the file-wide
 * mutex it then looks the flow up in nf->flows.  The visible code allocates
 * a zeroed record, copies the key fields from 'flow', sets 'created' to
 * stats->used, records 'output_iface' and inserts the record into nf->flows.
 * If the stored output interface differs from 'output_iface', the record is
 * flushed with netflow_expire__() and restarted.  Packet and byte counts are
 * added, TCP flags are OR-ed in and 'used' is advanced to the later of its
 * old value and stats->used.  When 'used' advanced and there is no active
 * timeout, the record has never expired, or the timeouts were reconfigured
 * after its last expiry, last_expired is reset to the current time.
 *
 * NOTE(review): the non-IPv4 branch body, the declaration of 'used', and the
 * condition guarding the allocation are not visible in this extract;
 * presumably the function returns early for non-IP traffic and allocates
 * only when the lookup finds nothing -- confirm. */
173 netflow_flow_update(struct netflow
*nf
, const struct flow
*flow
,
174 ofp_port_t output_iface
,
175 const struct dpif_flow_stats
*stats
)
178 struct netflow_flow
*nf_flow
;
181 /* NetFlow only reports on IP packets. */
182 if (flow
->dl_type
!= htons(ETH_TYPE_IP
)) {
186 ovs_mutex_lock(&mutex
);
187 nf_flow
= netflow_flow_lookup(nf
, flow
);
/* New record: xzalloc() zeroes the counters and timestamps, so only the key
 * fields, 'created' and 'output_iface' need to be filled in. */
189 nf_flow
= xzalloc(sizeof *nf_flow
);
190 nf_flow
->in_port
= flow
->in_port
.ofp_port
;
191 nf_flow
->nw_src
= flow
->nw_src
;
192 nf_flow
->nw_dst
= flow
->nw_dst
;
193 nf_flow
->nw_tos
= flow
->nw_tos
;
194 nf_flow
->nw_proto
= flow
->nw_proto
;
195 nf_flow
->tp_src
= flow
->tp_src
;
196 nf_flow
->tp_dst
= flow
->tp_dst
;
197 nf_flow
->created
= stats
->used
;
198 nf_flow
->output_iface
= output_iface
;
199 hmap_insert(&nf
->flows
, &nf_flow
->hmap_node
, netflow_flow_hash(flow
));
/* An output interface change ends the current record: flush what has been
 * accumulated so far and start a fresh reporting period. */
202 if (nf_flow
->output_iface
!= output_iface
) {
203 netflow_expire__(nf
, nf_flow
);
204 nf_flow
->created
= stats
->used
;
205 nf_flow
->output_iface
= output_iface
;
/* Accumulate the new statistics. */
208 nf_flow
->packet_count
+= stats
->n_packets
;
209 nf_flow
->byte_count
+= stats
->n_bytes
;
210 nf_flow
->tcp_flags
|= stats
->tcp_flags
;
/* 'used' only ever moves forward. */
212 used
= MAX(nf_flow
->used
, stats
->used
);
213 if (nf_flow
->used
!= used
) {
214 nf_flow
->used
= used
;
215 if (!nf
->active_timeout
|| !nf_flow
->last_expired
216 || nf
->reconfig_time
> nf_flow
->last_expired
) {
217 /* Keep the time updated to prevent a flood of expiration in
219 nf_flow
->last_expired
= time_msec();
223 ovs_mutex_unlock(&mutex
);
/* Emits NetFlow records for the traffic accumulated in 'nf_flow' and then
 * clears its counters.
 *
 * The accumulated 64-bit packet and byte counts are snapshotted, and
 * nf_flow->last_expired is advanced by nf->active_timeout.  If the upper 32
 * bits of the byte count are at most 175, the counts are divided by
 *     n_recs = (bytes + UINT32_MAX - 1) / UINT32_MAX
 * and handed to gen_netflow_rec(), so each record fits the 32-bit counters.
 * The visible text also contains a rate-limited "impossible byte counter"
 * warning for byte counts above that bound.  Finally packet_count,
 * byte_count and tcp_flags are zeroed.
 *
 * NOTE(review): lines are missing from this extract (any early return, the
 * loop around the record generation with its counter updates, the "else",
 * and the ends of two comments), so the exact control flow cannot be
 * confirmed from here. */
227 netflow_expire__(struct netflow
*nf
, struct netflow_flow
*nf_flow
)
230 uint64_t pkts
, bytes
;
232 pkts
= nf_flow
->packet_count
;
233 bytes
= nf_flow
->byte_count
;
/* Unconditionally schedule the next expiry one active timeout later. */
235 nf_flow
->last_expired
+= nf
->active_timeout
;
241 if ((bytes
>> 32) <= 175) {
242 /* NetFlow v5 records are limited to 32-bit counters. If we've wrapped
243 * a counter, send as multiple records so we don't lose track of any
244 * traffic. We try to evenly distribute the packet and byte counters,
245 * so that the bytes-per-packet lengths don't look wonky across the
248 int n_recs
= (bytes
+ UINT32_MAX
- 1) / UINT32_MAX
;
249 uint32_t pkt_count
= pkts
/ n_recs
;
250 uint32_t byte_count
= bytes
/ n_recs
;
252 gen_netflow_rec(nf
, nf_flow
, pkt_count
, byte_count
);
258 /* In 600 seconds, a 10GbE link can theoretically transmit 75 * 10**10
259 * == 175 * 2**32 bytes. The byte counter is bigger than that, so it's
260 * probably a bug--for example, the netdev code uses UINT64_MAX to
261 * report "unknown value", and perhaps that has leaked through to here.
263 * We wouldn't want to hit the loop above in this case, because it
264 * would try to send up to UINT32_MAX netflow records, which would take
267 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
269 VLOG_WARN_RL(&rl
, "impossible byte counter %"PRIu64
, bytes
);
272 /* Update flow tracking data. */
273 nf_flow
->packet_count
= 0;
274 nf_flow
->byte_count
= 0;
275 nf_flow
->tcp_flags
= 0;
/* Drops the NetFlow record that 'nf' keeps for 'flow'.  Under the file-wide
 * mutex the record is looked up, flushed with netflow_expire__() so that
 * its accumulated traffic is still reported, and unlinked from nf->flows.
 *
 * NOTE(review): no check of the lookup result and no release of the
 * unlinked record are visible in this extract; presumably both live on the
 * missing lines -- confirm, since netflow_expire__() dereferences the
 * record. */
279 netflow_flow_clear(struct netflow
*nf
, struct flow
*flow
) OVS_EXCLUDED(mutex
)
281 struct netflow_flow
*nf_flow
;
283 ovs_mutex_lock(&mutex
);
284 nf_flow
= netflow_flow_lookup(nf
, flow
);
286 netflow_expire__(nf
, nf_flow
);
287 hmap_remove(&nf
->flows
, &nf_flow
->hmap_node
);
290 ovs_mutex_unlock(&mutex
);
293 /* Sends any accumulated NetFlow packet to the collectors.  The function
 * then tests whether an active timeout is configured and 'next_timeout'
 * has been reached, schedules the next pass 1000 ms ahead, and expires
 * every tracked flow whose last expiry is more than one active timeout in
 * the past.  Flows found idle are also unlinked from nf->flows.
 *
 * The forward declaration in this file is 'static void', so nothing is
 * returned; the caller must hold the file-wide mutex.
 *
 * NOTE(review): the bodies of the first two "if" statements (presumably a
 * buffer reset and an early return), the "if (idle)" test and any release
 * of unlinked records are not visible in this extract -- confirm. */
296 netflow_run__(struct netflow
*nf
) OVS_REQUIRES(mutex
)
298 long long int now
= time_msec();
299 struct netflow_flow
*nf_flow
, *next
;
/* Flush whatever has been accumulated so far. */
301 if (nf
->packet
.size
) {
302 collectors_send(nf
->collectors
, nf
->packet
.data
, nf
->packet
.size
);
/* Active timeouts disabled, or the next pass is not due yet. */
306 if (!nf
->active_timeout
|| now
< nf
->next_timeout
) {
/* Run the active timeout pass at most once per second. */
310 nf
->next_timeout
= now
+ 1000;
/* The _SAFE iterator is used because records may be unlinked in the loop. */
312 HMAP_FOR_EACH_SAFE (nf_flow
, next
, hmap_node
, &nf
->flows
) {
313 if (now
> nf_flow
->last_expired
+ nf
->active_timeout
) {
/* Sample idleness first: netflow_expire__() advances last_expired. */
314 bool idle
= nf_flow
->used
< nf_flow
->last_expired
;
315 netflow_expire__(nf
, nf_flow
);
318 /* If the netflow_flow hasn't been used in a while, it's
319 * possible the upper layer lost track of it. */
320 hmap_remove(&nf
->flows
, &nf_flow
->hmap_node
);
/* Entry point for periodic NetFlow processing on 'nf': takes the file-wide
 * mutex and releases it again.
 * NOTE(review): the statement between lock and unlock is not visible in this
 * extract; presumably it is the call to netflow_run__(nf), which requires
 * the mutex -- confirm. */
328 netflow_run(struct netflow
*nf
)
330 ovs_mutex_lock(&mutex
);
332 ovs_mutex_unlock(&mutex
);
/* Registers the wake-ups NetFlow processing needs for 'nf' with the poll
 * loop, under the file-wide mutex:
 *   - if an active timeout is configured, a timer wake-up at
 *     nf->next_timeout;
 *   - if a NetFlow packet is waiting in nf->packet, an immediate wake-up so
 *     it can be sent without delay.
 * The caller must not hold the mutex (OVS_EXCLUDED). */
336 netflow_wait(struct netflow
*nf
) OVS_EXCLUDED(mutex
)
338 ovs_mutex_lock(&mutex
);
339 if (nf
->active_timeout
) {
340 poll_timer_wait_until(nf
->next_timeout
);
342 if (nf
->packet
.size
) {
343 poll_immediate_wake();
345 ovs_mutex_unlock(&mutex
);
/* Applies the configuration in 'nf_options' to 'nf' under the file-wide
 * mutex:
 *   - copies engine_type, engine_id and add_id_to_iface;
 *   - destroys the current collectors and creates a new set from
 *     nf_options->collectors;
 *   - sets the active timeout to nf_options->active_timeout when that is
 *     non-negative, otherwise to NF_ACTIVE_TIMEOUT_DEFAULT, and multiplies
 *     it by 1000 (the stored value is compared against time_msec());
 *   - if the stored timeout changed, records the reconfiguration time and
 *     makes the next timeout pass due immediately.
 *
 * NOTE(review): the result of collectors_create() is not used in the visible
 * code.  The return type, any 'error' variable and the final return are on
 * lines missing from this extract -- confirm whether a collector setup
 * failure is meant to be reported to the caller. */
349 netflow_set_options(struct netflow
*nf
,
350 const struct netflow_options
*nf_options
)
354 long long int old_timeout
;
356 ovs_mutex_lock(&mutex
);
357 nf
->engine_type
= nf_options
->engine_type
;
358 nf
->engine_id
= nf_options
->engine_id
;
359 nf
->add_id_to_iface
= nf_options
->add_id_to_iface
;
/* Replace the collector set wholesale rather than diffing it. */
361 collectors_destroy(nf
->collectors
);
362 collectors_create(&nf_options
->collectors
, 0, &nf
->collectors
);
364 old_timeout
= nf
->active_timeout
;
365 if (nf_options
->active_timeout
>= 0) {
366 nf
->active_timeout
= nf_options
->active_timeout
;
/* A negative configured value selects the default. */
368 nf
->active_timeout
= NF_ACTIVE_TIMEOUT_DEFAULT
;
370 nf
->active_timeout
*= 1000;
/* reconfig_time lets netflow_flow_update() reset last_expired for records
 * whose last expiry predates the new timeout. */
371 if (old_timeout
!= nf
->active_timeout
) {
372 nf
->reconfig_time
= time_msec();
373 nf
->next_timeout
= time_msec();
375 ovs_mutex_unlock(&mutex
);
/* Body of the constructor for a netflow object: allocates a zeroed object,
 * stamps boot_time with the current time_msec(), starts with no collectors
 * and add_id_to_iface off, initializes the flow map and the reference
 * count, prepares the packet buffer with an initial size of 1500 bytes and
 * counts the new object in 'netflow_count'.
 *
 * NOTE(review): the function header and the return statement are not visible
 * in this extract; presumably this is netflow_create(), which the boot_time
 * field comment refers to -- confirm.  Other members (engine_type,
 * engine_id, netflow_cnt, the timeouts) are zero at least by virtue of
 * xzalloc(). */
383 struct netflow
*nf
= xzalloc(sizeof *nf
);
387 nf
->boot_time
= time_msec();
388 nf
->collectors
= NULL
;
389 nf
->add_id_to_iface
= false;
391 hmap_init(&nf
->flows
);
392 ovs_refcount_init(&nf
->ref_cnt
);
393 ofpbuf_init(&nf
->packet
, 1500);
394 atomic_count_inc(&netflow_count
);
/* Takes an additional reference on 'nf_'.  The parameter is const-qualified
 * for the caller's convenience; the reference count is modified through a
 * CONST_CAST'ed alias.
 * NOTE(review): the return type, any NULL check on 'nf_' and the return
 * statement are not visible in this extract -- confirm. */
399 netflow_ref(const struct netflow
*nf_
)
401 struct netflow
*nf
= CONST_CAST(struct netflow
*, nf_
);
403 ovs_refcount_ref(&nf
->ref_cnt
);
/* Drops one reference on 'nf'; a null 'nf' is accepted and ignored.  When
 * the reference being dropped was the last one (the unref call returns the
 * previous count, 1), the object is removed from 'netflow_count', its
 * collectors are destroyed and its packet buffer is released.
 *
 * NOTE(review): nothing in the visible code releases the records still
 * linked in nf->flows, the map itself, or 'nf'.  Lines are missing from this
 * extract, so confirm against the full file that these are freed; otherwise
 * this is a leak. */
409 netflow_unref(struct netflow
*nf
)
411 if (nf
&& ovs_refcount_unref_relaxed(&nf
->ref_cnt
) == 1) {
412 atomic_count_dec(&netflow_count
);
413 collectors_destroy(nf
->collectors
);
414 ofpbuf_uninit(&nf
->packet
);
419 /* Returns true if there exist any netflow objects, false otherwise.
 * Callers must cope with transient false positives, i.e., there is no tight
 * synchronization with the count and the actual existence of netflow objects.
 */
/* True when the file-wide count of netflow objects is nonzero.  The count is
 * read atomically but without taking the mutex. */
426 return atomic_count_get(&netflow_count
) > 0;
/* Searches for the record that 'nf' keeps for 'flow'.  Candidates are the
 * entries whose hash equals netflow_flow_hash(flow); a candidate matches
 * when in_port, nw_src, nw_dst, nw_tos, nw_proto, tp_src and tp_dst all
 * equal the corresponding fields of 'flow'.  These are the same seven
 * fields that netflow_flow_hash() hashes.
 *
 * NOTE(review): the map argument of the iteration macro, the body of the
 * match branch and the final return are not visible in this extract;
 * presumably the matching record is returned, or NULL when none matches --
 * confirm. */
431 static struct netflow_flow
*
432 netflow_flow_lookup(const struct netflow
*nf
, const struct flow
*flow
)
435 struct netflow_flow
*nf_flow
;
437 HMAP_FOR_EACH_WITH_HASH (nf_flow
, hmap_node
, netflow_flow_hash(flow
),
439 if (flow
->in_port
.ofp_port
== nf_flow
->in_port
440 && flow
->nw_src
== nf_flow
->nw_src
441 && flow
->nw_dst
== nf_flow
->nw_dst
442 && flow
->nw_tos
== nf_flow
->nw_tos
443 && flow
->nw_proto
== nf_flow
->nw_proto
444 && flow
->tp_src
== nf_flow
->tp_src
445 && flow
->tp_dst
== nf_flow
->tp_dst
) {
/* Computes the hash used to place and find the record for 'flow' in a
 * netflow object's 'flows' map.  Seven values are mixed in, in this order:
 * in_port, nw_src, nw_dst, nw_tos, nw_proto, tp_src, tp_dst.  These are the
 * fields netflow_flow_lookup() compares.  Addresses and ports are converted
 * to host byte order before being added.
 *
 * NOTE(review): the declaration and initial value of 'hash' are not visible
 * in this extract.  The 28 passed to hash_finish() is presumably the number
 * of bytes hashed (7 values x 4 bytes) -- confirm against hash_finish(). */
454 netflow_flow_hash(const struct flow
*flow
)
458 hash
= hash_add(hash
, (OVS_FORCE
uint32_t) flow
->in_port
.ofp_port
);
459 hash
= hash_add(hash
, ntohl(flow
->nw_src
));
460 hash
= hash_add(hash
, ntohl(flow
->nw_dst
));
461 hash
= hash_add(hash
, flow
->nw_tos
);
462 hash
= hash_add(hash
, flow
->nw_proto
);
463 hash
= hash_add(hash
, ntohs(flow
->tp_src
));
464 hash
= hash_add(hash
, ntohs(flow
->tp_dst
));
466 return hash_finish(hash
, 28);