/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2013, 2014, 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include "byte-order.h"
#include "collectors.h"
#include "dpif.h"
#include "flow.h"
#include "lib/netflow.h"
#include "openvswitch/ofpbuf.h"
#include "ofproto/netflow.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
#include "socket-util.h"
#include "timeval.h"
#include "util.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(netflow);

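/* A NetFlow exporter: its configuration, the set of collectors to send to,
 * the NetFlow v5 packet under construction, and the flows being tracked. */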
struct netflow {
    uint8_t engine_type;          /* Value of engine_type to use. */
    uint8_t engine_id;            /* Value of engine_id to use. */
    long long int boot_time;      /* Time when netflow_create() was called. */
    struct collectors *collectors; /* NetFlow collectors. */
    bool add_id_to_iface;         /* Put the 7 least significant bits of
                                   * 'engine_id' into the most significant
                                   * bits of the interface fields. */
    uint32_t netflow_cnt;         /* Flow sequence number for NetFlow. */
    struct ofpbuf packet;         /* NetFlow packet being accumulated. */
    long long int active_timeout; /* Timeout for flows that are still active. */
    long long int next_timeout;   /* Next scheduled active timeout. */
    long long int reconfig_time;  /* When we reconfigured the timeouts. */

    struct hmap flows;            /* Contains 'netflow_flows'. */

    struct ovs_refcount ref_cnt;
};

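/* Accounting state for one tracked flow, keyed on the seven packet fields
 * compared in netflow_flow_lookup(). */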
struct netflow_flow {
    struct hmap_node hmap_node;

    long long int last_expired;   /* Time this flow last timed out. */
    long long int created;        /* Time the flow was created or last reset
                                   * after expiring. */

    ofp_port_t output_iface;      /* Output interface index. */
    uint16_t tcp_flags;           /* Bitwise-OR of all TCP flags seen. */

    ofp_port_t in_port;           /* Input port. */
    ovs_be32 nw_src;              /* IPv4 source address. */
    ovs_be32 nw_dst;              /* IPv4 destination address. */
    uint8_t nw_tos;               /* IP ToS (including DSCP and ECN). */
    uint8_t nw_proto;             /* IP protocol. */
    ovs_be16 tp_src;              /* TCP/UDP/SCTP source port. */
    ovs_be16 tp_dst;              /* TCP/UDP/SCTP destination port. */

    uint64_t packet_count;        /* Packets since the last expiration. */
    uint64_t byte_count;          /* Bytes since the last expiration. */
    long long int used;           /* Last-used time (0 if never used). */
};

static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static atomic_count netflow_count = ATOMIC_COUNT_INIT(0);

static struct netflow_flow *netflow_flow_lookup(const struct netflow *,
                                                const struct flow *)
    OVS_REQUIRES(mutex);
static uint32_t netflow_flow_hash(const struct flow *);
static void netflow_expire__(struct netflow *, struct netflow_flow *)
    OVS_REQUIRES(mutex);
static void netflow_run__(struct netflow *) OVS_REQUIRES(mutex);

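/* Marks in 'wc' the fields that NetFlow accounting reads from IPv4 'flow',
 * so that they are not wildcarded. */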
void
netflow_mask_wc(const struct flow *flow, struct flow_wildcards *wc)
{
    if (flow->dl_type != htons(ETH_TYPE_IP)) {
        return;
    }
    memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
    memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
    flow_unwildcard_tp_ports(flow, wc);
    wc->masks.nw_tos |= IP_DSCP_MASK;
}

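/* Appends a NetFlow v5 record for 'nf_flow' to the packet that 'nf' is
 * accumulating, first adding a v5 header if the packet is empty.  When
 * 'add_id_to_iface' is set, the 7 low bits of 'engine_id' become the top 7
 * bits of each 16-bit interface field, leaving 9 bits for the port number. */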
static void
gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow,
                uint32_t packet_count, uint32_t byte_count)
    OVS_REQUIRES(mutex)
{
    struct netflow_v5_header *nf_hdr;
    struct netflow_v5_record *nf_rec;

    if (!nf->packet.size) {
        struct timespec now;

        time_wall_timespec(&now);

        nf_hdr = ofpbuf_put_zeros(&nf->packet, sizeof *nf_hdr);
        nf_hdr->version = htons(NETFLOW_V5_VERSION);
        nf_hdr->count = htons(0);
        nf_hdr->sysuptime = htonl(time_msec() - nf->boot_time);
        nf_hdr->unix_secs = htonl(now.tv_sec);
        nf_hdr->unix_nsecs = htonl(now.tv_nsec);
        nf_hdr->engine_type = nf->engine_type;
        nf_hdr->engine_id = nf->engine_id;
        nf_hdr->sampling_interval = htons(0);
    }

    nf_hdr = nf->packet.data;
    nf_hdr->count = htons(ntohs(nf_hdr->count) + 1);
    nf_hdr->flow_seq = htonl(nf->netflow_cnt++);

    nf_rec = ofpbuf_put_zeros(&nf->packet, sizeof *nf_rec);
    nf_rec->src_addr = nf_flow->nw_src;
    nf_rec->dst_addr = nf_flow->nw_dst;
    nf_rec->nexthop = htonl(0);
    if (nf->add_id_to_iface) {
        uint16_t iface = (nf->engine_id & 0x7f) << 9;
        nf_rec->input = htons(iface | (ofp_to_u16(nf_flow->in_port) & 0x1ff));
        nf_rec->output = htons(iface
                               | (ofp_to_u16(nf_flow->output_iface) & 0x1ff));
    } else {
        nf_rec->input = htons(ofp_to_u16(nf_flow->in_port));
        nf_rec->output = htons(ofp_to_u16(nf_flow->output_iface));
    }
    nf_rec->packet_count = htonl(packet_count);
    nf_rec->byte_count = htonl(byte_count);
    nf_rec->init_time = htonl(nf_flow->created - nf->boot_time);
    nf_rec->used_time = htonl(MAX(nf_flow->created, nf_flow->used)
                              - nf->boot_time);
    if (nf_flow->nw_proto == IPPROTO_ICMP) {
        /* In NetFlow, the ICMP type and code are concatenated and
         * placed in the 'dst_port' field. */
        uint8_t type = ntohs(nf_flow->tp_src);
        uint8_t code = ntohs(nf_flow->tp_dst);
        nf_rec->src_port = htons(0);
        nf_rec->dst_port = htons((type << 8) | code);
    } else {
        nf_rec->src_port = nf_flow->tp_src;
        nf_rec->dst_port = nf_flow->tp_dst;
    }
    nf_rec->tcp_flags = (uint8_t) nf_flow->tcp_flags;
    nf_rec->ip_proto = nf_flow->nw_proto;
    nf_rec->ip_tos = nf_flow->nw_tos & IP_DSCP_MASK;

    /* NetFlow messages are limited to 30 records. */
    if (ntohs(nf_hdr->count) >= 30) {
        netflow_run__(nf);
    }
}

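/* Credits 'stats' to the NetFlow state that 'nf' keeps for the IPv4 'flow',
 * creating that state if this is the first time the flow has been seen, and
 * expiring it early if its output interface has changed. */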
void
netflow_flow_update(struct netflow *nf, const struct flow *flow,
                    ofp_port_t output_iface,
                    const struct dpif_flow_stats *stats)
    OVS_EXCLUDED(mutex)
{
    struct netflow_flow *nf_flow;
    long long int used;

    /* NetFlow only reports on IP packets. */
    if (flow->dl_type != htons(ETH_TYPE_IP)) {
        return;
    }

    ovs_mutex_lock(&mutex);
    nf_flow = netflow_flow_lookup(nf, flow);
    if (!nf_flow) {
        nf_flow = xzalloc(sizeof *nf_flow);
        nf_flow->in_port = flow->in_port.ofp_port;
        nf_flow->nw_src = flow->nw_src;
        nf_flow->nw_dst = flow->nw_dst;
        nf_flow->nw_tos = flow->nw_tos;
        nf_flow->nw_proto = flow->nw_proto;
        nf_flow->tp_src = flow->tp_src;
        nf_flow->tp_dst = flow->tp_dst;
        nf_flow->created = stats->used;
        nf_flow->output_iface = output_iface;
        hmap_insert(&nf->flows, &nf_flow->hmap_node, netflow_flow_hash(flow));
    }

    if (nf_flow->output_iface != output_iface) {
        netflow_expire__(nf, nf_flow);
        nf_flow->created = stats->used;
        nf_flow->output_iface = output_iface;
    }

    nf_flow->packet_count += stats->n_packets;
    nf_flow->byte_count += stats->n_bytes;
    nf_flow->tcp_flags |= stats->tcp_flags;

    used = MAX(nf_flow->used, stats->used);
    if (nf_flow->used != used) {
        nf_flow->used = used;
        if (!nf->active_timeout || !nf_flow->last_expired
            || nf->reconfig_time > nf_flow->last_expired) {
            /* Keep the time updated to prevent a flood of expiration in
             * the future. */
            nf_flow->last_expired = time_msec();
        }
    }

    ovs_mutex_unlock(&mutex);
}

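/* Emits NetFlow records for 'nf_flow' and zeroes its counters.  Because
 * NetFlow v5 counters are only 32 bits wide, larger totals are spread evenly
 * over several records: for example, a byte count of 6 * 10**9 goes out as
 * two records of roughly 3 * 10**9 bytes each. */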
static void
netflow_expire__(struct netflow *nf, struct netflow_flow *nf_flow)
    OVS_REQUIRES(mutex)
{
    uint64_t pkts, bytes;

    pkts = nf_flow->packet_count;
    bytes = nf_flow->byte_count;

    nf_flow->last_expired += nf->active_timeout;

    if (pkts == 0) {
        return;
    }

    if ((bytes >> 32) <= 175) {
        /* NetFlow v5 records are limited to 32-bit counters.  If we've wrapped
         * a counter, send as multiple records so we don't lose track of any
         * traffic.  We try to evenly distribute the packet and byte counters,
         * so that the bytes-per-packet lengths don't look wonky across the
         * records. */
        while (bytes) {
            int n_recs = (bytes + UINT32_MAX - 1) / UINT32_MAX;
            uint32_t pkt_count = pkts / n_recs;
            uint32_t byte_count = bytes / n_recs;

            gen_netflow_rec(nf, nf_flow, pkt_count, byte_count);

            pkts -= pkt_count;
            bytes -= byte_count;
        }
    } else {
        /* In 600 seconds, a 10GbE link can theoretically transmit 75 * 10**10
         * == 175 * 2**32 bytes.  The byte counter is bigger than that, so it's
         * probably a bug--for example, the netdev code uses UINT64_MAX to
         * report "unknown value", and perhaps that has leaked through to here.
         *
         * We wouldn't want to hit the loop above in this case, because it
         * would try to send up to UINT32_MAX netflow records, which would take
         * a long time. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

        VLOG_WARN_RL(&rl, "impossible byte counter %"PRIu64, bytes);
    }

    /* Update flow tracking data. */
    nf_flow->packet_count = 0;
    nf_flow->byte_count = 0;
    nf_flow->tcp_flags = 0;
}

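/* Expires and discards the accounting state, if any, that 'nf' keeps for
 * 'flow'. */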
void
netflow_flow_clear(struct netflow *nf, const struct flow *flow)
    OVS_EXCLUDED(mutex)
{
    struct netflow_flow *nf_flow;

    ovs_mutex_lock(&mutex);
    nf_flow = netflow_flow_lookup(nf, flow);
    if (nf_flow) {
        netflow_expire__(nf, nf_flow);
        hmap_remove(&nf->flows, &nf_flow->hmap_node);
        free(nf_flow);
    }
    ovs_mutex_unlock(&mutex);
}

/* Sends out the NetFlow packet that 'nf' has accumulated, if any, and runs a
 * round of active timeouts if one is due. */
static void
netflow_run__(struct netflow *nf) OVS_REQUIRES(mutex)
{
    long long int now = time_msec();
    struct netflow_flow *nf_flow, *next;

    if (nf->packet.size) {
        collectors_send(nf->collectors, nf->packet.data, nf->packet.size);
        nf->packet.size = 0;
    }

    if (!nf->active_timeout || now < nf->next_timeout) {
        return;
    }

    nf->next_timeout = now + 1000;

    HMAP_FOR_EACH_SAFE (nf_flow, next, hmap_node, &nf->flows) {
        if (now > nf_flow->last_expired + nf->active_timeout) {
            bool idle = nf_flow->used < nf_flow->last_expired;
            netflow_expire__(nf, nf_flow);

            if (idle) {
                /* If the netflow_flow hasn't been used in a while, it's
                 * possible the upper layer lost track of it. */
                hmap_remove(&nf->flows, &nf_flow->hmap_node);
                free(nf_flow);
            }
        }
    }
}

void
netflow_run(struct netflow *nf)
{
    ovs_mutex_lock(&mutex);
    netflow_run__(nf);
    ovs_mutex_unlock(&mutex);
}

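/* Causes the poll loop to wake up when 'nf' has a packet queued for its
 * collectors or its next active timeout is due. */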
void
netflow_wait(struct netflow *nf) OVS_EXCLUDED(mutex)
{
    ovs_mutex_lock(&mutex);
    if (nf->active_timeout) {
        poll_timer_wait_until(nf->next_timeout);
    }
    if (nf->packet.size) {
        poll_immediate_wake();
    }
    ovs_mutex_unlock(&mutex);
}

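/* Applies 'nf_options' to 'nf': engine type and ID, the set of collectors,
 * and the active timeout, which is supplied in seconds (negative values
 * select the default) and stored in milliseconds. */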
int
netflow_set_options(struct netflow *nf,
                    const struct netflow_options *nf_options)
    OVS_EXCLUDED(mutex)
{
    int error = 0;
    long long int old_timeout;

    ovs_mutex_lock(&mutex);
    nf->engine_type = nf_options->engine_type;
    nf->engine_id = nf_options->engine_id;
    nf->add_id_to_iface = nf_options->add_id_to_iface;

    collectors_destroy(nf->collectors);
    collectors_create(&nf_options->collectors, -1, &nf->collectors);

    old_timeout = nf->active_timeout;
    if (nf_options->active_timeout >= 0) {
        nf->active_timeout = nf_options->active_timeout;
    } else {
        nf->active_timeout = NF_ACTIVE_TIMEOUT_DEFAULT;
    }
    nf->active_timeout *= 1000;
    if (old_timeout != nf->active_timeout) {
        nf->reconfig_time = time_msec();
        nf->next_timeout = time_msec();
    }
    ovs_mutex_unlock(&mutex);

    return error;
}

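/* Creates and returns a new NetFlow exporter with no collectors configured. */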
struct netflow *
netflow_create(void)
{
    struct netflow *nf = xzalloc(sizeof *nf);

    nf->engine_type = 0;
    nf->engine_id = 0;
    nf->boot_time = time_msec();
    nf->collectors = NULL;
    nf->add_id_to_iface = false;
    nf->netflow_cnt = 0;
    hmap_init(&nf->flows);
    ovs_refcount_init(&nf->ref_cnt);
    ofpbuf_init(&nf->packet, 1500);
    atomic_count_inc(&netflow_count);
    return nf;
}

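/* Takes a reference to 'nf_', which may be null. */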
struct netflow *
netflow_ref(const struct netflow *nf_)
{
    struct netflow *nf = CONST_CAST(struct netflow *, nf_);
    if (nf) {
        ovs_refcount_ref(&nf->ref_cnt);
    }
    return nf;
}

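/* Releases a reference to 'nf'; the last reference destroys it. */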
void
netflow_unref(struct netflow *nf)
{
    if (nf && ovs_refcount_unref_relaxed(&nf->ref_cnt) == 1) {
        atomic_count_dec(&netflow_count);
        collectors_destroy(nf->collectors);
        ofpbuf_uninit(&nf->packet);

        struct netflow_flow *nf_flow, *next;
        HMAP_FOR_EACH_SAFE (nf_flow, next, hmap_node, &nf->flows) {
            hmap_remove(&nf->flows, &nf_flow->hmap_node);
            free(nf_flow);
        }
        hmap_destroy(&nf->flows);

        free(nf);
    }
}

/* Returns true if any netflow objects exist, false otherwise.
 * Callers must cope with transient false positives, i.e., there is no tight
 * synchronization between the count and the actual existence of netflow
 * objects. */
bool
netflow_exists(void)
{
    return atomic_count_get(&netflow_count) > 0;
}

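/* Returns the flow tracked by 'nf' that matches 'flow', or NULL if there is
 * none. */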
static struct netflow_flow *
netflow_flow_lookup(const struct netflow *nf, const struct flow *flow)
    OVS_REQUIRES(mutex)
{
    struct netflow_flow *nf_flow;

    HMAP_FOR_EACH_WITH_HASH (nf_flow, hmap_node, netflow_flow_hash(flow),
                             &nf->flows) {
        if (flow->in_port.ofp_port == nf_flow->in_port
            && flow->nw_src == nf_flow->nw_src
            && flow->nw_dst == nf_flow->nw_dst
            && flow->nw_tos == nf_flow->nw_tos
            && flow->nw_proto == nf_flow->nw_proto
            && flow->tp_src == nf_flow->tp_src
            && flow->tp_dst == nf_flow->tp_dst) {
            return nf_flow;
        }
    }

    return NULL;
}

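/* Hashes the seven fields that netflow_flow_lookup() compares.  The basis of
 * 28 passed to hash_finish() matches the 28 bytes mixed in above: seven
 * 32-bit words. */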
static uint32_t
netflow_flow_hash(const struct flow *flow)
{
    uint32_t hash = 0;

    hash = hash_add(hash, (OVS_FORCE uint32_t) flow->in_port.ofp_port);
    hash = hash_add(hash, ntohl(flow->nw_src));
    hash = hash_add(hash, ntohl(flow->nw_dst));
    hash = hash_add(hash, flow->nw_tos);
    hash = hash_add(hash, flow->nw_proto);
    hash = hash_add(hash, ntohs(flow->tp_src));
    hash = hash_add(hash, ntohs(flow->tp_dst));

    return hash_finish(hash, 28);
}