]> git.proxmox.com Git - ovs.git/blob - ofproto/ofproto-dpif-sflow.c
flow: Refactor flow_compose() API.
[ovs.git] / ofproto / ofproto-dpif-sflow.c
1 /*
2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
3 * Copyright (c) 2009 InMon Corp.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 #include <config.h>
19 #include "ofproto-dpif-sflow.h"
20 #include <inttypes.h>
21 #include <sys/resource.h>
22 #include <sys/socket.h>
23 #include <net/if.h>
24 #include <stdlib.h>
25 #include "collectors.h"
26 #include "compiler.h"
27 #include "dpif.h"
28 #include "hash.h"
29 #include "openvswitch/hmap.h"
30 #include "netdev.h"
31 #include "netlink.h"
32 #include "openvswitch/ofpbuf.h"
33 #include "ofproto.h"
34 #include "packets.h"
35 #include "poll-loop.h"
36 #include "ovs-router.h"
37 #include "route-table.h"
38 #include "sflow_api.h"
39 #include "socket-util.h"
40 #include "timeval.h"
41 #include "openvswitch/vlog.h"
42 #include "lib/odp-util.h"
43 #include "lib/unaligned.h"
44 #include "ofproto-provider.h"
45 #include "lacp.h"
46
47 VLOG_DEFINE_THIS_MODULE(sflow);
48
49 static struct ovs_mutex mutex;
50
/* Sub-agent id of the sFlow sub-agent that is currently responsible for
 * sending the datapath (global) counters, or SFLOW_GC_SUBID_UNCLAIMED when no
 * sub-agent has claimed that role.
 *
 * The macro is fully parenthesized so that it expands as a single operand in
 * any expression context (e.g. as the operand of 'sizeof'). */
#define SFLOW_GC_SUBID_UNCLAIMED ((uint32_t) -1)
static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
55
/* Tunnel encapsulations that this module distinguishes for a port.
 * DPIF_SFLOW_TUNNEL_UNKNOWN is used for anything that is not recognized. */
enum dpif_sflow_tunnel_type {
    DPIF_SFLOW_TUNNEL_UNKNOWN = 0,
    DPIF_SFLOW_TUNNEL_VXLAN = 1,
    DPIF_SFLOW_TUNNEL_GRE = 2,
    DPIF_SFLOW_TUNNEL_LISP = 3,
    DPIF_SFLOW_TUNNEL_GENEVE = 4
};
66
67 struct dpif_sflow_port {
68 struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
69 SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
70 struct ofport *ofport; /* To retrive port stats. */
71 odp_port_t odp_port;
72 enum dpif_sflow_tunnel_type tunnel_type;
73 };
74
75 struct dpif_sflow {
76 struct collectors *collectors;
77 SFLAgent *sflow_agent;
78 struct ofproto_sflow_options *options;
79 time_t next_tick;
80 size_t n_flood, n_all;
81 struct hmap ports; /* Contains "struct dpif_sflow_port"s. */
82 uint32_t probability;
83 struct ovs_refcount ref_cnt;
84 };
85
86 static void dpif_sflow_del_port__(struct dpif_sflow *,
87 struct dpif_sflow_port *);
88
89 #define RECEIVER_INDEX 1
90
91 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
92
93 static bool
94 ofproto_sflow_options_equal(const struct ofproto_sflow_options *a,
95 const struct ofproto_sflow_options *b)
96 {
97 return (sset_equals(&a->targets, &b->targets)
98 && a->sampling_rate == b->sampling_rate
99 && a->polling_interval == b->polling_interval
100 && a->header_len == b->header_len
101 && a->sub_id == b->sub_id
102 && nullable_string_is_equal(a->agent_device, b->agent_device)
103 && nullable_string_is_equal(a->control_ip, b->control_ip));
104 }
105
106 static struct ofproto_sflow_options *
107 ofproto_sflow_options_clone(const struct ofproto_sflow_options *old)
108 {
109 struct ofproto_sflow_options *new = xmemdup(old, sizeof *old);
110 sset_clone(&new->targets, &old->targets);
111 new->agent_device = nullable_xstrdup(old->agent_device);
112 new->control_ip = nullable_xstrdup(old->control_ip);
113 return new;
114 }
115
116 static void
117 ofproto_sflow_options_destroy(struct ofproto_sflow_options *options)
118 {
119 if (options) {
120 sset_destroy(&options->targets);
121 free(options->agent_device);
122 free(options->control_ip);
123 free(options);
124 }
125 }
126
127 /* sFlow library callback to allocate memory. */
128 static void *
129 sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
130 size_t bytes)
131 {
132 return xzalloc(bytes);
133 }
134
135 /* sFlow library callback to free memory. */
136 static int
137 sflow_agent_free_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
138 void *obj)
139 {
140 free(obj);
141 return 0;
142 }
143
144 /* sFlow library callback to report error. */
145 static void
146 sflow_agent_error_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
147 char *msg)
148 {
149 VLOG_WARN("sFlow agent error: %s", msg);
150 }
151
152 /* sFlow library callback to send datagram. */
153 static void
154 sflow_agent_send_packet_cb(void *ds_, SFLAgent *agent OVS_UNUSED,
155 SFLReceiver *receiver OVS_UNUSED, u_char *pkt,
156 uint32_t pktLen)
157 {
158 struct dpif_sflow *ds = ds_;
159 collectors_send(ds->collectors, pkt, pktLen);
160 }
161
162 static struct dpif_sflow_port *
163 dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
164 OVS_REQUIRES(mutex)
165 {
166 struct dpif_sflow_port *dsp;
167
168 HMAP_FOR_EACH_IN_BUCKET (dsp, hmap_node, hash_odp_port(odp_port),
169 &ds->ports) {
170 if (dsp->odp_port == odp_port) {
171 return dsp;
172 }
173 }
174 return NULL;
175 }
176
177 /* Call to get the datapath stats. Modeled after the dpctl utility.
178 *
179 * It might be more efficient for this module to be given a handle it can use
180 * to get these stats more efficiently, but this is only going to be called
181 * once every 20-30 seconds. Return number of datapaths found (normally expect
182 * 1). */
183 static int
184 sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
185 struct dpif_dp_stats *dp_totals)
186 {
187 struct sset types;
188 const char *type;
189 int count = 0;
190
191 memset(dp_totals, 0, sizeof *dp_totals);
192 sset_init(&types);
193 dp_enumerate_types(&types);
194 SSET_FOR_EACH (type, &types) {
195 struct sset names;
196 const char *name;
197 sset_init(&names);
198 if (dp_enumerate_names(type, &names) == 0) {
199 SSET_FOR_EACH (name, &names) {
200 struct dpif *dpif;
201 if (dpif_open(name, type, &dpif) == 0) {
202 struct dpif_dp_stats dp_stats;
203 if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
204 count++;
205 dp_totals->n_hit += dp_stats.n_hit;
206 dp_totals->n_missed += dp_stats.n_missed;
207 dp_totals->n_lost += dp_stats.n_lost;
208 dp_totals->n_flows += dp_stats.n_flows;
209 dp_totals->n_mask_hit += dp_stats.n_mask_hit;
210 dp_totals->n_masks += dp_stats.n_masks;
211 }
212 dpif_close(dpif);
213 }
214 }
215 sset_destroy(&names);
216 }
217 }
218 sset_destroy(&types);
219 return count;
220 }
221
222 /* If there are multiple bridges defined then we need some
223 minimal artibration to decide which one should send the
224 global counters. This function allows each sub-agent to
225 ask if he should do it or not. */
226 static bool
227 sflow_global_counters_subid_test(uint32_t subid)
228 OVS_REQUIRES(mutex)
229 {
230 if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
231 /* The role is up for grabs. */
232 sflow_global_counters_subid = subid;
233 }
234 return (sflow_global_counters_subid == subid);
235 }
236
237 static void
238 sflow_global_counters_subid_clear(uint32_t subid)
239 OVS_REQUIRES(mutex)
240 {
241 if (sflow_global_counters_subid == subid) {
242 /* The sub-agent that was sending global counters
243 is going away, so reset to allow another
244 to take over. */
245 sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
246 }
247 }
248
249 static void
250 sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
251 SFL_COUNTERS_SAMPLE_TYPE *cs)
252 OVS_REQUIRES(mutex)
253 {
254 struct dpif_sflow *ds = ds_;
255 SFLCounters_sample_element dp_elem, res_elem;
256 struct dpif_dp_stats dp_totals;
257 struct rusage usage;
258
259 if (!sflow_global_counters_subid_test(poller->agent->subId)) {
260 /* Another sub-agent is currently responsible for this. */
261 return;
262 }
263
264 /* datapath stats */
265 if (sflow_get_dp_stats(ds, &dp_totals)) {
266 dp_elem.tag = SFLCOUNTERS_OVSDP;
267 dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
268 dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
269 dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
270 dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
271 dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
272 dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
273 SFLADD_ELEMENT(cs, &dp_elem);
274 }
275
276 /* resource usage */
277 getrusage(RUSAGE_SELF, &usage);
278 res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
279 res_elem.counterBlock.appResources.user_time
280 = timeval_to_msec(&usage.ru_utime);
281 res_elem.counterBlock.appResources.system_time
282 = timeval_to_msec(&usage.ru_stime);
283 res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
284 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
285 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
286 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
287 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
288 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
289
290 SFLADD_ELEMENT(cs, &res_elem);
291 sfl_poller_writeCountersSample(poller, cs);
292 }
293
294 static void
295 sflow_agent_get_counters(void *ds_, SFLPoller *poller,
296 SFL_COUNTERS_SAMPLE_TYPE *cs)
297 OVS_REQUIRES(mutex)
298 {
299 struct dpif_sflow *ds = ds_;
300 SFLCounters_sample_element elem, lacp_elem, of_elem, name_elem;
301 SFLCounters_sample_element eth_elem;
302 enum netdev_features current;
303 struct dpif_sflow_port *dsp;
304 SFLIf_counters *counters;
305 SFLEthernet_counters* eth_counters;
306 struct netdev_stats stats;
307 enum netdev_flags flags;
308 struct lacp_slave_stats lacp_stats;
309 const char *ifName;
310
311 dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
312 if (!dsp) {
313 return;
314 }
315
316 elem.tag = SFLCOUNTERS_GENERIC;
317 counters = &elem.counterBlock.generic;
318 counters->ifIndex = SFL_DS_INDEX(poller->dsi);
319 counters->ifType = 6;
320 if (!netdev_get_features(dsp->ofport->netdev, &current, NULL, NULL, NULL)) {
321 /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown,
322 1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
323 counters->ifSpeed = netdev_features_to_bps(current, 0);
324 counters->ifDirection = (netdev_features_is_full_duplex(current)
325 ? 1 : 2);
326 } else {
327 counters->ifSpeed = 100000000;
328 counters->ifDirection = 0;
329 }
330 if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) {
331 counters->ifStatus = 1; /* ifAdminStatus up. */
332 if (netdev_get_carrier(dsp->ofport->netdev)) {
333 counters->ifStatus |= 2; /* ifOperStatus us. */
334 }
335 } else {
336 counters->ifStatus = 0; /* Down. */
337 }
338
339 /* XXX
340 1. Is the multicast counter filled in?
341 2. Does the multicast counter include broadcasts?
342 3. Does the rx_packets counter include multicasts/broadcasts?
343 */
344 ofproto_port_get_stats(dsp->ofport, &stats);
345 counters->ifInOctets = stats.rx_bytes;
346 counters->ifInUcastPkts = stats.rx_packets;
347 counters->ifInMulticastPkts = stats.multicast;
348 counters->ifInBroadcastPkts = stats.rx_broadcast_packets;
349 counters->ifInDiscards = stats.rx_dropped;
350 counters->ifInErrors = stats.rx_errors;
351 counters->ifInUnknownProtos = -1;
352 counters->ifOutOctets = stats.tx_bytes;
353 counters->ifOutUcastPkts = stats.tx_packets;
354 counters->ifOutMulticastPkts = stats.tx_multicast_packets;
355 counters->ifOutBroadcastPkts = stats.tx_broadcast_packets;
356 counters->ifOutDiscards = stats.tx_dropped;
357 counters->ifOutErrors = stats.tx_errors;
358 counters->ifPromiscuousMode = 0;
359
360 SFLADD_ELEMENT(cs, &elem);
361
362 /* Include LACP counters and identifiers if this port is part of a LAG. */
363 if (ofproto_port_get_lacp_stats(dsp->ofport, &lacp_stats) == 0) {
364 memset(&lacp_elem, 0, sizeof lacp_elem);
365 lacp_elem.tag = SFLCOUNTERS_LACP;
366 lacp_elem.counterBlock.lacp.actorSystemID =
367 lacp_stats.dot3adAggPortActorSystemID;
368 lacp_elem.counterBlock.lacp.partnerSystemID =
369 lacp_stats.dot3adAggPortPartnerOperSystemID;
370 lacp_elem.counterBlock.lacp.attachedAggID =
371 lacp_stats.dot3adAggPortAttachedAggID;
372 lacp_elem.counterBlock.lacp.portState.v.actorAdmin =
373 lacp_stats.dot3adAggPortActorAdminState;
374 lacp_elem.counterBlock.lacp.portState.v.actorOper =
375 lacp_stats.dot3adAggPortActorOperState;
376 lacp_elem.counterBlock.lacp.portState.v.partnerAdmin =
377 lacp_stats.dot3adAggPortPartnerAdminState;
378 lacp_elem.counterBlock.lacp.portState.v.partnerOper =
379 lacp_stats.dot3adAggPortPartnerOperState;
380 lacp_elem.counterBlock.lacp.LACPDUsRx =
381 lacp_stats.dot3adAggPortStatsLACPDUsRx;
382 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsRx);
383 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsRx);
384 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.unknownRx);
385 lacp_elem.counterBlock.lacp.illegalRx =
386 lacp_stats.dot3adAggPortStatsIllegalRx;
387 lacp_elem.counterBlock.lacp.LACPDUsTx =
388 lacp_stats.dot3adAggPortStatsLACPDUsTx;
389 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsTx);
390 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsTx);
391 SFLADD_ELEMENT(cs, &lacp_elem);
392 }
393
394 /* Include Port name. */
395 if ((ifName = netdev_get_name(dsp->ofport->netdev)) != NULL) {
396 memset(&name_elem, 0, sizeof name_elem);
397 name_elem.tag = SFLCOUNTERS_PORTNAME;
398 name_elem.counterBlock.portName.portName.str = (char *)ifName;
399 name_elem.counterBlock.portName.portName.len = strlen(ifName);
400 SFLADD_ELEMENT(cs, &name_elem);
401 }
402
403 /* Include OpenFlow DPID and openflow port number. */
404 memset(&of_elem, 0, sizeof of_elem);
405 of_elem.tag = SFLCOUNTERS_OPENFLOWPORT;
406 of_elem.counterBlock.ofPort.datapath_id =
407 ofproto_get_datapath_id(dsp->ofport->ofproto);
408 of_elem.counterBlock.ofPort.port_no =
409 (OVS_FORCE uint32_t)dsp->ofport->ofp_port;
410 SFLADD_ELEMENT(cs, &of_elem);
411
412 /* Include ethernet counters */
413 memset(&eth_elem, 0, sizeof eth_elem);
414 eth_elem.tag = SFLCOUNTERS_ETHERNET;
415 eth_counters = &eth_elem.counterBlock.ethernet;
416 eth_counters->dot3StatsAlignmentErrors = stats.rx_frame_errors;
417 eth_counters->dot3StatsFCSErrors = stats.rx_crc_errors;
418 eth_counters->dot3StatsFrameTooLongs = stats.rx_oversize_errors;
419 SFL_UNDEF_COUNTER(eth_counters->dot3StatsSingleCollisionFrames);
420 SFL_UNDEF_COUNTER(eth_counters->dot3StatsMultipleCollisionFrames);
421 SFL_UNDEF_COUNTER(eth_counters->dot3StatsSQETestErrors);
422 SFL_UNDEF_COUNTER(eth_counters->dot3StatsDeferredTransmissions);
423 SFL_UNDEF_COUNTER(eth_counters->dot3StatsLateCollisions);
424 SFL_UNDEF_COUNTER(eth_counters->dot3StatsExcessiveCollisions);
425 SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacTransmitErrors);
426 SFL_UNDEF_COUNTER(eth_counters->dot3StatsCarrierSenseErrors);
427 SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacReceiveErrors);
428 SFL_UNDEF_COUNTER(eth_counters->dot3StatsSymbolErrors);
429 SFLADD_ELEMENT(cs, &eth_elem);
430
431 sfl_poller_writeCountersSample(poller, cs);
432 }
433
434 /* Obtains an address to use for the local sFlow agent and stores it into
435 * '*agent_addr'. Returns true if successful, false on failure.
436 *
437 * The sFlow agent address should be a local IP address that is persistent and
438 * reachable over the network, if possible. The IP address associated with
439 * 'agent_device' is used if it has one, and otherwise 'control_ip', the IP
440 * address used to talk to the controller. If the agent device is not
441 * specified then it is figured out by taking a look at the routing table based
442 * on 'targets'. */
443 static bool
444 sflow_choose_agent_address(const char *agent_device,
445 const struct sset *targets,
446 const char *control_ip,
447 SFLAddress *agent_addr)
448 {
449 const char *target;
450 struct in_addr in4;
451
452 memset(agent_addr, 0, sizeof *agent_addr);
453 agent_addr->type = SFLADDRESSTYPE_IP_V4;
454
455 if (agent_device) {
456 if (!netdev_get_in4_by_name(agent_device, &in4)) {
457 goto success;
458 }
459 }
460
461 SSET_FOR_EACH (target, targets) {
462 union {
463 struct sockaddr_storage ss;
464 struct sockaddr_in sin;
465 } sa;
466 char name[IFNAMSIZ];
467
468 if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sa.ss)
469 && sa.ss.ss_family == AF_INET) {
470 struct in6_addr addr6, src, gw;
471
472 in6_addr_set_mapped_ipv4(&addr6, sa.sin.sin_addr.s_addr);
473 /* sFlow only supports target in default routing table with
474 * packet mark zero.
475 */
476 if (ovs_router_lookup(0, &addr6, name, &src, &gw)) {
477
478 in4.s_addr = in6_addr_get_mapped_ipv4(&src);
479 goto success;
480 }
481 }
482 }
483
484 if (control_ip && !lookup_ip(control_ip, &in4)) {
485 goto success;
486 }
487
488 VLOG_ERR("could not determine IP address for sFlow agent");
489 return false;
490
491 success:
492 agent_addr->address.ip_v4.addr = (OVS_FORCE uint32_t) in4.s_addr;
493 return true;
494 }
495
496 static void
497 dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
498 {
499 if (ds->sflow_agent) {
500 sflow_global_counters_subid_clear(ds->sflow_agent->subId);
501 sfl_agent_release(ds->sflow_agent);
502 free(ds->sflow_agent);
503 ds->sflow_agent = NULL;
504 }
505 collectors_destroy(ds->collectors);
506 ds->collectors = NULL;
507 ofproto_sflow_options_destroy(ds->options);
508 ds->options = NULL;
509
510 /* Turn off sampling to save CPU cycles. */
511 ds->probability = 0;
512 }
513
514 void
515 dpif_sflow_clear(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
516 {
517 ovs_mutex_lock(&mutex);
518 dpif_sflow_clear__(ds);
519 ovs_mutex_unlock(&mutex);
520 }
521
522 bool
523 dpif_sflow_is_enabled(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
524 {
525 bool enabled;
526
527 ovs_mutex_lock(&mutex);
528 enabled = ds->collectors != NULL;
529 ovs_mutex_unlock(&mutex);
530 return enabled;
531 }
532
533 struct dpif_sflow *
534 dpif_sflow_create(void)
535 {
536 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
537 struct dpif_sflow *ds;
538
539 if (ovsthread_once_start(&once)) {
540 ovs_mutex_init_recursive(&mutex);
541 ovsthread_once_done(&once);
542 }
543
544 ds = xcalloc(1, sizeof *ds);
545 ds->next_tick = time_now() + 1;
546 hmap_init(&ds->ports);
547 ds->probability = 0;
548 ovs_refcount_init(&ds->ref_cnt);
549
550 return ds;
551 }
552
553 struct dpif_sflow *
554 dpif_sflow_ref(const struct dpif_sflow *ds_)
555 {
556 struct dpif_sflow *ds = CONST_CAST(struct dpif_sflow *, ds_);
557 if (ds) {
558 ovs_refcount_ref(&ds->ref_cnt);
559 }
560 return ds;
561 }
562
563 /* 32-bit fraction of packets to sample with. A value of 0 samples no packets,
564 * a value of %UINT32_MAX samples all packets and intermediate values sample
565 * intermediate fractions of packets. */
566 uint32_t
567 dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
568 {
569 uint32_t probability;
570 ovs_mutex_lock(&mutex);
571 probability = ds->probability;
572 ovs_mutex_unlock(&mutex);
573 return probability;
574 }
575
576 void
577 dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
578 {
579 if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) {
580 struct dpif_sflow_port *dsp, *next;
581
582 dpif_sflow_clear(ds);
583 HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) {
584 dpif_sflow_del_port__(ds, dsp);
585 }
586 hmap_destroy(&ds->ports);
587 free(ds);
588 }
589 }
590
591 static void
592 dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
593 OVS_REQUIRES(mutex)
594 {
595 SFLPoller *poller = sfl_agent_addPoller(ds->sflow_agent, &dsp->dsi, ds,
596 sflow_agent_get_counters);
597 sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
598 sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
599 sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port));
600 }
601
602 static enum dpif_sflow_tunnel_type
603 dpif_sflow_tunnel_type(struct ofport *ofport) {
604 const char *type = netdev_get_type(ofport->netdev);
605 if (type) {
606 if (strcmp(type, "gre") == 0) {
607 return DPIF_SFLOW_TUNNEL_GRE;
608 } else if (strcmp(type, "vxlan") == 0) {
609 return DPIF_SFLOW_TUNNEL_VXLAN;
610 } else if (strcmp(type, "lisp") == 0) {
611 return DPIF_SFLOW_TUNNEL_LISP;
612 } else if (strcmp(type, "geneve") == 0) {
613 return DPIF_SFLOW_TUNNEL_GENEVE;
614 }
615 }
616 return DPIF_SFLOW_TUNNEL_UNKNOWN;
617 }
618
619 static uint8_t
620 dpif_sflow_tunnel_proto(enum dpif_sflow_tunnel_type tunnel_type)
621 {
622 /* Default to 0 (IPPROTO_IP), meaning "unknown". */
623 uint8_t ipproto = 0;
624 switch(tunnel_type) {
625
626 case DPIF_SFLOW_TUNNEL_GRE:
627 ipproto = IPPROTO_GRE;
628 break;
629
630 case DPIF_SFLOW_TUNNEL_VXLAN:
631 case DPIF_SFLOW_TUNNEL_LISP:
632 case DPIF_SFLOW_TUNNEL_GENEVE:
633 ipproto = IPPROTO_UDP;
634
635 case DPIF_SFLOW_TUNNEL_UNKNOWN:
636 break;
637 }
638 return ipproto;
639 }
640
641 void
642 dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
643 odp_port_t odp_port) OVS_EXCLUDED(mutex)
644 {
645 struct dpif_sflow_port *dsp;
646 int ifindex;
647 enum dpif_sflow_tunnel_type tunnel_type;
648
649 ovs_mutex_lock(&mutex);
650 dpif_sflow_del_port(ds, odp_port);
651
652 tunnel_type = dpif_sflow_tunnel_type(ofport);
653 ifindex = netdev_get_ifindex(ofport->netdev);
654
655 if (ifindex <= 0
656 && tunnel_type == DPIF_SFLOW_TUNNEL_UNKNOWN) {
657 /* Not an ifindex port, and not a tunnel port either
658 * so do not add a cross-reference to it here.
659 */
660 goto out;
661 }
662
663 /* Add to table of ports. */
664 dsp = xmalloc(sizeof *dsp);
665 dsp->ofport = ofport;
666 dsp->odp_port = odp_port;
667 dsp->tunnel_type = tunnel_type;
668 hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port));
669
670 if (ifindex > 0) {
671 /* Add poller for ports that have ifindex. */
672 SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
673 if (ds->sflow_agent) {
674 dpif_sflow_add_poller(ds, dsp);
675 }
676 } else {
677 /* Record "ifindex unknown" for the others */
678 SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, 0, 0);
679 }
680
681 out:
682 ovs_mutex_unlock(&mutex);
683 }
684
685 static void
686 dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
687 OVS_REQUIRES(mutex)
688 {
689 if (ds->sflow_agent
690 && SFL_DS_INDEX(dsp->dsi)) {
691 sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi);
692 sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi);
693 }
694 hmap_remove(&ds->ports, &dsp->hmap_node);
695 free(dsp);
696 }
697
698 void
699 dpif_sflow_del_port(struct dpif_sflow *ds, odp_port_t odp_port)
700 OVS_EXCLUDED(mutex)
701 {
702 struct dpif_sflow_port *dsp;
703
704 ovs_mutex_lock(&mutex);
705 dsp = dpif_sflow_find_port(ds, odp_port);
706 if (dsp) {
707 dpif_sflow_del_port__(ds, dsp);
708 }
709 ovs_mutex_unlock(&mutex);
710 }
711
712 void
713 dpif_sflow_set_options(struct dpif_sflow *ds,
714 const struct ofproto_sflow_options *options)
715 OVS_EXCLUDED(mutex)
716 {
717 struct dpif_sflow_port *dsp;
718 bool options_changed;
719 SFLReceiver *receiver;
720 SFLAddress agentIP;
721 time_t now;
722 SFLDataSource_instance dsi;
723 uint32_t dsIndex;
724 SFLSampler *sampler;
725 SFLPoller *poller;
726
727 ovs_mutex_lock(&mutex);
728 if (sset_is_empty(&options->targets) || !options->sampling_rate) {
729 /* No point in doing any work if there are no targets or nothing to
730 * sample. */
731 dpif_sflow_clear__(ds);
732 goto out;
733 }
734
735 options_changed = (!ds->options
736 || !ofproto_sflow_options_equal(options, ds->options));
737
738 /* Configure collectors if options have changed or if we're shortchanged in
739 * collectors (which indicates that opening one or more of the configured
740 * collectors failed, so that we should retry). */
741 if (options_changed
742 || collectors_count(ds->collectors) < sset_count(&options->targets)) {
743 collectors_destroy(ds->collectors);
744 collectors_create(&options->targets, SFL_DEFAULT_COLLECTOR_PORT,
745 &ds->collectors);
746 if (ds->collectors == NULL) {
747 VLOG_WARN_RL(&rl, "no collectors could be initialized, "
748 "sFlow disabled");
749 dpif_sflow_clear__(ds);
750 goto out;
751 }
752 }
753
754 /* Choose agent IP address and agent device (if not yet setup) */
755 if (!sflow_choose_agent_address(options->agent_device,
756 &options->targets,
757 options->control_ip, &agentIP)) {
758 dpif_sflow_clear__(ds);
759 goto out;
760 }
761
762 /* Avoid reconfiguring if options didn't change. */
763 if (!options_changed) {
764 goto out;
765 }
766 ofproto_sflow_options_destroy(ds->options);
767 ds->options = ofproto_sflow_options_clone(options);
768
769 /* Create agent. */
770 VLOG_INFO("creating sFlow agent %d", options->sub_id);
771 if (ds->sflow_agent) {
772 sflow_global_counters_subid_clear(ds->sflow_agent->subId);
773 sfl_agent_release(ds->sflow_agent);
774 }
775 ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
776 now = time_wall();
777 sfl_agent_init(ds->sflow_agent,
778 &agentIP,
779 options->sub_id,
780 now, /* Boot time. */
781 now, /* Current time. */
782 ds, /* Pointer supplied to callbacks. */
783 sflow_agent_alloc_cb,
784 sflow_agent_free_cb,
785 sflow_agent_error_cb,
786 sflow_agent_send_packet_cb);
787
788 receiver = sfl_agent_addReceiver(ds->sflow_agent);
789 sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow");
790 sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff);
791
792 /* Set the sampling_rate down in the datapath. */
793 ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate);
794
795 /* Add a single sampler for the bridge. This appears as a PHYSICAL_ENTITY
796 because it is associated with the hypervisor, and interacts with the server
797 hardware directly. The sub_id is used to distinguish this sampler from
798 others on other bridges within the same agent. */
799 dsIndex = 1000 + options->sub_id;
800 SFL_DS_SET(dsi, SFL_DSCLASS_PHYSICAL_ENTITY, dsIndex, 0);
801 sampler = sfl_agent_addSampler(ds->sflow_agent, &dsi);
802 sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, ds->options->sampling_rate);
803 sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
804 sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
805
806 /* Add a counter poller for the bridge so we can use it to send
807 global counters such as datapath cache hit/miss stats. */
808 poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
809 sflow_agent_get_global_counters);
810 sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
811 sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
812
813 /* Add pollers for the currently known ifindex-ports */
814 HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
815 if (SFL_DS_INDEX(dsp->dsi)) {
816 dpif_sflow_add_poller(ds, dsp);
817 }
818 }
819
820
821 out:
822 ovs_mutex_unlock(&mutex);
823 }
824
825 int
826 dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds,
827 odp_port_t odp_port) OVS_EXCLUDED(mutex)
828 {
829 struct dpif_sflow_port *dsp;
830 int ret;
831
832 ovs_mutex_lock(&mutex);
833 dsp = dpif_sflow_find_port(ds, odp_port);
834 ret = dsp ? SFL_DS_INDEX(dsp->dsi) : 0;
835 ovs_mutex_unlock(&mutex);
836 return ret;
837 }
838
839 static void
840 dpif_sflow_tunnel_v4(uint8_t tunnel_ipproto,
841 const struct flow_tnl *tunnel,
842 SFLSampled_ipv4 *ipv4)
843
844 {
845 ipv4->protocol = tunnel_ipproto;
846 ipv4->tos = tunnel->ip_tos;
847 ipv4->src_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_src;
848 ipv4->dst_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_dst;
849 ipv4->src_port = (OVS_FORCE uint16_t) tunnel->tp_src;
850 ipv4->dst_port = (OVS_FORCE uint16_t) tunnel->tp_dst;
851 }
852
853 static void
854 dpif_sflow_push_mpls_lse(struct dpif_sflow_actions *sflow_actions,
855 ovs_be32 lse)
856 {
857 if (sflow_actions->mpls_stack_depth >= FLOW_MAX_MPLS_LABELS) {
858 sflow_actions->mpls_err = true;
859 return;
860 }
861
862 /* Record the new lse in host-byte-order. */
863 /* BOS flag will be fixed later when we send stack to sFlow library. */
864 sflow_actions->mpls_lse[sflow_actions->mpls_stack_depth++] = ntohl(lse);
865 }
866
867 static void
868 dpif_sflow_pop_mpls_lse(struct dpif_sflow_actions *sflow_actions)
869 {
870 if (sflow_actions->mpls_stack_depth == 0) {
871 sflow_actions->mpls_err = true;
872 return;
873 }
874 sflow_actions->mpls_stack_depth--;
875 }
876
877 static void
878 dpif_sflow_set_mpls(struct dpif_sflow_actions *sflow_actions,
879 const struct ovs_key_mpls *mpls_key, int n)
880 {
881 int ii;
882 if (n > FLOW_MAX_MPLS_LABELS) {
883 sflow_actions->mpls_err = true;
884 return;
885 }
886
887 for (ii = 0; ii < n; ii++) {
888 /* Reverse stack order, and use host-byte-order for each lse. */
889 sflow_actions->mpls_lse[n - ii - 1] = ntohl(mpls_key[ii].mpls_lse);
890 }
891 sflow_actions->mpls_stack_depth = n;
892 }
893
894 static void
895 sflow_read_tnl_push_action(const struct nlattr *attr,
896 struct dpif_sflow_actions *sflow_actions)
897 {
898 /* Modeled on lib/odp-util.c: format_odp_tnl_push_header */
899 const struct ovs_action_push_tnl *data = nl_attr_get(attr);
900 const struct eth_header *eth = (const struct eth_header *) data->header;
901 const struct ip_header *ip
902 = ALIGNED_CAST(const struct ip_header *, eth + 1);
903
904 sflow_actions->out_port = data->out_port;
905
906 /* Ethernet. */
907 /* TODO: SFlow does not currently define a MAC-in-MAC
908 * encapsulation structure. We could use an extension
909 * structure to report this.
910 */
911
912 /* IPv4 */
913 /* Cannot assume alignment so just use memcpy. */
914 sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
915 sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
916 sflow_actions->tunnel.ip_tos = ip->ip_tos;
917 sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
918 /* The tnl_push action can supply the ip_protocol too. */
919 sflow_actions->tunnel_ipproto = ip->ip_proto;
920
921 /* Layer 4 */
922 if (data->tnl_type == OVS_VPORT_TYPE_VXLAN
923 || data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
924 const struct udp_header *udp = (const struct udp_header *) (ip + 1);
925 sflow_actions->tunnel.tp_src = udp->udp_src;
926 sflow_actions->tunnel.tp_dst = udp->udp_dst;
927
928 if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
929 const struct vxlanhdr *vxh = (const struct vxlanhdr *) (udp + 1);
930 uint64_t tun_id = ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8;
931 sflow_actions->tunnel.tun_id = htonll(tun_id);
932 } else {
933 const struct genevehdr *gnh = (const struct genevehdr *) (udp + 1);
934 uint64_t tun_id = ntohl(get_16aligned_be32(&gnh->vni)) >> 8;
935 sflow_actions->tunnel.tun_id = htonll(tun_id);
936 }
937 } else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
938 const void *l4 = ip + 1;
939 const struct gre_base_hdr *greh = (const struct gre_base_hdr *) l4;
940 ovs_16aligned_be32 *options = (ovs_16aligned_be32 *)(greh + 1);
941 if (greh->flags & htons(GRE_CSUM)) {
942 options++;
943 }
944 if (greh->flags & htons(GRE_KEY)) {
945 uint64_t tun_id = ntohl(get_16aligned_be32(options));
946 sflow_actions->tunnel.tun_id = htonll(tun_id);
947 }
948 }
949 }
950
951 static void
952 sflow_read_set_action(const struct nlattr *attr,
953 struct dpif_sflow_actions *sflow_actions)
954 {
955 enum ovs_key_attr type = nl_attr_type(attr);
956 switch (type) {
957 case OVS_KEY_ATTR_ENCAP:
958 if (++sflow_actions->encap_depth > 1) {
959 /* Do not handle multi-encap for now. */
960 sflow_actions->tunnel_err = true;
961 } else {
962 dpif_sflow_read_actions(NULL,
963 nl_attr_get(attr), nl_attr_get_size(attr),
964 sflow_actions);
965 }
966 break;
967 case OVS_KEY_ATTR_PRIORITY:
968 case OVS_KEY_ATTR_SKB_MARK:
969 case OVS_KEY_ATTR_DP_HASH:
970 case OVS_KEY_ATTR_RECIRC_ID:
971 break;
972
973 case OVS_KEY_ATTR_TUNNEL: {
974 if (++sflow_actions->encap_depth > 1) {
975 /* Do not handle multi-encap for now. */
976 sflow_actions->tunnel_err = true;
977 } else {
978 if (odp_tun_key_from_attr(attr, &sflow_actions->tunnel)
979 == ODP_FIT_ERROR) {
980 /* Tunnel parsing error. */
981 sflow_actions->tunnel_err = true;
982 }
983 }
984 break;
985 }
986
987 case OVS_KEY_ATTR_IN_PORT:
988 case OVS_KEY_ATTR_ETHERNET:
989 case OVS_KEY_ATTR_VLAN:
990 break;
991
992 case OVS_KEY_ATTR_MPLS: {
993 const struct ovs_key_mpls *mpls_key = nl_attr_get(attr);
994 size_t size = nl_attr_get_size(attr);
995 dpif_sflow_set_mpls(sflow_actions, mpls_key, size / sizeof *mpls_key);
996 break;
997 }
998
999 case OVS_KEY_ATTR_ETHERTYPE:
1000 case OVS_KEY_ATTR_IPV4:
1001 if (sflow_actions->encap_depth == 1) {
1002 const struct ovs_key_ipv4 *key = nl_attr_get(attr);
1003 if (key->ipv4_src) {
1004 sflow_actions->tunnel.ip_src = key->ipv4_src;
1005 }
1006 if (key->ipv4_dst) {
1007 sflow_actions->tunnel.ip_dst = key->ipv4_dst;
1008 }
1009 if (key->ipv4_proto) {
1010 sflow_actions->tunnel_ipproto = key->ipv4_proto;
1011 }
1012 if (key->ipv4_tos) {
1013 sflow_actions->tunnel.ip_tos = key->ipv4_tos;
1014 }
1015 if (key->ipv4_ttl) {
1016 sflow_actions->tunnel.ip_tos = key->ipv4_ttl;
1017 }
1018 }
1019 break;
1020
1021 case OVS_KEY_ATTR_IPV6:
1022 /* TODO: parse IPv6 encap. */
1023 break;
1024
1025 /* These have the same structure and format. */
1026 case OVS_KEY_ATTR_TCP:
1027 case OVS_KEY_ATTR_UDP:
1028 case OVS_KEY_ATTR_SCTP:
1029 if (sflow_actions->encap_depth == 1) {
1030 const struct ovs_key_tcp *key = nl_attr_get(attr);
1031 if (key->tcp_src) {
1032 sflow_actions->tunnel.tp_src = key->tcp_src;
1033 }
1034 if (key->tcp_dst) {
1035 sflow_actions->tunnel.tp_dst = key->tcp_dst;
1036 }
1037 }
1038 break;
1039
1040 case OVS_KEY_ATTR_TCP_FLAGS:
1041 case OVS_KEY_ATTR_ICMP:
1042 case OVS_KEY_ATTR_ICMPV6:
1043 case OVS_KEY_ATTR_ARP:
1044 case OVS_KEY_ATTR_ND:
1045 case OVS_KEY_ATTR_CT_STATE:
1046 case OVS_KEY_ATTR_CT_ZONE:
1047 case OVS_KEY_ATTR_CT_MARK:
1048 case OVS_KEY_ATTR_CT_LABELS:
1049 case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
1050 case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
1051 case OVS_KEY_ATTR_UNSPEC:
1052 case OVS_KEY_ATTR_PACKET_TYPE:
1053 case __OVS_KEY_ATTR_MAX:
1054 default:
1055 break;
1056 }
1057 }
1058
1059 static void
1060 dpif_sflow_capture_input_mpls(const struct flow *flow,
1061 struct dpif_sflow_actions *sflow_actions)
1062 {
1063 if (eth_type_mpls(flow->dl_type)) {
1064 int depth = 0;
1065 int ii;
1066 ovs_be32 lse;
1067 /* Calculate depth by detecting BOS. */
1068 for (ii = 0; ii < FLOW_MAX_MPLS_LABELS; ii++) {
1069 lse = flow->mpls_lse[ii];
1070 depth++;
1071 if (lse & htonl(MPLS_BOS_MASK)) {
1072 break;
1073 }
1074 }
1075 /* Capture stack, reversing stack order, and
1076 * using host-byte-order for each lse. BOS flag
1077 * is ignored for now. It is set later when
1078 * the output stack is encoded.
1079 */
1080 for (ii = 0; ii < depth; ii++) {
1081 lse = flow->mpls_lse[ii];
1082 sflow_actions->mpls_lse[depth - ii - 1] = ntohl(lse);
1083 }
1084 sflow_actions->mpls_stack_depth = depth;
1085 }
1086 }
1087
1088 void
1089 dpif_sflow_read_actions(const struct flow *flow,
1090 const struct nlattr *actions, size_t actions_len,
1091 struct dpif_sflow_actions *sflow_actions)
1092 {
1093 const struct nlattr *a;
1094 unsigned int left;
1095
1096 if (actions_len == 0) {
1097 /* Packet dropped.*/
1098 return;
1099 }
1100
1101 if (flow != NULL) {
1102 /* Make sure the MPLS output stack
1103 * is seeded with the input stack.
1104 */
1105 dpif_sflow_capture_input_mpls(flow, sflow_actions);
1106
1107 /* XXX when 802.1AD(QinQ) is supported then
1108 * we can do the same with VLAN stacks here
1109 */
1110 }
1111
1112 NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
1113 enum ovs_action_attr type = nl_attr_type(a);
1114 switch (type) {
1115 case OVS_ACTION_ATTR_OUTPUT:
1116 /* Capture the output port in case we need it
1117 * to get the output tunnel type.
1118 */
1119 sflow_actions->out_port = nl_attr_get_odp_port(a);
1120 break;
1121
1122 case OVS_ACTION_ATTR_TUNNEL_POP:
1123 /* XXX: Do not handle this for now. It's not clear
1124 * if we should start with encap_depth == 1 when we
1125 * see an input tunnel, or if we should assume
1126 * that the input tunnel was always "popped" if it
1127 * was presented to us decoded in flow->tunnel?
1128 *
1129 * If we do handle this it might look like this,
1130 * as we clear the captured tunnel info and decrement
1131 * the encap_depth:
1132 *
1133 * memset(&sflow_actions->tunnel, 0, sizeof struct flow_tnl);
1134 * sflow_actions->tunnel_ipproto = 0;
1135 * --sflow_actions->encap_depth;
1136 *
1137 * but for now just disable the tunnel annotation:
1138 */
1139 sflow_actions->tunnel_err = true;
1140 break;
1141
1142 case OVS_ACTION_ATTR_TUNNEL_PUSH:
1143 /* XXX: This actions appears to come with it's own
1144 * OUTPUT action, so should it be regarded as having
1145 * an implicit "pop" following it too? Put another
1146 * way, would two tnl_push() actions in succession
1147 * result in a packet with two layers of encap?
1148 */
1149 if (++sflow_actions->encap_depth > 1) {
1150 /* Do not handle multi-encap for now. */
1151 sflow_actions->tunnel_err = true;
1152 } else {
1153 sflow_read_tnl_push_action(a, sflow_actions);
1154 }
1155 break;
1156
1157 case OVS_ACTION_ATTR_TRUNC:
1158 case OVS_ACTION_ATTR_USERSPACE:
1159 case OVS_ACTION_ATTR_RECIRC:
1160 case OVS_ACTION_ATTR_HASH:
1161 case OVS_ACTION_ATTR_CT:
1162 case OVS_ACTION_ATTR_METER:
1163 break;
1164
1165 case OVS_ACTION_ATTR_SET_MASKED:
1166 /* TODO: apply mask. XXX: Are we likely to see this? */
1167 break;
1168
1169 case OVS_ACTION_ATTR_SET:
1170 sflow_read_set_action(nl_attr_get(a), sflow_actions);
1171 break;
1172
1173 case OVS_ACTION_ATTR_PUSH_VLAN:
1174 case OVS_ACTION_ATTR_POP_VLAN:
1175 /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
1176 * construct a VLAN-stack. The sFlow user-action cookie already
1177 * captures the egress VLAN ID so there is nothing more to do here.
1178 */
1179 break;
1180
1181 case OVS_ACTION_ATTR_PUSH_MPLS: {
1182 const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
1183 if (mpls) {
1184 dpif_sflow_push_mpls_lse(sflow_actions, mpls->mpls_lse);
1185 }
1186 break;
1187 }
1188 case OVS_ACTION_ATTR_POP_MPLS: {
1189 dpif_sflow_pop_mpls_lse(sflow_actions);
1190 break;
1191 }
1192 case OVS_ACTION_ATTR_PUSH_ETH:
1193 case OVS_ACTION_ATTR_POP_ETH:
1194 /* TODO: SFlow does not currently define a MAC-in-MAC
1195 * encapsulation structure. We could use an extension
1196 * structure to report this.
1197 */
1198 break;
1199 case OVS_ACTION_ATTR_SAMPLE:
1200 case OVS_ACTION_ATTR_CLONE:
1201 case OVS_ACTION_ATTR_UNSPEC:
1202 case __OVS_ACTION_ATTR_MAX:
1203 default:
1204 break;
1205 }
1206 }
1207 }
1208
1209 static void
1210 dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
1211 uint32_t *mpls_lse_buf,
1212 const struct dpif_sflow_actions *sflow_actions)
1213 {
1214 /* Put the MPLS stack back into "packet header" order,
1215 * and make sure the BOS flag is set correctly on the last
1216 * one. Each lse is still in host-byte-order.
1217 */
1218 int ii;
1219 uint32_t lse;
1220 stack->depth = sflow_actions->mpls_stack_depth;
1221 stack->stack = mpls_lse_buf;
1222 for (ii = 0; ii < stack->depth; ii++) {
1223 lse = sflow_actions->mpls_lse[stack->depth - ii - 1];
1224 stack->stack[ii] = (lse & ~MPLS_BOS_MASK);
1225 }
1226 stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
1227 }
1228
1229 /* Extract the output port count from the user action cookie.
1230 * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
1231 */
1232 static uint32_t
1233 dpif_sflow_cookie_num_outputs(const union user_action_cookie *cookie)
1234 {
1235 uint32_t format = cookie->sflow.output & 0xC0000000;
1236 uint32_t port_n = cookie->sflow.output & 0x3FFFFFFF;
1237 if (format == 0) {
1238 return port_n ? 1 : 0;
1239 }
1240 else if (format == 0x80000000) {
1241 return port_n;
1242 }
1243 return 0;
1244 }
1245
1246 void
1247 dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
1248 const struct flow *flow, odp_port_t odp_in_port,
1249 const union user_action_cookie *cookie,
1250 const struct dpif_sflow_actions *sflow_actions)
1251 OVS_EXCLUDED(mutex)
1252 {
1253 SFL_FLOW_SAMPLE_TYPE fs;
1254 SFLFlow_sample_element hdrElem;
1255 SFLSampled_header *header;
1256 SFLFlow_sample_element switchElem;
1257 uint8_t tnlInProto, tnlOutProto;
1258 SFLFlow_sample_element tnlInElem, tnlOutElem;
1259 SFLFlow_sample_element vniInElem, vniOutElem;
1260 SFLFlow_sample_element mplsElem;
1261 uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
1262 SFLSampler *sampler;
1263 struct dpif_sflow_port *in_dsp;
1264 struct dpif_sflow_port *out_dsp;
1265 ovs_be16 vlan_tci;
1266
1267 ovs_mutex_lock(&mutex);
1268 sampler = ds->sflow_agent->samplers;
1269 if (!sampler) {
1270 goto out;
1271 }
1272
1273 /* Build a flow sample. */
1274 memset(&fs, 0, sizeof fs);
1275
1276 /* Look up the input ifIndex if this port has one. Otherwise just
1277 * leave it as 0 (meaning 'unknown') and continue. */
1278 in_dsp = dpif_sflow_find_port(ds, odp_in_port);
1279 if (in_dsp) {
1280 fs.input = SFL_DS_INDEX(in_dsp->dsi);
1281 }
1282
1283 /* Make the assumption that the random number generator in the datapath converges
1284 * to the configured mean, and just increment the samplePool by the configured
1285 * sampling rate every time. */
1286 sampler->samplePool += sfl_sampler_get_sFlowFsPacketSamplingRate(sampler);
1287
1288 /* Sampled header. */
1289 memset(&hdrElem, 0, sizeof hdrElem);
1290 hdrElem.tag = SFLFLOW_HEADER;
1291 header = &hdrElem.flowType.header;
1292 header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
1293 /* The frame_length should include the Ethernet FCS (4 bytes),
1294 * but it has already been stripped, so we need to add 4 here. */
1295 header->frame_length = dp_packet_size(packet) + 4;
1296 /* Ethernet FCS stripped off. */
1297 header->stripped = 4;
1298 header->header_length = MIN(dp_packet_size(packet),
1299 sampler->sFlowFsMaximumHeaderSize);
1300 header->header_bytes = dp_packet_data(packet);
1301
1302 /* Add extended switch element. */
1303 memset(&switchElem, 0, sizeof(switchElem));
1304 switchElem.tag = SFLFLOW_EX_SWITCH;
1305 switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlans[0].tci);
1306 switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlans[0].tci);
1307
1308 /* Retrieve data from user_action_cookie. */
1309 vlan_tci = cookie->sflow.vlan_tci;
1310 switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(vlan_tci);
1311 switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(vlan_tci);
1312
1313 fs.output = cookie->sflow.output;
1314
1315 /* Input tunnel. */
1316 if (flow->tunnel.ip_dst) {
1317 memset(&tnlInElem, 0, sizeof(tnlInElem));
1318 tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
1319 tnlInProto = in_dsp ? dpif_sflow_tunnel_proto(in_dsp->tunnel_type) : 0;
1320 dpif_sflow_tunnel_v4(tnlInProto,
1321 &flow->tunnel,
1322 &tnlInElem.flowType.ipv4);
1323 SFLADD_ELEMENT(&fs, &tnlInElem);
1324 if (flow->tunnel.tun_id) {
1325 memset(&vniInElem, 0, sizeof(vniInElem));
1326 vniInElem.tag = SFLFLOW_EX_VNI_INGRESS;
1327 vniInElem.flowType.tunnel_vni.vni
1328 = ntohll(flow->tunnel.tun_id);
1329 SFLADD_ELEMENT(&fs, &vniInElem);
1330 }
1331 }
1332
1333 /* Output tunnel. */
1334 if (sflow_actions
1335 && sflow_actions->encap_depth == 1
1336 && !sflow_actions->tunnel_err
1337 && dpif_sflow_cookie_num_outputs(cookie) == 1) {
1338 tnlOutProto = sflow_actions->tunnel_ipproto;
1339 if (tnlOutProto == 0) {
1340 /* Try to infer the ip-protocol from the output port. */
1341 if (sflow_actions->out_port != ODPP_NONE) {
1342 out_dsp = dpif_sflow_find_port(ds, sflow_actions->out_port);
1343 if (out_dsp) {
1344 tnlOutProto = dpif_sflow_tunnel_proto(out_dsp->tunnel_type);
1345 }
1346 }
1347 }
1348 memset(&tnlOutElem, 0, sizeof(tnlOutElem));
1349 tnlOutElem.tag = SFLFLOW_EX_IPV4_TUNNEL_EGRESS;
1350 dpif_sflow_tunnel_v4(tnlOutProto,
1351 &sflow_actions->tunnel,
1352 &tnlOutElem.flowType.ipv4);
1353 SFLADD_ELEMENT(&fs, &tnlOutElem);
1354 if (sflow_actions->tunnel.tun_id) {
1355 memset(&vniOutElem, 0, sizeof(vniOutElem));
1356 vniOutElem.tag = SFLFLOW_EX_VNI_EGRESS;
1357 vniOutElem.flowType.tunnel_vni.vni
1358 = ntohll(sflow_actions->tunnel.tun_id);
1359 SFLADD_ELEMENT(&fs, &vniOutElem);
1360 }
1361 }
1362
1363 /* MPLS output label stack. */
1364 if (sflow_actions
1365 && sflow_actions->mpls_stack_depth > 0
1366 && !sflow_actions->mpls_err
1367 && dpif_sflow_cookie_num_outputs(cookie) == 1) {
1368 memset(&mplsElem, 0, sizeof(mplsElem));
1369 mplsElem.tag = SFLFLOW_EX_MPLS;
1370 dpif_sflow_encode_mpls_stack(&mplsElem.flowType.mpls.out_stack,
1371 mpls_lse_buf,
1372 sflow_actions);
1373 SFLADD_ELEMENT(&fs, &mplsElem);
1374 }
1375
1376 /* Submit the flow sample to be encoded into the next datagram. */
1377 SFLADD_ELEMENT(&fs, &hdrElem);
1378 SFLADD_ELEMENT(&fs, &switchElem);
1379 sfl_sampler_writeFlowSample(sampler, &fs);
1380
1381 out:
1382 ovs_mutex_unlock(&mutex);
1383 }
1384
1385 void
1386 dpif_sflow_run(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
1387 {
1388 ovs_mutex_lock(&mutex);
1389 if (ds->collectors != NULL) {
1390 time_t now = time_now();
1391 route_table_run();
1392 if (now >= ds->next_tick) {
1393 sfl_agent_tick(ds->sflow_agent, time_wall());
1394 ds->next_tick = now + 1;
1395 }
1396 }
1397 ovs_mutex_unlock(&mutex);
1398 }
1399
1400 void
1401 dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
1402 {
1403 ovs_mutex_lock(&mutex);
1404 if (ds->collectors != NULL) {
1405 poll_timer_wait_until(ds->next_tick * 1000LL);
1406 }
1407 ovs_mutex_unlock(&mutex);
1408 }