]> git.proxmox.com Git - mirror_ovs.git/blame - ofproto/ofproto-dpif-sflow.c
lacp: Don't send or receive PDUs when carrier state of slave is down
[mirror_ovs.git] / ofproto / ofproto-dpif-sflow.c
CommitLineData
72b06300 1/*
2225c0b9 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
f1588b1f 3 * Copyright (c) 2009 InMon Corp.
72b06300
BP
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#include <config.h>
bae473fe 19#include "ofproto-dpif-sflow.h"
72b06300 20#include <inttypes.h>
3d2912f2 21#include <sys/resource.h>
f6eb6b20 22#include <sys/socket.h>
733adf2a 23#include <net/if.h>
72b06300
BP
24#include <stdlib.h>
25#include "collectors.h"
72b06300 26#include "compiler.h"
bae473fe 27#include "dpif.h"
0cc96e48 28#include "hash.h"
ee89ea7b 29#include "openvswitch/hmap.h"
72b06300 30#include "netdev.h"
cdee00fd 31#include "netlink.h"
64c96779 32#include "openvswitch/ofpbuf.h"
72b06300 33#include "ofproto.h"
26233bb4 34#include "packets.h"
fd016ae3 35#include "openvswitch/poll-loop.h"
d9b4ebc5 36#include "ovs-router.h"
733adf2a 37#include "route-table.h"
72b06300
BP
38#include "sflow_api.h"
39#include "socket-util.h"
40#include "timeval.h"
e6211adc 41#include "openvswitch/vlog.h"
975a704c 42#include "lib/odp-util.h"
7321bda3 43#include "lib/unaligned.h"
392c7182 44#include "ofproto-provider.h"
50b9699f 45#include "lacp.h"
72b06300 46
d98e6007 47VLOG_DEFINE_THIS_MODULE(sflow);
5136ce49 48
34ae6d76
EJ
49static struct ovs_mutex mutex;
50
3d2912f2
NM
/* Sub-agent ID of the sFlow sub-agent that currently sends the datapath
 * (global) counters, or SFLOW_GC_SUBID_UNCLAIMED if no sub-agent has claimed
 * the role yet.
 *
 * The macro's replacement list is fully parenthesized so that it behaves as
 * a single expression wherever it is expanded (e.g. under 'sizeof'). */
#define SFLOW_GC_SUBID_UNCLAIMED ((uint32_t) -1)
static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
55
7321bda3
NM
/* Tunnel types that this module distinguishes.  Values are spelled out
 * explicitly; DPIF_SFLOW_TUNNEL_UNKNOWN is zero. */
enum dpif_sflow_tunnel_type {
    DPIF_SFLOW_TUNNEL_UNKNOWN = 0,
    DPIF_SFLOW_TUNNEL_VXLAN = 1,
    DPIF_SFLOW_TUNNEL_GRE = 2,
    DPIF_SFLOW_TUNNEL_LISP = 3,
    DPIF_SFLOW_TUNNEL_GENEVE = 4
};
66
bae473fe
JP
67struct dpif_sflow_port {
68 struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
72b06300 69 SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
392c7182 70 struct ofport *ofport; /* To retrive port stats. */
4e022ec0 71 odp_port_t odp_port;
7321bda3 72 enum dpif_sflow_tunnel_type tunnel_type;
72b06300
BP
73};
74
bae473fe 75struct dpif_sflow {
72b06300
BP
76 struct collectors *collectors;
77 SFLAgent *sflow_agent;
78 struct ofproto_sflow_options *options;
72b06300
BP
79 time_t next_tick;
80 size_t n_flood, n_all;
bae473fe 81 struct hmap ports; /* Contains "struct dpif_sflow_port"s. */
6ff686f2 82 uint32_t probability;
37bec3d3 83 struct ovs_refcount ref_cnt;
72b06300
BP
84};
85
bae473fe
JP
86static void dpif_sflow_del_port__(struct dpif_sflow *,
87 struct dpif_sflow_port *);
0cc96e48 88
72b06300
BP
89#define RECEIVER_INDEX 1
90
91static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
92
93static bool
94ofproto_sflow_options_equal(const struct ofproto_sflow_options *a,
bae473fe 95 const struct ofproto_sflow_options *b)
72b06300 96{
81e2083f 97 return (sset_equals(&a->targets, &b->targets)
72b06300
BP
98 && a->sampling_rate == b->sampling_rate
99 && a->polling_interval == b->polling_interval
100 && a->header_len == b->header_len
101 && a->sub_id == b->sub_id
fa49ca80
BP
102 && nullable_string_is_equal(a->agent_device, b->agent_device)
103 && nullable_string_is_equal(a->control_ip, b->control_ip));
72b06300
BP
104}
105
106static struct ofproto_sflow_options *
107ofproto_sflow_options_clone(const struct ofproto_sflow_options *old)
108{
109 struct ofproto_sflow_options *new = xmemdup(old, sizeof *old);
81e2083f 110 sset_clone(&new->targets, &old->targets);
2225c0b9
BP
111 new->agent_device = nullable_xstrdup(old->agent_device);
112 new->control_ip = nullable_xstrdup(old->control_ip);
72b06300
BP
113 return new;
114}
115
116static void
117ofproto_sflow_options_destroy(struct ofproto_sflow_options *options)
118{
119 if (options) {
81e2083f 120 sset_destroy(&options->targets);
72b06300
BP
121 free(options->agent_device);
122 free(options->control_ip);
123 free(options);
124 }
125}
126
127/* sFlow library callback to allocate memory. */
128static void *
67a4917b
BP
129sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
130 size_t bytes)
72b06300 131{
37551f8b 132 return xzalloc(bytes);
72b06300
BP
133}
134
135/* sFlow library callback to free memory. */
136static int
67a4917b
BP
137sflow_agent_free_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
138 void *obj)
72b06300
BP
139{
140 free(obj);
141 return 0;
142}
143
144/* sFlow library callback to report error. */
145static void
67a4917b
BP
146sflow_agent_error_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
147 char *msg)
72b06300
BP
148{
149 VLOG_WARN("sFlow agent error: %s", msg);
150}
151
152/* sFlow library callback to send datagram. */
153static void
bae473fe 154sflow_agent_send_packet_cb(void *ds_, SFLAgent *agent OVS_UNUSED,
67a4917b 155 SFLReceiver *receiver OVS_UNUSED, u_char *pkt,
72b06300
BP
156 uint32_t pktLen)
157{
bae473fe
JP
158 struct dpif_sflow *ds = ds_;
159 collectors_send(ds->collectors, pkt, pktLen);
72b06300
BP
160}
161
bae473fe 162static struct dpif_sflow_port *
4e022ec0 163dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
344e21d4 164 OVS_REQUIRES(mutex)
0cc96e48 165{
bae473fe 166 struct dpif_sflow_port *dsp;
0cc96e48 167
f9c0c3ec
EJ
168 HMAP_FOR_EACH_IN_BUCKET (dsp, hmap_node, hash_odp_port(odp_port),
169 &ds->ports) {
e1b1d06a 170 if (dsp->odp_port == odp_port) {
bae473fe 171 return dsp;
0cc96e48
BP
172 }
173 }
174 return NULL;
175}
176
3d2912f2
NM
177/* Call to get the datapath stats. Modeled after the dpctl utility.
178 *
179 * It might be more efficient for this module to be given a handle it can use
180 * to get these stats more efficiently, but this is only going to be called
181 * once every 20-30 seconds. Return number of datapaths found (normally expect
182 * 1). */
183static int
184sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
185 struct dpif_dp_stats *dp_totals)
186{
187 struct sset types;
188 const char *type;
189 int count = 0;
190
191 memset(dp_totals, 0, sizeof *dp_totals);
192 sset_init(&types);
193 dp_enumerate_types(&types);
194 SSET_FOR_EACH (type, &types) {
195 struct sset names;
196 const char *name;
197 sset_init(&names);
198 if (dp_enumerate_names(type, &names) == 0) {
199 SSET_FOR_EACH (name, &names) {
200 struct dpif *dpif;
201 if (dpif_open(name, type, &dpif) == 0) {
202 struct dpif_dp_stats dp_stats;
203 if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
204 count++;
205 dp_totals->n_hit += dp_stats.n_hit;
206 dp_totals->n_missed += dp_stats.n_missed;
207 dp_totals->n_lost += dp_stats.n_lost;
208 dp_totals->n_flows += dp_stats.n_flows;
209 dp_totals->n_mask_hit += dp_stats.n_mask_hit;
210 dp_totals->n_masks += dp_stats.n_masks;
211 }
212 dpif_close(dpif);
213 }
214 }
215 sset_destroy(&names);
216 }
217 }
218 sset_destroy(&types);
219 return count;
220}
221
222/* If there are multiple bridges defined then we need some
223 minimal artibration to decide which one should send the
224 global counters. This function allows each sub-agent to
225 ask if he should do it or not. */
226static bool
227sflow_global_counters_subid_test(uint32_t subid)
228 OVS_REQUIRES(mutex)
229{
230 if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
231 /* The role is up for grabs. */
232 sflow_global_counters_subid = subid;
233 }
234 return (sflow_global_counters_subid == subid);
235}
236
237static void
238sflow_global_counters_subid_clear(uint32_t subid)
239 OVS_REQUIRES(mutex)
240{
241 if (sflow_global_counters_subid == subid) {
242 /* The sub-agent that was sending global counters
243 is going away, so reset to allow another
244 to take over. */
245 sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
246 }
247}
248
249static void
250sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
251 SFL_COUNTERS_SAMPLE_TYPE *cs)
252 OVS_REQUIRES(mutex)
253{
254 struct dpif_sflow *ds = ds_;
255 SFLCounters_sample_element dp_elem, res_elem;
256 struct dpif_dp_stats dp_totals;
257 struct rusage usage;
258
259 if (!sflow_global_counters_subid_test(poller->agent->subId)) {
260 /* Another sub-agent is currently responsible for this. */
261 return;
262 }
263
264 /* datapath stats */
265 if (sflow_get_dp_stats(ds, &dp_totals)) {
266 dp_elem.tag = SFLCOUNTERS_OVSDP;
267 dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
268 dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
269 dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
270 dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
271 dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
272 dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
273 SFLADD_ELEMENT(cs, &dp_elem);
274 }
275
276 /* resource usage */
277 getrusage(RUSAGE_SELF, &usage);
278 res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
279 res_elem.counterBlock.appResources.user_time
280 = timeval_to_msec(&usage.ru_utime);
281 res_elem.counterBlock.appResources.system_time
282 = timeval_to_msec(&usage.ru_stime);
283 res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
284 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
285 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
286 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
287 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
288 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
289
290 SFLADD_ELEMENT(cs, &res_elem);
291 sfl_poller_writeCountersSample(poller, cs);
292}
293
72b06300 294static void
bae473fe 295sflow_agent_get_counters(void *ds_, SFLPoller *poller,
72b06300 296 SFL_COUNTERS_SAMPLE_TYPE *cs)
344e21d4 297 OVS_REQUIRES(mutex)
72b06300 298{
bae473fe 299 struct dpif_sflow *ds = ds_;
50b9699f 300 SFLCounters_sample_element elem, lacp_elem, of_elem, name_elem;
934386dd 301 SFLCounters_sample_element eth_elem;
6c038611 302 enum netdev_features current;
bae473fe 303 struct dpif_sflow_port *dsp;
72b06300 304 SFLIf_counters *counters;
934386dd 305 SFLEthernet_counters* eth_counters;
72b06300
BP
306 struct netdev_stats stats;
307 enum netdev_flags flags;
50b9699f
NM
308 struct lacp_slave_stats lacp_stats;
309 const char *ifName;
72b06300 310
4e022ec0 311 dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
bae473fe 312 if (!dsp) {
72b06300
BP
313 return;
314 }
315
316 elem.tag = SFLCOUNTERS_GENERIC;
317 counters = &elem.counterBlock.generic;
318 counters->ifIndex = SFL_DS_INDEX(poller->dsi);
319 counters->ifType = 6;
392c7182 320 if (!netdev_get_features(dsp->ofport->netdev, &current, NULL, NULL, NULL)) {
bae473fe
JP
321 /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown,
322 1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
d02a5f8e 323 counters->ifSpeed = netdev_features_to_bps(current, 0);
72b06300
BP
324 counters->ifDirection = (netdev_features_is_full_duplex(current)
325 ? 1 : 2);
326 } else {
327 counters->ifSpeed = 100000000;
b6dab095 328 counters->ifDirection = 0;
72b06300 329 }
392c7182 330 if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) {
72b06300 331 counters->ifStatus = 1; /* ifAdminStatus up. */
392c7182 332 if (netdev_get_carrier(dsp->ofport->netdev)) {
72b06300
BP
333 counters->ifStatus |= 2; /* ifOperStatus us. */
334 }
335 } else {
336 counters->ifStatus = 0; /* Down. */
337 }
338
339 /* XXX
340 1. Is the multicast counter filled in?
341 2. Does the multicast counter include broadcasts?
342 3. Does the rx_packets counter include multicasts/broadcasts?
343 */
392c7182 344 ofproto_port_get_stats(dsp->ofport, &stats);
72b06300
BP
345 counters->ifInOctets = stats.rx_bytes;
346 counters->ifInUcastPkts = stats.rx_packets;
347 counters->ifInMulticastPkts = stats.multicast;
934386dd 348 counters->ifInBroadcastPkts = stats.rx_broadcast_packets;
72b06300
BP
349 counters->ifInDiscards = stats.rx_dropped;
350 counters->ifInErrors = stats.rx_errors;
351 counters->ifInUnknownProtos = -1;
352 counters->ifOutOctets = stats.tx_bytes;
353 counters->ifOutUcastPkts = stats.tx_packets;
934386dd
RW
354 counters->ifOutMulticastPkts = stats.tx_multicast_packets;
355 counters->ifOutBroadcastPkts = stats.tx_broadcast_packets;
72b06300
BP
356 counters->ifOutDiscards = stats.tx_dropped;
357 counters->ifOutErrors = stats.tx_errors;
358 counters->ifPromiscuousMode = 0;
359
360 SFLADD_ELEMENT(cs, &elem);
50b9699f
NM
361
362 /* Include LACP counters and identifiers if this port is part of a LAG. */
363 if (ofproto_port_get_lacp_stats(dsp->ofport, &lacp_stats) == 0) {
5a0e4aec
BP
364 memset(&lacp_elem, 0, sizeof lacp_elem);
365 lacp_elem.tag = SFLCOUNTERS_LACP;
366 lacp_elem.counterBlock.lacp.actorSystemID =
74ff3298 367 lacp_stats.dot3adAggPortActorSystemID;
5a0e4aec 368 lacp_elem.counterBlock.lacp.partnerSystemID =
74ff3298 369 lacp_stats.dot3adAggPortPartnerOperSystemID;
5a0e4aec
BP
370 lacp_elem.counterBlock.lacp.attachedAggID =
371 lacp_stats.dot3adAggPortAttachedAggID;
372 lacp_elem.counterBlock.lacp.portState.v.actorAdmin =
373 lacp_stats.dot3adAggPortActorAdminState;
374 lacp_elem.counterBlock.lacp.portState.v.actorOper =
375 lacp_stats.dot3adAggPortActorOperState;
376 lacp_elem.counterBlock.lacp.portState.v.partnerAdmin =
377 lacp_stats.dot3adAggPortPartnerAdminState;
378 lacp_elem.counterBlock.lacp.portState.v.partnerOper =
379 lacp_stats.dot3adAggPortPartnerOperState;
380 lacp_elem.counterBlock.lacp.LACPDUsRx =
381 lacp_stats.dot3adAggPortStatsLACPDUsRx;
382 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsRx);
383 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsRx);
384 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.unknownRx);
385 lacp_elem.counterBlock.lacp.illegalRx =
386 lacp_stats.dot3adAggPortStatsIllegalRx;
387 lacp_elem.counterBlock.lacp.LACPDUsTx =
388 lacp_stats.dot3adAggPortStatsLACPDUsTx;
389 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsTx);
390 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsTx);
391 SFLADD_ELEMENT(cs, &lacp_elem);
50b9699f
NM
392 }
393
394 /* Include Port name. */
395 if ((ifName = netdev_get_name(dsp->ofport->netdev)) != NULL) {
5a0e4aec
BP
396 memset(&name_elem, 0, sizeof name_elem);
397 name_elem.tag = SFLCOUNTERS_PORTNAME;
398 name_elem.counterBlock.portName.portName.str = (char *)ifName;
399 name_elem.counterBlock.portName.portName.len = strlen(ifName);
400 SFLADD_ELEMENT(cs, &name_elem);
50b9699f
NM
401 }
402
403 /* Include OpenFlow DPID and openflow port number. */
404 memset(&of_elem, 0, sizeof of_elem);
405 of_elem.tag = SFLCOUNTERS_OPENFLOWPORT;
406 of_elem.counterBlock.ofPort.datapath_id =
5a0e4aec 407 ofproto_get_datapath_id(dsp->ofport->ofproto);
50b9699f
NM
408 of_elem.counterBlock.ofPort.port_no =
409 (OVS_FORCE uint32_t)dsp->ofport->ofp_port;
410 SFLADD_ELEMENT(cs, &of_elem);
411
934386dd
RW
412 /* Include ethernet counters */
413 memset(&eth_elem, 0, sizeof eth_elem);
414 eth_elem.tag = SFLCOUNTERS_ETHERNET;
415 eth_counters = &eth_elem.counterBlock.ethernet;
416 eth_counters->dot3StatsAlignmentErrors = stats.rx_frame_errors;
417 eth_counters->dot3StatsFCSErrors = stats.rx_crc_errors;
418 eth_counters->dot3StatsFrameTooLongs = stats.rx_oversize_errors;
419 SFL_UNDEF_COUNTER(eth_counters->dot3StatsSingleCollisionFrames);
420 SFL_UNDEF_COUNTER(eth_counters->dot3StatsMultipleCollisionFrames);
421 SFL_UNDEF_COUNTER(eth_counters->dot3StatsSQETestErrors);
422 SFL_UNDEF_COUNTER(eth_counters->dot3StatsDeferredTransmissions);
423 SFL_UNDEF_COUNTER(eth_counters->dot3StatsLateCollisions);
424 SFL_UNDEF_COUNTER(eth_counters->dot3StatsExcessiveCollisions);
425 SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacTransmitErrors);
426 SFL_UNDEF_COUNTER(eth_counters->dot3StatsCarrierSenseErrors);
427 SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacReceiveErrors);
428 SFL_UNDEF_COUNTER(eth_counters->dot3StatsSymbolErrors);
429 SFLADD_ELEMENT(cs, &eth_elem);
430
72b06300
BP
431 sfl_poller_writeCountersSample(poller, cs);
432}
433
434/* Obtains an address to use for the local sFlow agent and stores it into
435 * '*agent_addr'. Returns true if successful, false on failure.
436 *
437 * The sFlow agent address should be a local IP address that is persistent and
438 * reachable over the network, if possible. The IP address associated with
439 * 'agent_device' is used if it has one, and otherwise 'control_ip', the IP
733adf2a
LG
440 * address used to talk to the controller. If the agent device is not
441 * specified then it is figured out by taking a look at the routing table based
442 * on 'targets'. */
72b06300 443static bool
733adf2a
LG
444sflow_choose_agent_address(const char *agent_device,
445 const struct sset *targets,
446 const char *control_ip,
72b06300
BP
447 SFLAddress *agent_addr)
448{
71c54ad2 449 struct in6_addr ip;
72b06300
BP
450
451 if (agent_device) {
71c54ad2
BP
452 /* If 'agent_device' is the name of a network device, use its IP
453 * address. */
454 if (!netdev_get_ip_by_name(agent_device, &ip)) {
455 goto success;
456 }
457
458 /* If 'agent_device' is itself an IP address, use it. */
459 struct sockaddr_storage ss;
460 if (inet_parse_address(agent_device, &ss)) {
461 ip = ss_get_address(&ss);
733adf2a
LG
462 goto success;
463 }
464 }
465
71c54ad2
BP
466 /* Otherwise, use an appropriate local IP address for one of the
467 * collectors' remote IP addresses. */
468 const char *target;
733adf2a 469 SSET_FOR_EACH (target, targets) {
71c54ad2 470 struct sockaddr_storage ss;
f31b8ae7 471 if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) {
ed52ca57
PS
472 /* sFlow only supports target in default routing table with
473 * packet mark zero.
474 */
5c6590ec 475 struct in6_addr target_ip = ss_get_address(&ss);
ec6c5379 476
8e4e4588 477 struct in6_addr gw, src = in6addr_any;
71c54ad2 478 char name[IFNAMSIZ];
5c6590ec
JP
479 if (ovs_router_lookup(0, &target_ip, name, &src, &gw)) {
480 ip = src;
e731d71b
AS
481 goto success;
482 }
72b06300
BP
483 }
484 }
485
71c54ad2
BP
486 struct sockaddr_storage ss;
487 if (control_ip && inet_parse_address(control_ip, &ss)) {
488 ip = ss_get_address(&ss);
72b06300
BP
489 goto success;
490 }
491
492 VLOG_ERR("could not determine IP address for sFlow agent");
493 return false;
494
495success:
71c54ad2
BP
496 memset(agent_addr, 0, sizeof *agent_addr);
497 if (IN6_IS_ADDR_V4MAPPED(&ip)) {
498 agent_addr->type = SFLADDRESSTYPE_IP_V4;
499 agent_addr->address.ip_v4.addr
500 = (OVS_FORCE uint32_t) in6_addr_get_mapped_ipv4(&ip);
501 } else {
502 agent_addr->type = SFLADDRESSTYPE_IP_V6;
503 memcpy(agent_addr->address.ip_v6.addr, ip.s6_addr,
504 sizeof agent_addr->address.ip_v6.addr);
505 }
72b06300
BP
506 return true;
507}
508
34ae6d76 509static void
bd3950dd 510dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
72b06300 511{
bae473fe 512 if (ds->sflow_agent) {
3d2912f2 513 sflow_global_counters_subid_clear(ds->sflow_agent->subId);
bae473fe 514 sfl_agent_release(ds->sflow_agent);
0891637f 515 free(ds->sflow_agent);
bae473fe 516 ds->sflow_agent = NULL;
72b06300 517 }
bae473fe
JP
518 collectors_destroy(ds->collectors);
519 ds->collectors = NULL;
520 ofproto_sflow_options_destroy(ds->options);
521 ds->options = NULL;
72b06300 522
72b06300 523 /* Turn off sampling to save CPU cycles. */
6ff686f2 524 ds->probability = 0;
72b06300
BP
525}
526
34ae6d76
EJ
527void
528dpif_sflow_clear(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
529{
530 ovs_mutex_lock(&mutex);
531 dpif_sflow_clear__(ds);
532 ovs_mutex_unlock(&mutex);
533}
534
72b06300 535bool
34ae6d76 536dpif_sflow_is_enabled(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
72b06300 537{
34ae6d76
EJ
538 bool enabled;
539
540 ovs_mutex_lock(&mutex);
541 enabled = ds->collectors != NULL;
542 ovs_mutex_unlock(&mutex);
543 return enabled;
72b06300
BP
544}
545
bae473fe 546struct dpif_sflow *
4213f19d 547dpif_sflow_create(void)
72b06300 548{
34ae6d76 549 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
bae473fe 550 struct dpif_sflow *ds;
72b06300 551
34ae6d76 552 if (ovsthread_once_start(&once)) {
834d6caf 553 ovs_mutex_init_recursive(&mutex);
34ae6d76
EJ
554 ovsthread_once_done(&once);
555 }
556
bae473fe 557 ds = xcalloc(1, sizeof *ds);
bae473fe
JP
558 ds->next_tick = time_now() + 1;
559 hmap_init(&ds->ports);
6ff686f2 560 ds->probability = 0;
37bec3d3 561 ovs_refcount_init(&ds->ref_cnt);
733adf2a 562
bae473fe 563 return ds;
72b06300
BP
564}
565
9723bcce
EJ
566struct dpif_sflow *
567dpif_sflow_ref(const struct dpif_sflow *ds_)
568{
569 struct dpif_sflow *ds = CONST_CAST(struct dpif_sflow *, ds_);
570 if (ds) {
37bec3d3 571 ovs_refcount_ref(&ds->ref_cnt);
9723bcce
EJ
572 }
573 return ds;
574}
575
6ff686f2
PS
576/* 32-bit fraction of packets to sample with. A value of 0 samples no packets,
577 * a value of %UINT32_MAX samples all packets and intermediate values sample
578 * intermediate fractions of packets. */
579uint32_t
34ae6d76 580dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
6ff686f2 581{
34ae6d76
EJ
582 uint32_t probability;
583 ovs_mutex_lock(&mutex);
584 probability = ds->probability;
585 ovs_mutex_unlock(&mutex);
586 return probability;
6ff686f2
PS
587}
588
72b06300 589void
34ae6d76 590dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
72b06300 591{
24f83812 592 if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) {
bae473fe 593 struct dpif_sflow_port *dsp, *next;
32d9dc11 594
bae473fe
JP
595 dpif_sflow_clear(ds);
596 HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) {
597 dpif_sflow_del_port__(ds, dsp);
32d9dc11 598 }
bae473fe
JP
599 hmap_destroy(&ds->ports);
600 free(ds);
72b06300
BP
601 }
602}
603
604static void
392c7182 605dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
bd3950dd 606 OVS_REQUIRES(mutex)
72b06300 607{
bae473fe 608 SFLPoller *poller = sfl_agent_addPoller(ds->sflow_agent, &dsp->dsi, ds,
72b06300 609 sflow_agent_get_counters);
bae473fe 610 sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
72b06300 611 sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
4e022ec0 612 sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port));
72b06300
BP
613}
614
7321bda3
NM
615static enum dpif_sflow_tunnel_type
616dpif_sflow_tunnel_type(struct ofport *ofport) {
617 const char *type = netdev_get_type(ofport->netdev);
618 if (type) {
5a0e4aec
BP
619 if (strcmp(type, "gre") == 0) {
620 return DPIF_SFLOW_TUNNEL_GRE;
621 } else if (strcmp(type, "vxlan") == 0) {
622 return DPIF_SFLOW_TUNNEL_VXLAN;
623 } else if (strcmp(type, "lisp") == 0) {
624 return DPIF_SFLOW_TUNNEL_LISP;
625 } else if (strcmp(type, "geneve") == 0) {
626 return DPIF_SFLOW_TUNNEL_GENEVE;
627 }
7321bda3
NM
628 }
629 return DPIF_SFLOW_TUNNEL_UNKNOWN;
630}
631
632static uint8_t
633dpif_sflow_tunnel_proto(enum dpif_sflow_tunnel_type tunnel_type)
634{
635 /* Default to 0 (IPPROTO_IP), meaning "unknown". */
636 uint8_t ipproto = 0;
637 switch(tunnel_type) {
638
639 case DPIF_SFLOW_TUNNEL_GRE:
7321bda3
NM
640 ipproto = IPPROTO_GRE;
641 break;
642
7321bda3
NM
643 case DPIF_SFLOW_TUNNEL_VXLAN:
644 case DPIF_SFLOW_TUNNEL_LISP:
645 case DPIF_SFLOW_TUNNEL_GENEVE:
646 ipproto = IPPROTO_UDP;
647
648 case DPIF_SFLOW_TUNNEL_UNKNOWN:
649 break;
650 }
651 return ipproto;
652}
653
72b06300 654void
e1b1d06a 655dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
34ae6d76 656 odp_port_t odp_port) OVS_EXCLUDED(mutex)
72b06300 657{
bae473fe 658 struct dpif_sflow_port *dsp;
743cea45 659 int ifindex;
7321bda3 660 enum dpif_sflow_tunnel_type tunnel_type;
72b06300 661
34ae6d76 662 ovs_mutex_lock(&mutex);
bae473fe 663 dpif_sflow_del_port(ds, odp_port);
72b06300 664
7321bda3 665 tunnel_type = dpif_sflow_tunnel_type(ofport);
392c7182 666 ifindex = netdev_get_ifindex(ofport->netdev);
743cea45 667
7321bda3 668 if (ifindex <= 0
5a0e4aec 669 && tunnel_type == DPIF_SFLOW_TUNNEL_UNKNOWN) {
7321bda3 670 /* Not an ifindex port, and not a tunnel port either
5a0e4aec
BP
671 * so do not add a cross-reference to it here.
672 */
34ae6d76 673 goto out;
72b06300 674 }
743cea45
NM
675
676 /* Add to table of ports. */
677 dsp = xmalloc(sizeof *dsp);
392c7182 678 dsp->ofport = ofport;
e1b1d06a 679 dsp->odp_port = odp_port;
7321bda3 680 dsp->tunnel_type = tunnel_type;
f9c0c3ec 681 hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port));
72b06300 682
7321bda3 683 if (ifindex > 0) {
5a0e4aec
BP
684 /* Add poller for ports that have ifindex. */
685 SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
686 if (ds->sflow_agent) {
687 dpif_sflow_add_poller(ds, dsp);
688 }
7321bda3 689 } else {
5a0e4aec
BP
690 /* Record "ifindex unknown" for the others */
691 SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, 0, 0);
72b06300 692 }
34ae6d76
EJ
693
694out:
695 ovs_mutex_unlock(&mutex);
72b06300
BP
696}
697
0cc96e48 698static void
bae473fe 699dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
bd3950dd 700 OVS_REQUIRES(mutex)
0cc96e48 701{
7321bda3 702 if (ds->sflow_agent
5a0e4aec
BP
703 && SFL_DS_INDEX(dsp->dsi)) {
704 sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi);
705 sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi);
0cc96e48 706 }
bae473fe
JP
707 hmap_remove(&ds->ports, &dsp->hmap_node);
708 free(dsp);
0cc96e48
BP
709}
710
72b06300 711void
4e022ec0 712dpif_sflow_del_port(struct dpif_sflow *ds, odp_port_t odp_port)
34ae6d76 713 OVS_EXCLUDED(mutex)
72b06300 714{
34ae6d76
EJ
715 struct dpif_sflow_port *dsp;
716
717 ovs_mutex_lock(&mutex);
718 dsp = dpif_sflow_find_port(ds, odp_port);
bae473fe
JP
719 if (dsp) {
720 dpif_sflow_del_port__(ds, dsp);
72b06300 721 }
34ae6d76 722 ovs_mutex_unlock(&mutex);
72b06300
BP
723}
724
725void
bae473fe
JP
726dpif_sflow_set_options(struct dpif_sflow *ds,
727 const struct ofproto_sflow_options *options)
34ae6d76 728 OVS_EXCLUDED(mutex)
72b06300 729{
bae473fe 730 struct dpif_sflow_port *dsp;
72b06300 731 bool options_changed;
72b06300 732 SFLReceiver *receiver;
72b06300
BP
733 SFLAddress agentIP;
734 time_t now;
743cea45
NM
735 SFLDataSource_instance dsi;
736 uint32_t dsIndex;
737 SFLSampler *sampler;
3d2912f2 738 SFLPoller *poller;
72b06300 739
34ae6d76 740 ovs_mutex_lock(&mutex);
81e2083f 741 if (sset_is_empty(&options->targets) || !options->sampling_rate) {
a68813c3
BP
742 /* No point in doing any work if there are no targets or nothing to
743 * sample. */
34ae6d76
EJ
744 dpif_sflow_clear__(ds);
745 goto out;
a68813c3
BP
746 }
747
bae473fe
JP
748 options_changed = (!ds->options
749 || !ofproto_sflow_options_equal(options, ds->options));
72b06300
BP
750
751 /* Configure collectors if options have changed or if we're shortchanged in
752 * collectors (which indicates that opening one or more of the configured
753 * collectors failed, so that we should retry). */
754 if (options_changed
bae473fe
JP
755 || collectors_count(ds->collectors) < sset_count(&options->targets)) {
756 collectors_destroy(ds->collectors);
02ef592c 757 collectors_create(&options->targets, SFL_DEFAULT_COLLECTOR_PORT,
bae473fe
JP
758 &ds->collectors);
759 if (ds->collectors == NULL) {
a68813c3
BP
760 VLOG_WARN_RL(&rl, "no collectors could be initialized, "
761 "sFlow disabled");
34ae6d76
EJ
762 dpif_sflow_clear__(ds);
763 goto out;
72b06300
BP
764 }
765 }
766
733adf2a
LG
767 /* Choose agent IP address and agent device (if not yet setup) */
768 if (!sflow_choose_agent_address(options->agent_device,
769 &options->targets,
770 options->control_ip, &agentIP)) {
34ae6d76
EJ
771 dpif_sflow_clear__(ds);
772 goto out;
733adf2a
LG
773 }
774
72b06300
BP
775 /* Avoid reconfiguring if options didn't change. */
776 if (!options_changed) {
34ae6d76 777 goto out;
72b06300 778 }
bae473fe
JP
779 ofproto_sflow_options_destroy(ds->options);
780 ds->options = ofproto_sflow_options_clone(options);
72b06300 781
72b06300
BP
782 /* Create agent. */
783 VLOG_INFO("creating sFlow agent %d", options->sub_id);
bae473fe 784 if (ds->sflow_agent) {
3d2912f2 785 sflow_global_counters_subid_clear(ds->sflow_agent->subId);
bae473fe 786 sfl_agent_release(ds->sflow_agent);
72b06300 787 }
bae473fe 788 ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
c73814a3 789 now = time_wall();
bae473fe 790 sfl_agent_init(ds->sflow_agent,
72b06300
BP
791 &agentIP,
792 options->sub_id,
793 now, /* Boot time. */
794 now, /* Current time. */
bae473fe 795 ds, /* Pointer supplied to callbacks. */
72b06300
BP
796 sflow_agent_alloc_cb,
797 sflow_agent_free_cb,
798 sflow_agent_error_cb,
799 sflow_agent_send_packet_cb);
800
bae473fe 801 receiver = sfl_agent_addReceiver(ds->sflow_agent);
e53df206 802 sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow");
72b06300
BP
803 sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff);
804
72b06300 805 /* Set the sampling_rate down in the datapath. */
6ff686f2 806 ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate);
72b06300 807
743cea45
NM
808 /* Add a single sampler for the bridge. This appears as a PHYSICAL_ENTITY
809 because it is associated with the hypervisor, and interacts with the server
810 hardware directly. The sub_id is used to distinguish this sampler from
811 others on other bridges within the same agent. */
812 dsIndex = 1000 + options->sub_id;
813 SFL_DS_SET(dsi, SFL_DSCLASS_PHYSICAL_ENTITY, dsIndex, 0);
814 sampler = sfl_agent_addSampler(ds->sflow_agent, &dsi);
815 sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, ds->options->sampling_rate);
816 sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
817 sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
818
3d2912f2
NM
819 /* Add a counter poller for the bridge so we can use it to send
820 global counters such as datapath cache hit/miss stats. */
821 poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
822 sflow_agent_get_global_counters);
823 sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
824 sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
825
743cea45 826 /* Add pollers for the currently known ifindex-ports */
bae473fe 827 HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
7321bda3
NM
828 if (SFL_DS_INDEX(dsp->dsi)) {
829 dpif_sflow_add_poller(ds, dsp);
5a0e4aec 830 }
72b06300 831 }
34ae6d76
EJ
832
833
834out:
835 ovs_mutex_unlock(&mutex);
72b06300
BP
836}
837
6ff686f2 838int
bae473fe 839dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds,
34ae6d76 840 odp_port_t odp_port) OVS_EXCLUDED(mutex)
c1e98da1 841{
34ae6d76
EJ
842 struct dpif_sflow_port *dsp;
843 int ret;
844
845 ovs_mutex_lock(&mutex);
846 dsp = dpif_sflow_find_port(ds, odp_port);
847 ret = dsp ? SFL_DS_INDEX(dsp->dsi) : 0;
848 ovs_mutex_unlock(&mutex);
849 return ret;
c1e98da1
BP
850}
851
7321bda3
NM
852static void
853dpif_sflow_tunnel_v4(uint8_t tunnel_ipproto,
854 const struct flow_tnl *tunnel,
855 SFLSampled_ipv4 *ipv4)
856
857{
858 ipv4->protocol = tunnel_ipproto;
859 ipv4->tos = tunnel->ip_tos;
860 ipv4->src_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_src;
861 ipv4->dst_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_dst;
862 ipv4->src_port = (OVS_FORCE uint16_t) tunnel->tp_src;
863 ipv4->dst_port = (OVS_FORCE uint16_t) tunnel->tp_dst;
864}
865
866static void
867dpif_sflow_push_mpls_lse(struct dpif_sflow_actions *sflow_actions,
868 ovs_be32 lse)
869{
870 if (sflow_actions->mpls_stack_depth >= FLOW_MAX_MPLS_LABELS) {
5a0e4aec
BP
871 sflow_actions->mpls_err = true;
872 return;
7321bda3
NM
873 }
874
875 /* Record the new lse in host-byte-order. */
876 /* BOS flag will be fixed later when we send stack to sFlow library. */
877 sflow_actions->mpls_lse[sflow_actions->mpls_stack_depth++] = ntohl(lse);
878}
879
880static void
881dpif_sflow_pop_mpls_lse(struct dpif_sflow_actions *sflow_actions)
882{
883 if (sflow_actions->mpls_stack_depth == 0) {
5a0e4aec
BP
884 sflow_actions->mpls_err = true;
885 return;
7321bda3
NM
886 }
887 sflow_actions->mpls_stack_depth--;
888}
889
890static void
891dpif_sflow_set_mpls(struct dpif_sflow_actions *sflow_actions,
5a0e4aec 892 const struct ovs_key_mpls *mpls_key, int n)
7321bda3
NM
893{
894 int ii;
895 if (n > FLOW_MAX_MPLS_LABELS) {
5a0e4aec
BP
896 sflow_actions->mpls_err = true;
897 return;
7321bda3
NM
898 }
899
900 for (ii = 0; ii < n; ii++) {
5a0e4aec
BP
901 /* Reverse stack order, and use host-byte-order for each lse. */
902 sflow_actions->mpls_lse[n - ii - 1] = ntohl(mpls_key[ii].mpls_lse);
7321bda3
NM
903 }
904 sflow_actions->mpls_stack_depth = n;
905}
906
907static void
908sflow_read_tnl_push_action(const struct nlattr *attr,
909 struct dpif_sflow_actions *sflow_actions)
910{
911 /* Modeled on lib/odp-util.c: format_odp_tnl_push_header */
912 const struct ovs_action_push_tnl *data = nl_attr_get(attr);
913 const struct eth_header *eth = (const struct eth_header *) data->header;
914 const struct ip_header *ip
915 = ALIGNED_CAST(const struct ip_header *, eth + 1);
916
81765c00 917 sflow_actions->out_port = data->out_port;
7321bda3
NM
918
919 /* Ethernet. */
920 /* TODO: SFlow does not currently define a MAC-in-MAC
921 * encapsulation structure. We could use an extension
922 * structure to report this.
923 */
924
925 /* IPv4 */
926 /* Cannot assume alignment so just use memcpy. */
927 sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
928 sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
929 sflow_actions->tunnel.ip_tos = ip->ip_tos;
930 sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
931 /* The tnl_push action can supply the ip_protocol too. */
932 sflow_actions->tunnel_ipproto = ip->ip_proto;
933
934 /* Layer 4 */
935 if (data->tnl_type == OVS_VPORT_TYPE_VXLAN
936 || data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
937 const struct udp_header *udp = (const struct udp_header *) (ip + 1);
938 sflow_actions->tunnel.tp_src = udp->udp_src;
939 sflow_actions->tunnel.tp_dst = udp->udp_dst;
940
941 if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
942 const struct vxlanhdr *vxh = (const struct vxlanhdr *) (udp + 1);
943 uint64_t tun_id = ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8;
944 sflow_actions->tunnel.tun_id = htonll(tun_id);
945 } else {
946 const struct genevehdr *gnh = (const struct genevehdr *) (udp + 1);
947 uint64_t tun_id = ntohl(get_16aligned_be32(&gnh->vni)) >> 8;
948 sflow_actions->tunnel.tun_id = htonll(tun_id);
949 }
950 } else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
951 const void *l4 = ip + 1;
952 const struct gre_base_hdr *greh = (const struct gre_base_hdr *) l4;
953 ovs_16aligned_be32 *options = (ovs_16aligned_be32 *)(greh + 1);
954 if (greh->flags & htons(GRE_CSUM)) {
955 options++;
956 }
957 if (greh->flags & htons(GRE_KEY)) {
958 uint64_t tun_id = ntohl(get_16aligned_be32(options));
959 sflow_actions->tunnel.tun_id = htonll(tun_id);
960 }
961 }
962}
963
964static void
965sflow_read_set_action(const struct nlattr *attr,
966 struct dpif_sflow_actions *sflow_actions)
967{
968 enum ovs_key_attr type = nl_attr_type(attr);
969 switch (type) {
970 case OVS_KEY_ATTR_ENCAP:
971 if (++sflow_actions->encap_depth > 1) {
972 /* Do not handle multi-encap for now. */
973 sflow_actions->tunnel_err = true;
974 } else {
975 dpif_sflow_read_actions(NULL,
976 nl_attr_get(attr), nl_attr_get_size(attr),
283d8662 977 sflow_actions, true);
7321bda3
NM
978 }
979 break;
980 case OVS_KEY_ATTR_PRIORITY:
981 case OVS_KEY_ATTR_SKB_MARK:
982 case OVS_KEY_ATTR_DP_HASH:
983 case OVS_KEY_ATTR_RECIRC_ID:
984 break;
985
986 case OVS_KEY_ATTR_TUNNEL: {
987 if (++sflow_actions->encap_depth > 1) {
988 /* Do not handle multi-encap for now. */
989 sflow_actions->tunnel_err = true;
990 } else {
d40533fc 991 if (odp_tun_key_from_attr(attr, &sflow_actions->tunnel, NULL)
7321bda3
NM
992 == ODP_FIT_ERROR) {
993 /* Tunnel parsing error. */
994 sflow_actions->tunnel_err = true;
995 }
996 }
997 break;
998 }
999
1000 case OVS_KEY_ATTR_IN_PORT:
1001 case OVS_KEY_ATTR_ETHERNET:
1002 case OVS_KEY_ATTR_VLAN:
1003 break;
1004
1005 case OVS_KEY_ATTR_MPLS: {
1006 const struct ovs_key_mpls *mpls_key = nl_attr_get(attr);
1007 size_t size = nl_attr_get_size(attr);
1008 dpif_sflow_set_mpls(sflow_actions, mpls_key, size / sizeof *mpls_key);
1009 break;
1010 }
1011
1012 case OVS_KEY_ATTR_ETHERTYPE:
1013 case OVS_KEY_ATTR_IPV4:
1014 if (sflow_actions->encap_depth == 1) {
1015 const struct ovs_key_ipv4 *key = nl_attr_get(attr);
1016 if (key->ipv4_src) {
1017 sflow_actions->tunnel.ip_src = key->ipv4_src;
1018 }
1019 if (key->ipv4_dst) {
1020 sflow_actions->tunnel.ip_dst = key->ipv4_dst;
1021 }
1022 if (key->ipv4_proto) {
1023 sflow_actions->tunnel_ipproto = key->ipv4_proto;
1024 }
1025 if (key->ipv4_tos) {
1026 sflow_actions->tunnel.ip_tos = key->ipv4_tos;
1027 }
1028 if (key->ipv4_ttl) {
1029 sflow_actions->tunnel.ip_tos = key->ipv4_ttl;
1030 }
1031 }
1032 break;
1033
1034 case OVS_KEY_ATTR_IPV6:
1035 /* TODO: parse IPv6 encap. */
1036 break;
1037
1038 /* These have the same structure and format. */
1039 case OVS_KEY_ATTR_TCP:
1040 case OVS_KEY_ATTR_UDP:
1041 case OVS_KEY_ATTR_SCTP:
1042 if (sflow_actions->encap_depth == 1) {
1043 const struct ovs_key_tcp *key = nl_attr_get(attr);
1044 if (key->tcp_src) {
1045 sflow_actions->tunnel.tp_src = key->tcp_src;
1046 }
1047 if (key->tcp_dst) {
1048 sflow_actions->tunnel.tp_dst = key->tcp_dst;
1049 }
1050 }
1051 break;
1052
1053 case OVS_KEY_ATTR_TCP_FLAGS:
1054 case OVS_KEY_ATTR_ICMP:
1055 case OVS_KEY_ATTR_ICMPV6:
1056 case OVS_KEY_ATTR_ARP:
1057 case OVS_KEY_ATTR_ND:
9b2b8497 1058 case OVS_KEY_ATTR_ND_EXTENSIONS:
07659514
JS
1059 case OVS_KEY_ATTR_CT_STATE:
1060 case OVS_KEY_ATTR_CT_ZONE:
8e53fe8c 1061 case OVS_KEY_ATTR_CT_MARK:
9daf2348 1062 case OVS_KEY_ATTR_CT_LABELS:
c30b4cea
JR
1063 case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
1064 case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
7321bda3 1065 case OVS_KEY_ATTR_UNSPEC:
beb75a40 1066 case OVS_KEY_ATTR_PACKET_TYPE:
3d2fbd70 1067 case OVS_KEY_ATTR_NSH:
7321bda3
NM
1068 case __OVS_KEY_ATTR_MAX:
1069 default:
1070 break;
1071 }
1072}
1073
1074static void
1075dpif_sflow_capture_input_mpls(const struct flow *flow,
1076 struct dpif_sflow_actions *sflow_actions)
1077{
1078 if (eth_type_mpls(flow->dl_type)) {
1079 int depth = 0;
1080 int ii;
1081 ovs_be32 lse;
1082 /* Calculate depth by detecting BOS. */
1083 for (ii = 0; ii < FLOW_MAX_MPLS_LABELS; ii++) {
1084 lse = flow->mpls_lse[ii];
1085 depth++;
1086 if (lse & htonl(MPLS_BOS_MASK)) {
1087 break;
1088 }
1089 }
1090 /* Capture stack, reversing stack order, and
1091 * using host-byte-order for each lse. BOS flag
1092 * is ignored for now. It is set later when
1093 * the output stack is encoded.
1094 */
1095 for (ii = 0; ii < depth; ii++) {
1096 lse = flow->mpls_lse[ii];
1097 sflow_actions->mpls_lse[depth - ii - 1] = ntohl(lse);
1098 }
1099 sflow_actions->mpls_stack_depth = depth;
1100 }
1101}
1102
1103void
1104dpif_sflow_read_actions(const struct flow *flow,
283d8662
ZB
1105 const struct nlattr *actions, size_t actions_len,
1106 struct dpif_sflow_actions *sflow_actions,
1107 bool capture_mpls)
7321bda3
NM
1108{
1109 const struct nlattr *a;
1110 unsigned int left;
1111
1112 if (actions_len == 0) {
5a0e4aec
BP
1113 /* Packet dropped.*/
1114 return;
7321bda3
NM
1115 }
1116
283d8662 1117 if (flow != NULL && capture_mpls == true) {
5a0e4aec
BP
1118 /* Make sure the MPLS output stack
1119 * is seeded with the input stack.
1120 */
1121 dpif_sflow_capture_input_mpls(flow, sflow_actions);
1122
1123 /* XXX when 802.1AD(QinQ) is supported then
1124 * we can do the same with VLAN stacks here
1125 */
7321bda3
NM
1126 }
1127
1128 NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
5a0e4aec
BP
1129 enum ovs_action_attr type = nl_attr_type(a);
1130 switch (type) {
1131 case OVS_ACTION_ATTR_OUTPUT:
1132 /* Capture the output port in case we need it
1133 * to get the output tunnel type.
1134 */
1135 sflow_actions->out_port = nl_attr_get_odp_port(a);
1136 break;
1137
1138 case OVS_ACTION_ATTR_TUNNEL_POP:
1139 /* XXX: Do not handle this for now. It's not clear
1140 * if we should start with encap_depth == 1 when we
1141 * see an input tunnel, or if we should assume
1142 * that the input tunnel was always "popped" if it
1143 * was presented to us decoded in flow->tunnel?
1144 *
1145 * If we do handle this it might look like this,
1146 * as we clear the captured tunnel info and decrement
1147 * the encap_depth:
1148 *
1149 * memset(&sflow_actions->tunnel, 0, sizeof struct flow_tnl);
1150 * sflow_actions->tunnel_ipproto = 0;
1151 * --sflow_actions->encap_depth;
1152 *
1153 * but for now just disable the tunnel annotation:
1154 */
1155 sflow_actions->tunnel_err = true;
1156 break;
1157
1158 case OVS_ACTION_ATTR_TUNNEL_PUSH:
1159 /* XXX: This actions appears to come with it's own
1160 * OUTPUT action, so should it be regarded as having
1161 * an implicit "pop" following it too? Put another
1162 * way, would two tnl_push() actions in succession
1163 * result in a packet with two layers of encap?
1164 */
1165 if (++sflow_actions->encap_depth > 1) {
1166 /* Do not handle multi-encap for now. */
1167 sflow_actions->tunnel_err = true;
1168 } else {
1169 sflow_read_tnl_push_action(a, sflow_actions);
1170 }
1171 break;
1172
1173 case OVS_ACTION_ATTR_TRUNC:
1174 case OVS_ACTION_ATTR_USERSPACE:
1175 case OVS_ACTION_ATTR_RECIRC:
1176 case OVS_ACTION_ATTR_HASH:
07659514 1177 case OVS_ACTION_ATTR_CT:
1fe178d2 1178 case OVS_ACTION_ATTR_CT_CLEAR:
5dddf960 1179 case OVS_ACTION_ATTR_METER:
5a0e4aec
BP
1180 break;
1181
1182 case OVS_ACTION_ATTR_SET_MASKED:
1183 /* TODO: apply mask. XXX: Are we likely to see this? */
1184 break;
1185
1186 case OVS_ACTION_ATTR_SET:
1187 sflow_read_set_action(nl_attr_get(a), sflow_actions);
1188 break;
1189
1190 case OVS_ACTION_ATTR_PUSH_VLAN:
1191 case OVS_ACTION_ATTR_POP_VLAN:
1192 /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
1193 * construct a VLAN-stack. The sFlow user-action cookie already
1194 * captures the egress VLAN ID so there is nothing more to do here.
1195 */
1196 break;
1197
1198 case OVS_ACTION_ATTR_PUSH_MPLS: {
1199 const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
1200 if (mpls) {
1201 dpif_sflow_push_mpls_lse(sflow_actions, mpls->mpls_lse);
1202 }
1203 break;
1204 }
1205 case OVS_ACTION_ATTR_POP_MPLS: {
1206 dpif_sflow_pop_mpls_lse(sflow_actions);
1207 break;
1208 }
1209 case OVS_ACTION_ATTR_PUSH_ETH:
1210 case OVS_ACTION_ATTR_POP_ETH:
1211 /* TODO: SFlow does not currently define a MAC-in-MAC
1212 * encapsulation structure. We could use an extension
1213 * structure to report this.
1214 */
1215 break;
283d8662
ZB
1216 case OVS_ACTION_ATTR_CLONE:
1217 if (flow != NULL) {
1218 dpif_sflow_read_actions(flow, nl_attr_get(a), nl_attr_get_size(a),
1219 sflow_actions, false);
1220 }
1221 break;
5a0e4aec 1222 case OVS_ACTION_ATTR_SAMPLE:
f59cb331
YY
1223 case OVS_ACTION_ATTR_PUSH_NSH:
1224 case OVS_ACTION_ATTR_POP_NSH:
5a0e4aec 1225 case OVS_ACTION_ATTR_UNSPEC:
5b34f8fc 1226 case OVS_ACTION_ATTR_CHECK_PKT_LEN:
5a0e4aec
BP
1227 case __OVS_ACTION_ATTR_MAX:
1228 default:
1229 break;
1230 }
7321bda3
NM
1231 }
1232}
1233
1234static void
1235dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
1236 uint32_t *mpls_lse_buf,
1237 const struct dpif_sflow_actions *sflow_actions)
1238{
1239 /* Put the MPLS stack back into "packet header" order,
1240 * and make sure the BOS flag is set correctly on the last
1241 * one. Each lse is still in host-byte-order.
1242 */
1243 int ii;
1244 uint32_t lse;
1245 stack->depth = sflow_actions->mpls_stack_depth;
1246 stack->stack = mpls_lse_buf;
1247 for (ii = 0; ii < stack->depth; ii++) {
1248 lse = sflow_actions->mpls_lse[stack->depth - ii - 1];
1249 stack->stack[ii] = (lse & ~MPLS_BOS_MASK);
1250 }
1251 stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
1252}
1253
1254/* Extract the output port count from the user action cookie.
1255 * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
1256 */
1257static uint32_t
8de6ff3e 1258dpif_sflow_cookie_num_outputs(const struct user_action_cookie *cookie)
7321bda3
NM
1259{
1260 uint32_t format = cookie->sflow.output & 0xC0000000;
1261 uint32_t port_n = cookie->sflow.output & 0x3FFFFFFF;
1262 if (format == 0) {
1263 return port_n ? 1 : 0;
1264 }
1265 else if (format == 0x80000000) {
1266 return port_n;
1267 }
1268 return 0;
1269}
1270
72b06300 1271void
cf62fa4c 1272dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
8de6ff3e
JP
1273 const struct flow *flow, odp_port_t odp_in_port,
1274 const struct user_action_cookie *cookie,
1275 const struct dpif_sflow_actions *sflow_actions)
34ae6d76 1276 OVS_EXCLUDED(mutex)
72b06300
BP
1277{
1278 SFL_FLOW_SAMPLE_TYPE fs;
1279 SFLFlow_sample_element hdrElem;
1280 SFLSampled_header *header;
1281 SFLFlow_sample_element switchElem;
7321bda3
NM
1282 uint8_t tnlInProto, tnlOutProto;
1283 SFLFlow_sample_element tnlInElem, tnlOutElem;
1284 SFLFlow_sample_element vniInElem, vniOutElem;
1285 SFLFlow_sample_element mplsElem;
1286 uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
56fd8edf 1287 SFLSampler *sampler;
6ff686f2 1288 struct dpif_sflow_port *in_dsp;
7321bda3 1289 struct dpif_sflow_port *out_dsp;
1673e0e4 1290 ovs_be16 vlan_tci;
72b06300 1291
34ae6d76 1292 ovs_mutex_lock(&mutex);
743cea45
NM
1293 sampler = ds->sflow_agent->samplers;
1294 if (!sampler) {
34ae6d76 1295 goto out;
6ff686f2 1296 }
6ff686f2 1297
743cea45
NM
1298 /* Build a flow sample. */
1299 memset(&fs, 0, sizeof fs);
72b06300 1300
743cea45
NM
1301 /* Look up the input ifIndex if this port has one. Otherwise just
1302 * leave it as 0 (meaning 'unknown') and continue. */
1303 in_dsp = dpif_sflow_find_port(ds, odp_in_port);
1304 if (in_dsp) {
1305 fs.input = SFL_DS_INDEX(in_dsp->dsi);
56fd8edf
BP
1306 }
1307
8de6ff3e
JP
1308 /* Make the assumption that the random number generator in the
1309 * datapath converges to the configured mean, and just increment the
1310 * samplePool by the configured sampling rate every time. */
743cea45
NM
1311 sampler->samplePool += sfl_sampler_get_sFlowFsPacketSamplingRate(sampler);
1312
72b06300
BP
1313 /* Sampled header. */
1314 memset(&hdrElem, 0, sizeof hdrElem);
1315 hdrElem.tag = SFLFLOW_HEADER;
1316 header = &hdrElem.flowType.header;
1317 header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
c62caaa3 1318 /* The frame_length should include the Ethernet FCS (4 bytes),
743cea45 1319 * but it has already been stripped, so we need to add 4 here. */
cf62fa4c 1320 header->frame_length = dp_packet_size(packet) + 4;
c62caaa3
NM
1321 /* Ethernet FCS stripped off. */
1322 header->stripped = 4;
cf62fa4c 1323 header->header_length = MIN(dp_packet_size(packet),
856081f6 1324 sampler->sFlowFsMaximumHeaderSize);
cf62fa4c 1325 header->header_bytes = dp_packet_data(packet);
72b06300
BP
1326
1327 /* Add extended switch element. */
1328 memset(&switchElem, 0, sizeof(switchElem));
1329 switchElem.tag = SFLFLOW_EX_SWITCH;
f0fb825a
EG
1330 switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlans[0].tci);
1331 switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlans[0].tci);
6ff686f2
PS
1332
1333 /* Retrieve data from user_action_cookie. */
1673e0e4
BP
1334 vlan_tci = cookie->sflow.vlan_tci;
1335 switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(vlan_tci);
1336 switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(vlan_tci);
46b47a41 1337
1673e0e4 1338 fs.output = cookie->sflow.output;
72b06300 1339
7321bda3
NM
1340 /* Input tunnel. */
1341 if (flow->tunnel.ip_dst) {
5a0e4aec
BP
1342 memset(&tnlInElem, 0, sizeof(tnlInElem));
1343 tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
1344 tnlInProto = in_dsp ? dpif_sflow_tunnel_proto(in_dsp->tunnel_type) : 0;
1345 dpif_sflow_tunnel_v4(tnlInProto,
1346 &flow->tunnel,
1347 &tnlInElem.flowType.ipv4);
1348 SFLADD_ELEMENT(&fs, &tnlInElem);
1349 if (flow->tunnel.tun_id) {
1350 memset(&vniInElem, 0, sizeof(vniInElem));
1351 vniInElem.tag = SFLFLOW_EX_VNI_INGRESS;
1352 vniInElem.flowType.tunnel_vni.vni
1353 = ntohll(flow->tunnel.tun_id);
1354 SFLADD_ELEMENT(&fs, &vniInElem);
1355 }
7321bda3
NM
1356 }
1357
1358 /* Output tunnel. */
1359 if (sflow_actions
5a0e4aec
BP
1360 && sflow_actions->encap_depth == 1
1361 && !sflow_actions->tunnel_err
1362 && dpif_sflow_cookie_num_outputs(cookie) == 1) {
1363 tnlOutProto = sflow_actions->tunnel_ipproto;
1364 if (tnlOutProto == 0) {
1365 /* Try to infer the ip-protocol from the output port. */
1366 if (sflow_actions->out_port != ODPP_NONE) {
1367 out_dsp = dpif_sflow_find_port(ds, sflow_actions->out_port);
1368 if (out_dsp) {
1369 tnlOutProto = dpif_sflow_tunnel_proto(out_dsp->tunnel_type);
1370 }
1371 }
1372 }
1373 memset(&tnlOutElem, 0, sizeof(tnlOutElem));
1374 tnlOutElem.tag = SFLFLOW_EX_IPV4_TUNNEL_EGRESS;
1375 dpif_sflow_tunnel_v4(tnlOutProto,
1376 &sflow_actions->tunnel,
1377 &tnlOutElem.flowType.ipv4);
1378 SFLADD_ELEMENT(&fs, &tnlOutElem);
1379 if (sflow_actions->tunnel.tun_id) {
1380 memset(&vniOutElem, 0, sizeof(vniOutElem));
1381 vniOutElem.tag = SFLFLOW_EX_VNI_EGRESS;
1382 vniOutElem.flowType.tunnel_vni.vni
1383 = ntohll(sflow_actions->tunnel.tun_id);
1384 SFLADD_ELEMENT(&fs, &vniOutElem);
1385 }
7321bda3
NM
1386 }
1387
1388 /* MPLS output label stack. */
1389 if (sflow_actions
5a0e4aec
BP
1390 && sflow_actions->mpls_stack_depth > 0
1391 && !sflow_actions->mpls_err
1392 && dpif_sflow_cookie_num_outputs(cookie) == 1) {
1393 memset(&mplsElem, 0, sizeof(mplsElem));
1394 mplsElem.tag = SFLFLOW_EX_MPLS;
1395 dpif_sflow_encode_mpls_stack(&mplsElem.flowType.mpls.out_stack,
1396 mpls_lse_buf,
1397 sflow_actions);
1398 SFLADD_ELEMENT(&fs, &mplsElem);
7321bda3
NM
1399 }
1400
72b06300
BP
1401 /* Submit the flow sample to be encoded into the next datagram. */
1402 SFLADD_ELEMENT(&fs, &hdrElem);
1403 SFLADD_ELEMENT(&fs, &switchElem);
1404 sfl_sampler_writeFlowSample(sampler, &fs);
34ae6d76
EJ
1405
1406out:
1407 ovs_mutex_unlock(&mutex);
72b06300
BP
1408}
1409
72b06300 1410void
34ae6d76 1411dpif_sflow_run(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
72b06300 1412{
34ae6d76
EJ
1413 ovs_mutex_lock(&mutex);
1414 if (ds->collectors != NULL) {
72b06300 1415 time_t now = time_now();
733adf2a 1416 route_table_run();
bae473fe
JP
1417 if (now >= ds->next_tick) {
1418 sfl_agent_tick(ds->sflow_agent, time_wall());
1419 ds->next_tick = now + 1;
72b06300
BP
1420 }
1421 }
34ae6d76 1422 ovs_mutex_unlock(&mutex);
72b06300
BP
1423}
1424
1425void
34ae6d76 1426dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
72b06300 1427{
34ae6d76
EJ
1428 ovs_mutex_lock(&mutex);
1429 if (ds->collectors != NULL) {
bae473fe 1430 poll_timer_wait_until(ds->next_tick * 1000LL);
72b06300 1431 }
34ae6d76 1432 ovs_mutex_unlock(&mutex);
72b06300 1433}