2 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
3 * Distributed under the terms of the GNU GPL version 2.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
9 /* Functions for managing the dp interface/device. */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/if_arp.h>
16 #include <linux/if_vlan.h>
19 #include <linux/jhash.h>
20 #include <linux/delay.h>
21 #include <linux/time.h>
22 #include <linux/etherdevice.h>
23 #include <linux/genetlink.h>
24 #include <linux/kernel.h>
25 #include <linux/kthread.h>
26 #include <linux/mutex.h>
27 #include <linux/percpu.h>
28 #include <linux/rcupdate.h>
29 #include <linux/tcp.h>
30 #include <linux/udp.h>
31 #include <linux/version.h>
32 #include <linux/ethtool.h>
33 #include <linux/wait.h>
34 #include <asm/system.h>
35 #include <asm/div64.h>
37 #include <linux/highmem.h>
38 #include <linux/netfilter_bridge.h>
39 #include <linux/netfilter_ipv4.h>
40 #include <linux/inetdevice.h>
41 #include <linux/list.h>
42 #include <linux/rculist.h>
43 #include <linux/dmi.h>
44 #include <net/inet_ecn.h>
45 #include <net/genetlink.h>
47 #include "openvswitch/datapath-protocol.h"
54 #include "vport-internal_dev.h"
56 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
57 LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
58 #error Kernels before 2.6.18 or after 3.0 are not supported by this version of Open vSwitch.
/* Hook for the bridge-compatibility ioctl path.  A compatibility module
 * assigns this at load time; it stays NULL otherwise. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);
67 * Writes to device state (add/remove datapath, port, set operations on vports,
68 * etc.) are protected by RTNL.
70 * Writes to other state (flow table modifications, set miscellaneous datapath
71 * parameters such as drop frags, etc.) are protected by genl_mutex. The RTNL
72 * lock nests inside genl_mutex.
74 * Reads are protected by RCU.
76 * There are a few special cases (mostly stats) that have their own
77 * synchronization but they nest under all of above and don't interact with
81 /* Global list of datapaths to enable dumping them all out.
82 * Protected by genl_mutex.
84 static LIST_HEAD(dps
);
86 static struct vport
*new_vport(const struct vport_parms
*);
87 static int queue_userspace_packets(struct datapath
*, u32 pid
, struct sk_buff
*,
88 const struct dp_upcall_info
*);
90 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
91 struct datapath
*get_dp(int dp_ifindex
)
93 struct datapath
*dp
= NULL
;
94 struct net_device
*dev
;
97 dev
= dev_get_by_index_rcu(&init_net
, dp_ifindex
);
99 struct vport
*vport
= internal_dev_get_vport(dev
);
107 EXPORT_SYMBOL_GPL(get_dp
);
109 /* Must be called with genl_mutex. */
110 static struct flow_table
*get_table_protected(struct datapath
*dp
)
112 return rcu_dereference_protected(dp
->table
, lockdep_genl_is_held());
115 /* Must be called with rcu_read_lock or RTNL lock. */
116 static struct vport
*get_vport_protected(struct datapath
*dp
, u16 port_no
)
118 return rcu_dereference_rtnl(dp
->ports
[port_no
]);
121 /* Must be called with rcu_read_lock or RTNL lock. */
122 const char *dp_name(const struct datapath
*dp
)
124 return vport_get_name(rcu_dereference_rtnl(dp
->ports
[OVSP_LOCAL
]));
127 static int get_dpifindex(struct datapath
*dp
)
134 local
= get_vport_protected(dp
, OVSP_LOCAL
);
136 ifindex
= vport_get_ifindex(local
);
145 static inline size_t br_nlmsg_size(void)
147 return NLMSG_ALIGN(sizeof(struct ifinfomsg
))
148 + nla_total_size(IFNAMSIZ
) /* IFLA_IFNAME */
149 + nla_total_size(MAX_ADDR_LEN
) /* IFLA_ADDRESS */
150 + nla_total_size(4) /* IFLA_MASTER */
151 + nla_total_size(4) /* IFLA_MTU */
152 + nla_total_size(1); /* IFLA_OPERSTATE */
155 /* Caller must hold RTNL lock. */
156 static int dp_fill_ifinfo(struct sk_buff
*skb
,
157 const struct vport
*port
,
158 int event
, unsigned int flags
)
160 struct datapath
*dp
= port
->dp
;
161 int ifindex
= vport_get_ifindex(port
);
162 struct ifinfomsg
*hdr
;
163 struct nlmsghdr
*nlh
;
168 nlh
= nlmsg_put(skb
, 0, 0, event
, sizeof(*hdr
), flags
);
172 hdr
= nlmsg_data(nlh
);
173 hdr
->ifi_family
= AF_BRIDGE
;
175 hdr
->ifi_type
= ARPHRD_ETHER
;
176 hdr
->ifi_index
= ifindex
;
177 hdr
->ifi_flags
= vport_get_flags(port
);
180 NLA_PUT_STRING(skb
, IFLA_IFNAME
, vport_get_name(port
));
181 NLA_PUT_U32(skb
, IFLA_MASTER
, get_dpifindex(dp
));
182 NLA_PUT_U32(skb
, IFLA_MTU
, vport_get_mtu(port
));
183 #ifdef IFLA_OPERSTATE
184 NLA_PUT_U8(skb
, IFLA_OPERSTATE
,
185 vport_is_running(port
)
186 ? vport_get_operstate(port
)
190 NLA_PUT(skb
, IFLA_ADDRESS
, ETH_ALEN
, vport_get_addr(port
));
192 return nlmsg_end(skb
, nlh
);
195 nlmsg_cancel(skb
, nlh
);
199 /* Caller must hold RTNL lock. */
200 static void dp_ifinfo_notify(int event
, struct vport
*port
)
205 skb
= nlmsg_new(br_nlmsg_size(), GFP_KERNEL
);
209 err
= dp_fill_ifinfo(skb
, port
, event
, 0);
211 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
212 WARN_ON(err
== -EMSGSIZE
);
216 rtnl_notify(skb
, &init_net
, 0, RTNLGRP_LINK
, NULL
, GFP_KERNEL
);
220 rtnl_set_sk_err(&init_net
, RTNLGRP_LINK
, err
);
223 static void release_dp(struct kobject
*kobj
)
225 struct datapath
*dp
= container_of(kobj
, struct datapath
, ifobj
);
229 static struct kobj_type dp_ktype
= {
230 .release
= release_dp
233 static void destroy_dp_rcu(struct rcu_head
*rcu
)
235 struct datapath
*dp
= container_of(rcu
, struct datapath
, rcu
);
237 flow_tbl_destroy(dp
->table
);
238 free_percpu(dp
->stats_percpu
);
239 kobject_put(&dp
->ifobj
);
242 /* Called with RTNL lock and genl_lock. */
243 static struct vport
*new_vport(const struct vport_parms
*parms
)
247 vport
= vport_add(parms
);
248 if (!IS_ERR(vport
)) {
249 struct datapath
*dp
= parms
->dp
;
251 rcu_assign_pointer(dp
->ports
[parms
->port_no
], vport
);
252 list_add(&vport
->node
, &dp
->port_list
);
254 dp_ifinfo_notify(RTM_NEWLINK
, vport
);
260 /* Called with RTNL lock. */
261 void dp_detach_port(struct vport
*p
)
265 if (p
->port_no
!= OVSP_LOCAL
)
267 dp_ifinfo_notify(RTM_DELLINK
, p
);
269 /* First drop references to device. */
271 rcu_assign_pointer(p
->dp
->ports
[p
->port_no
], NULL
);
273 /* Then destroy it. */
277 /* Must be called with rcu_read_lock. */
278 void dp_process_received_packet(struct vport
*p
, struct sk_buff
*skb
)
280 struct datapath
*dp
= p
->dp
;
281 struct sw_flow
*flow
;
282 struct dp_stats_percpu
*stats
;
283 int stats_counter_off
;
286 OVS_CB(skb
)->vport
= p
;
288 if (!OVS_CB(skb
)->flow
) {
289 struct sw_flow_key key
;
293 /* Extract flow from 'skb' into 'key'. */
294 error
= flow_extract(skb
, p
->port_no
, &key
, &key_len
, &is_frag
);
295 if (unlikely(error
)) {
300 if (is_frag
&& dp
->drop_frags
) {
302 stats_counter_off
= offsetof(struct dp_stats_percpu
, n_frags
);
307 flow
= flow_tbl_lookup(rcu_dereference(dp
->table
), &key
, key_len
);
308 if (unlikely(!flow
)) {
309 struct dp_upcall_info upcall
;
311 upcall
.cmd
= OVS_PACKET_CMD_MISS
;
313 dp_upcall(dp
, skb
, &upcall
);
315 stats_counter_off
= offsetof(struct dp_stats_percpu
, n_missed
);
319 OVS_CB(skb
)->flow
= flow
;
322 stats_counter_off
= offsetof(struct dp_stats_percpu
, n_hit
);
323 flow_used(OVS_CB(skb
)->flow
, skb
);
324 execute_actions(dp
, skb
);
327 /* Update datapath statistics. */
329 stats
= per_cpu_ptr(dp
->stats_percpu
, smp_processor_id());
331 write_seqcount_begin(&stats
->seqlock
);
332 (*(u64
*)((u8
*)stats
+ stats_counter_off
))++;
333 write_seqcount_end(&stats
->seqlock
);
338 static void copy_and_csum_skb(struct sk_buff
*skb
, void *to
)
340 u16 csum_start
, csum_offset
;
343 get_skb_csum_pointers(skb
, &csum_start
, &csum_offset
);
344 csum_start
-= skb_headroom(skb
);
346 skb_copy_bits(skb
, 0, to
, csum_start
);
348 csum
= skb_copy_and_csum_bits(skb
, csum_start
, to
+ csum_start
,
349 skb
->len
- csum_start
, 0);
350 *(__sum16
*)(to
+ csum_start
+ csum_offset
) = csum_fold(csum
);
353 static struct genl_family dp_packet_genl_family
= {
354 .id
= GENL_ID_GENERATE
,
355 .hdrsize
= sizeof(struct ovs_header
),
356 .name
= OVS_PACKET_FAMILY
,
358 .maxattr
= OVS_PACKET_ATTR_MAX
361 int dp_upcall(struct datapath
*dp
, struct sk_buff
*skb
,
362 const struct dp_upcall_info
*upcall_info
)
364 struct sk_buff
*segs
= NULL
;
365 struct dp_stats_percpu
*stats
;
369 if (OVS_CB(skb
)->flow
)
370 pid
= OVS_CB(skb
)->flow
->upcall_pid
;
372 pid
= OVS_CB(skb
)->vport
->upcall_pid
;
379 forward_ip_summed(skb
, true);
381 /* Break apart GSO packets into their component pieces. Otherwise
382 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
383 if (skb_is_gso(skb
)) {
384 segs
= skb_gso_segment(skb
, NETIF_F_SG
| NETIF_F_HW_CSUM
);
393 err
= queue_userspace_packets(dp
, pid
, skb
, upcall_info
);
395 struct sk_buff
*next
;
396 /* Free GSO-segments */
400 } while ((segs
= next
) != NULL
);
410 stats
= per_cpu_ptr(dp
->stats_percpu
, smp_processor_id());
412 write_seqcount_begin(&stats
->seqlock
);
414 write_seqcount_end(&stats
->seqlock
);
421 /* Send each packet in the 'skb' list to userspace for 'dp' as directed by
422 * 'upcall_info'. There will be only one packet unless we broke up a GSO
425 static int queue_userspace_packets(struct datapath
*dp
, u32 pid
,
427 const struct dp_upcall_info
*upcall_info
)
431 dp_ifindex
= get_dpifindex(dp
);
436 struct ovs_header
*upcall
;
437 struct sk_buff
*user_skb
; /* to be queued to userspace */
442 err
= vlan_deaccel_tag(skb
);
446 if (nla_attr_size(skb
->len
) > USHRT_MAX
)
449 len
= sizeof(struct ovs_header
);
450 len
+= nla_total_size(skb
->len
);
451 len
+= nla_total_size(FLOW_BUFSIZE
);
452 if (upcall_info
->cmd
== OVS_PACKET_CMD_ACTION
)
453 len
+= nla_total_size(8);
455 user_skb
= genlmsg_new(len
, GFP_ATOMIC
);
459 upcall
= genlmsg_put(user_skb
, 0, 0, &dp_packet_genl_family
,
460 0, upcall_info
->cmd
);
461 upcall
->dp_ifindex
= dp_ifindex
;
463 nla
= nla_nest_start(user_skb
, OVS_PACKET_ATTR_KEY
);
464 flow_to_nlattrs(upcall_info
->key
, user_skb
);
465 nla_nest_end(user_skb
, nla
);
467 if (upcall_info
->cmd
== OVS_PACKET_CMD_ACTION
)
468 nla_put_u64(user_skb
, OVS_PACKET_ATTR_USERDATA
,
469 upcall_info
->userdata
);
471 nla
= __nla_reserve(user_skb
, OVS_PACKET_ATTR_PACKET
, skb
->len
);
472 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
473 copy_and_csum_skb(skb
, nla_data(nla
));
475 skb_copy_bits(skb
, 0, nla_data(nla
), skb
->len
);
477 err
= genlmsg_unicast(&init_net
, user_skb
, pid
);
481 } while ((skb
= skb
->next
));
486 /* Called with genl_mutex. */
487 static int flush_flows(int dp_ifindex
)
489 struct flow_table
*old_table
;
490 struct flow_table
*new_table
;
493 dp
= get_dp(dp_ifindex
);
497 old_table
= get_table_protected(dp
);
498 new_table
= flow_tbl_alloc(TBL_MIN_BUCKETS
);
502 rcu_assign_pointer(dp
->table
, new_table
);
504 flow_tbl_deferred_destroy(old_table
);
508 static int validate_actions(const struct nlattr
*attr
, int depth
);
510 static int validate_sample(const struct nlattr
*attr
, int depth
)
512 static const struct nla_policy sample_policy
[OVS_SAMPLE_ATTR_MAX
+ 1] =
514 [OVS_SAMPLE_ATTR_PROBABILITY
] = {.type
= NLA_U32
},
515 [OVS_SAMPLE_ATTR_ACTIONS
] = {.type
= NLA_UNSPEC
},
517 struct nlattr
*a
[OVS_SAMPLE_ATTR_MAX
+ 1];
520 error
= nla_parse_nested(a
, OVS_SAMPLE_ATTR_MAX
, attr
, sample_policy
);
524 if (!a
[OVS_SAMPLE_ATTR_PROBABILITY
])
526 if (!a
[OVS_SAMPLE_ATTR_ACTIONS
])
529 return validate_actions(a
[OVS_SAMPLE_ATTR_ACTIONS
], (depth
+ 1));
532 static int validate_actions(const struct nlattr
*attr
, int depth
)
534 const struct nlattr
*a
;
537 if (depth
>= SAMPLE_ACTION_DEPTH
)
540 nla_for_each_nested(a
, attr
, rem
) {
541 static const u32 action_lens
[OVS_ACTION_ATTR_MAX
+ 1] = {
542 [OVS_ACTION_ATTR_OUTPUT
] = 4,
543 [OVS_ACTION_ATTR_USERSPACE
] = 8,
544 [OVS_ACTION_ATTR_PUSH_VLAN
] = 2,
545 [OVS_ACTION_ATTR_POP_VLAN
] = 0,
546 [OVS_ACTION_ATTR_SET_DL_SRC
] = ETH_ALEN
,
547 [OVS_ACTION_ATTR_SET_DL_DST
] = ETH_ALEN
,
548 [OVS_ACTION_ATTR_SET_NW_SRC
] = 4,
549 [OVS_ACTION_ATTR_SET_NW_DST
] = 4,
550 [OVS_ACTION_ATTR_SET_NW_TOS
] = 1,
551 [OVS_ACTION_ATTR_SET_TP_SRC
] = 2,
552 [OVS_ACTION_ATTR_SET_TP_DST
] = 2,
553 [OVS_ACTION_ATTR_SET_TUNNEL
] = 8,
554 [OVS_ACTION_ATTR_SET_PRIORITY
] = 4,
555 [OVS_ACTION_ATTR_POP_PRIORITY
] = 0,
557 int type
= nla_type(a
);
559 /* Match expected attr len for given attr len except for
560 * OVS_ACTION_ATTR_SAMPLE, as it has nested actions list which
561 * is variable size. */
562 if (type
> OVS_ACTION_ATTR_MAX
||
563 (nla_len(a
) != action_lens
[type
] &&
564 type
!= OVS_ACTION_ATTR_SAMPLE
))
568 case OVS_ACTION_ATTR_UNSPEC
:
571 case OVS_ACTION_ATTR_USERSPACE
:
572 case OVS_ACTION_ATTR_POP_VLAN
:
573 case OVS_ACTION_ATTR_SET_DL_SRC
:
574 case OVS_ACTION_ATTR_SET_DL_DST
:
575 case OVS_ACTION_ATTR_SET_NW_SRC
:
576 case OVS_ACTION_ATTR_SET_NW_DST
:
577 case OVS_ACTION_ATTR_SET_TP_SRC
:
578 case OVS_ACTION_ATTR_SET_TP_DST
:
579 case OVS_ACTION_ATTR_SET_TUNNEL
:
580 case OVS_ACTION_ATTR_SET_PRIORITY
:
581 case OVS_ACTION_ATTR_POP_PRIORITY
:
582 /* No validation needed. */
585 case OVS_ACTION_ATTR_OUTPUT
:
586 if (nla_get_u32(a
) >= DP_MAX_PORTS
)
590 case OVS_ACTION_ATTR_PUSH_VLAN
:
591 if (nla_get_be16(a
) & htons(VLAN_CFI_MASK
))
595 case OVS_ACTION_ATTR_SET_NW_TOS
:
596 if (nla_get_u8(a
) & INET_ECN_MASK
)
600 case OVS_ACTION_ATTR_SAMPLE
:
601 err
= validate_sample(a
, depth
);
616 static void clear_stats(struct sw_flow
*flow
)
620 flow
->packet_count
= 0;
621 flow
->byte_count
= 0;
624 static int ovs_packet_cmd_execute(struct sk_buff
*skb
, struct genl_info
*info
)
626 struct ovs_header
*ovs_header
= info
->userhdr
;
627 struct nlattr
**a
= info
->attrs
;
628 struct sw_flow_actions
*acts
;
629 struct sk_buff
*packet
;
630 struct sw_flow
*flow
;
639 if (!a
[OVS_PACKET_ATTR_PACKET
] || !a
[OVS_PACKET_ATTR_KEY
] ||
640 !a
[OVS_PACKET_ATTR_ACTIONS
] ||
641 nla_len(a
[OVS_PACKET_ATTR_PACKET
]) < ETH_HLEN
)
644 err
= validate_actions(a
[OVS_PACKET_ATTR_ACTIONS
], 0);
648 len
= nla_len(a
[OVS_PACKET_ATTR_PACKET
]);
649 packet
= __dev_alloc_skb(NET_IP_ALIGN
+ len
, GFP_KERNEL
);
653 skb_reserve(packet
, NET_IP_ALIGN
);
655 memcpy(__skb_put(packet
, len
), nla_data(a
[OVS_PACKET_ATTR_PACKET
]), len
);
657 skb_reset_mac_header(packet
);
658 eth
= eth_hdr(packet
);
660 /* Normally, setting the skb 'protocol' field would be handled by a
661 * call to eth_type_trans(), but it assumes there's a sending
662 * device, which we may not have. */
663 if (ntohs(eth
->h_proto
) >= 1536)
664 packet
->protocol
= eth
->h_proto
;
666 packet
->protocol
= htons(ETH_P_802_2
);
668 /* Build an sw_flow for sending this packet. */
674 err
= flow_extract(packet
, -1, &flow
->key
, &key_len
, &is_frag
);
678 err
= flow_metadata_from_nlattrs(&flow
->key
.eth
.in_port
,
679 &flow
->key
.eth
.tun_id
,
680 a
[OVS_PACKET_ATTR_KEY
]);
684 flow
->hash
= flow_hash(&flow
->key
, key_len
);
686 if (a
[OVS_PACKET_ATTR_UPCALL_PID
])
687 flow
->upcall_pid
= nla_get_u32(a
[OVS_PACKET_ATTR_UPCALL_PID
]);
689 flow
->upcall_pid
= NETLINK_CB(skb
).pid
;
691 acts
= flow_actions_alloc(a
[OVS_PACKET_ATTR_ACTIONS
]);
695 rcu_assign_pointer(flow
->sf_acts
, acts
);
697 OVS_CB(packet
)->flow
= flow
;
700 dp
= get_dp(ovs_header
->dp_ifindex
);
705 if (flow
->key
.eth
.in_port
< DP_MAX_PORTS
)
706 OVS_CB(packet
)->vport
= get_vport_protected(dp
,
707 flow
->key
.eth
.in_port
);
709 err
= execute_actions(dp
, packet
);
725 static const struct nla_policy packet_policy
[OVS_PACKET_ATTR_MAX
+ 1] = {
726 [OVS_PACKET_ATTR_PACKET
] = { .type
= NLA_UNSPEC
},
727 [OVS_PACKET_ATTR_KEY
] = { .type
= NLA_NESTED
},
728 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NLA_NESTED
},
729 [OVS_PACKET_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
732 static struct genl_ops dp_packet_genl_ops
[] = {
733 { .cmd
= OVS_PACKET_CMD_EXECUTE
,
734 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
735 .policy
= packet_policy
,
736 .doit
= ovs_packet_cmd_execute
740 static void get_dp_stats(struct datapath
*dp
, struct ovs_dp_stats
*stats
)
743 struct flow_table
*table
= get_table_protected(dp
);
745 stats
->n_flows
= flow_tbl_count(table
);
747 stats
->n_frags
= stats
->n_hit
= stats
->n_missed
= stats
->n_lost
= 0;
748 for_each_possible_cpu(i
) {
749 const struct dp_stats_percpu
*percpu_stats
;
750 struct dp_stats_percpu local_stats
;
753 percpu_stats
= per_cpu_ptr(dp
->stats_percpu
, i
);
756 seqcount
= read_seqcount_begin(&percpu_stats
->seqlock
);
757 local_stats
= *percpu_stats
;
758 } while (read_seqcount_retry(&percpu_stats
->seqlock
, seqcount
));
760 stats
->n_frags
+= local_stats
.n_frags
;
761 stats
->n_hit
+= local_stats
.n_hit
;
762 stats
->n_missed
+= local_stats
.n_missed
;
763 stats
->n_lost
+= local_stats
.n_lost
;
767 static const struct nla_policy flow_policy
[OVS_FLOW_ATTR_MAX
+ 1] = {
768 [OVS_FLOW_ATTR_KEY
] = { .type
= NLA_NESTED
},
769 [OVS_FLOW_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
770 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NLA_NESTED
},
771 [OVS_FLOW_ATTR_CLEAR
] = { .type
= NLA_FLAG
},
774 static struct genl_family dp_flow_genl_family
= {
775 .id
= GENL_ID_GENERATE
,
776 .hdrsize
= sizeof(struct ovs_header
),
777 .name
= OVS_FLOW_FAMILY
,
779 .maxattr
= OVS_FLOW_ATTR_MAX
782 static struct genl_multicast_group dp_flow_multicast_group
= {
783 .name
= OVS_FLOW_MCGROUP
786 /* Called with genl_lock. */
787 static int ovs_flow_cmd_fill_info(struct sw_flow
*flow
, struct datapath
*dp
,
788 struct sk_buff
*skb
, u32 pid
, u32 seq
, u32 flags
, u8 cmd
)
790 const int skb_orig_len
= skb
->len
;
791 const struct sw_flow_actions
*sf_acts
;
792 struct ovs_flow_stats stats
;
793 struct ovs_header
*ovs_header
;
799 sf_acts
= rcu_dereference_protected(flow
->sf_acts
,
800 lockdep_genl_is_held());
802 ovs_header
= genlmsg_put(skb
, pid
, seq
, &dp_flow_genl_family
, flags
, cmd
);
806 ovs_header
->dp_ifindex
= get_dpifindex(dp
);
808 nla
= nla_nest_start(skb
, OVS_FLOW_ATTR_KEY
);
810 goto nla_put_failure
;
811 err
= flow_to_nlattrs(&flow
->key
, skb
);
814 nla_nest_end(skb
, nla
);
816 NLA_PUT_U32(skb
, OVS_FLOW_ATTR_UPCALL_PID
, flow
->upcall_pid
);
818 spin_lock_bh(&flow
->lock
);
820 stats
.n_packets
= flow
->packet_count
;
821 stats
.n_bytes
= flow
->byte_count
;
822 tcp_flags
= flow
->tcp_flags
;
823 spin_unlock_bh(&flow
->lock
);
826 NLA_PUT_U64(skb
, OVS_FLOW_ATTR_USED
, flow_used_time(used
));
829 NLA_PUT(skb
, OVS_FLOW_ATTR_STATS
, sizeof(struct ovs_flow_stats
), &stats
);
832 NLA_PUT_U8(skb
, OVS_FLOW_ATTR_TCP_FLAGS
, tcp_flags
);
834 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
835 * this is the first flow to be dumped into 'skb'. This is unusual for
836 * Netlink but individual action lists can be longer than
837 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
838 * The userspace caller can always fetch the actions separately if it
839 * really wants them. (Most userspace callers in fact don't care.)
841 * This can only fail for dump operations because the skb is always
842 * properly sized for single flows.
844 err
= nla_put(skb
, OVS_FLOW_ATTR_ACTIONS
, sf_acts
->actions_len
,
846 if (err
< 0 && skb_orig_len
)
849 return genlmsg_end(skb
, ovs_header
);
854 genlmsg_cancel(skb
, ovs_header
);
858 static struct sk_buff
*ovs_flow_cmd_alloc_info(struct sw_flow
*flow
)
860 const struct sw_flow_actions
*sf_acts
;
863 sf_acts
= rcu_dereference_protected(flow
->sf_acts
,
864 lockdep_genl_is_held());
866 len
= nla_total_size(FLOW_BUFSIZE
); /* OVS_FLOW_ATTR_KEY */
867 len
+= nla_total_size(sf_acts
->actions_len
); /* OVS_FLOW_ATTR_ACTIONS */
868 len
+= nla_total_size(sizeof(struct ovs_flow_stats
)); /* OVS_FLOW_ATTR_STATS */
869 len
+= nla_total_size(1); /* OVS_FLOW_ATTR_TCP_FLAGS */
870 len
+= nla_total_size(8); /* OVS_FLOW_ATTR_USED */
871 return genlmsg_new(NLMSG_ALIGN(sizeof(struct ovs_header
)) + len
, GFP_KERNEL
);
874 static struct sk_buff
*ovs_flow_cmd_build_info(struct sw_flow
*flow
, struct datapath
*dp
,
875 u32 pid
, u32 seq
, u8 cmd
)
880 skb
= ovs_flow_cmd_alloc_info(flow
);
882 return ERR_PTR(-ENOMEM
);
884 retval
= ovs_flow_cmd_fill_info(flow
, dp
, skb
, pid
, seq
, 0, cmd
);
889 static int ovs_flow_cmd_new_or_set(struct sk_buff
*skb
, struct genl_info
*info
)
891 struct nlattr
**a
= info
->attrs
;
892 struct ovs_header
*ovs_header
= info
->userhdr
;
893 struct sw_flow_key key
;
894 struct sw_flow
*flow
;
895 struct sk_buff
*reply
;
897 struct flow_table
*table
;
903 if (!a
[OVS_FLOW_ATTR_KEY
])
905 error
= flow_from_nlattrs(&key
, &key_len
, a
[OVS_FLOW_ATTR_KEY
]);
909 /* Validate actions. */
910 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
911 error
= validate_actions(a
[OVS_FLOW_ATTR_ACTIONS
], 0);
914 } else if (info
->genlhdr
->cmd
== OVS_FLOW_CMD_NEW
) {
919 dp
= get_dp(ovs_header
->dp_ifindex
);
924 table
= get_table_protected(dp
);
925 flow
= flow_tbl_lookup(table
, &key
, key_len
);
927 struct sw_flow_actions
*acts
;
929 /* Bail out if we're not allowed to create a new flow. */
931 if (info
->genlhdr
->cmd
== OVS_FLOW_CMD_SET
)
934 /* Expand table, if necessary, to make room. */
935 if (flow_tbl_need_to_expand(table
)) {
936 struct flow_table
*new_table
;
938 new_table
= flow_tbl_expand(table
);
939 if (!IS_ERR(new_table
)) {
940 rcu_assign_pointer(dp
->table
, new_table
);
941 flow_tbl_deferred_destroy(table
);
942 table
= get_table_protected(dp
);
949 error
= PTR_ERR(flow
);
955 if (a
[OVS_FLOW_ATTR_UPCALL_PID
])
956 flow
->upcall_pid
= nla_get_u32(a
[OVS_FLOW_ATTR_UPCALL_PID
]);
958 flow
->upcall_pid
= NETLINK_CB(skb
).pid
;
960 /* Obtain actions. */
961 acts
= flow_actions_alloc(a
[OVS_FLOW_ATTR_ACTIONS
]);
962 error
= PTR_ERR(acts
);
964 goto error_free_flow
;
965 rcu_assign_pointer(flow
->sf_acts
, acts
);
967 /* Put flow in bucket. */
968 flow
->hash
= flow_hash(&key
, key_len
);
969 flow_tbl_insert(table
, flow
);
971 reply
= ovs_flow_cmd_build_info(flow
, dp
, info
->snd_pid
,
972 info
->snd_seq
, OVS_FLOW_CMD_NEW
);
974 /* We found a matching flow. */
975 struct sw_flow_actions
*old_acts
;
977 /* Bail out if we're not allowed to modify an existing flow.
978 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
979 * because Generic Netlink treats the latter as a dump
980 * request. We also accept NLM_F_EXCL in case that bug ever
984 if (info
->genlhdr
->cmd
== OVS_FLOW_CMD_NEW
&&
985 info
->nlhdr
->nlmsg_flags
& (NLM_F_CREATE
| NLM_F_EXCL
))
988 /* Update actions. */
989 old_acts
= rcu_dereference_protected(flow
->sf_acts
,
990 lockdep_genl_is_held());
991 if (a
[OVS_FLOW_ATTR_ACTIONS
] &&
992 (old_acts
->actions_len
!= nla_len(a
[OVS_FLOW_ATTR_ACTIONS
]) ||
993 memcmp(old_acts
->actions
, nla_data(a
[OVS_FLOW_ATTR_ACTIONS
]),
994 old_acts
->actions_len
))) {
995 struct sw_flow_actions
*new_acts
;
997 new_acts
= flow_actions_alloc(a
[OVS_FLOW_ATTR_ACTIONS
]);
998 error
= PTR_ERR(new_acts
);
999 if (IS_ERR(new_acts
))
1002 rcu_assign_pointer(flow
->sf_acts
, new_acts
);
1003 flow_deferred_free_acts(old_acts
);
1006 reply
= ovs_flow_cmd_build_info(flow
, dp
, info
->snd_pid
,
1007 info
->snd_seq
, OVS_FLOW_CMD_NEW
);
1009 if (a
[OVS_FLOW_ATTR_UPCALL_PID
])
1010 flow
->upcall_pid
= nla_get_u32(a
[OVS_FLOW_ATTR_UPCALL_PID
]);
1013 if (a
[OVS_FLOW_ATTR_CLEAR
]) {
1014 spin_lock_bh(&flow
->lock
);
1016 spin_unlock_bh(&flow
->lock
);
1021 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1022 dp_flow_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1024 netlink_set_err(INIT_NET_GENL_SOCK
, 0,
1025 dp_flow_multicast_group
.id
, PTR_ERR(reply
));
1034 static int ovs_flow_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1036 struct nlattr
**a
= info
->attrs
;
1037 struct ovs_header
*ovs_header
= info
->userhdr
;
1038 struct sw_flow_key key
;
1039 struct sk_buff
*reply
;
1040 struct sw_flow
*flow
;
1041 struct datapath
*dp
;
1042 struct flow_table
*table
;
1046 if (!a
[OVS_FLOW_ATTR_KEY
])
1048 err
= flow_from_nlattrs(&key
, &key_len
, a
[OVS_FLOW_ATTR_KEY
]);
1052 dp
= get_dp(ovs_header
->dp_ifindex
);
1056 table
= get_table_protected(dp
);
1057 flow
= flow_tbl_lookup(table
, &key
, key_len
);
1061 reply
= ovs_flow_cmd_build_info(flow
, dp
, info
->snd_pid
, info
->snd_seq
, OVS_FLOW_CMD_NEW
);
1063 return PTR_ERR(reply
);
1065 return genlmsg_reply(reply
, info
);
1068 static int ovs_flow_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1070 struct nlattr
**a
= info
->attrs
;
1071 struct ovs_header
*ovs_header
= info
->userhdr
;
1072 struct sw_flow_key key
;
1073 struct sk_buff
*reply
;
1074 struct sw_flow
*flow
;
1075 struct datapath
*dp
;
1076 struct flow_table
*table
;
1080 if (!a
[OVS_FLOW_ATTR_KEY
])
1081 return flush_flows(ovs_header
->dp_ifindex
);
1082 err
= flow_from_nlattrs(&key
, &key_len
, a
[OVS_FLOW_ATTR_KEY
]);
1086 dp
= get_dp(ovs_header
->dp_ifindex
);
1090 table
= get_table_protected(dp
);
1091 flow
= flow_tbl_lookup(table
, &key
, key_len
);
1095 reply
= ovs_flow_cmd_alloc_info(flow
);
1099 flow_tbl_remove(table
, flow
);
1101 err
= ovs_flow_cmd_fill_info(flow
, dp
, reply
, info
->snd_pid
,
1102 info
->snd_seq
, 0, OVS_FLOW_CMD_DEL
);
1105 flow_deferred_free(flow
);
1107 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1108 dp_flow_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1112 static int ovs_flow_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1114 struct ovs_header
*ovs_header
= genlmsg_data(nlmsg_data(cb
->nlh
));
1115 struct datapath
*dp
;
1117 dp
= get_dp(ovs_header
->dp_ifindex
);
1122 struct sw_flow
*flow
;
1125 bucket
= cb
->args
[0];
1127 flow
= flow_tbl_next(get_table_protected(dp
), &bucket
, &obj
);
1131 if (ovs_flow_cmd_fill_info(flow
, dp
, skb
, NETLINK_CB(cb
->skb
).pid
,
1132 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
1133 OVS_FLOW_CMD_NEW
) < 0)
1136 cb
->args
[0] = bucket
;
1142 static struct genl_ops dp_flow_genl_ops
[] = {
1143 { .cmd
= OVS_FLOW_CMD_NEW
,
1144 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1145 .policy
= flow_policy
,
1146 .doit
= ovs_flow_cmd_new_or_set
1148 { .cmd
= OVS_FLOW_CMD_DEL
,
1149 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1150 .policy
= flow_policy
,
1151 .doit
= ovs_flow_cmd_del
1153 { .cmd
= OVS_FLOW_CMD_GET
,
1154 .flags
= 0, /* OK for unprivileged users. */
1155 .policy
= flow_policy
,
1156 .doit
= ovs_flow_cmd_get
,
1157 .dumpit
= ovs_flow_cmd_dump
1159 { .cmd
= OVS_FLOW_CMD_SET
,
1160 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1161 .policy
= flow_policy
,
1162 .doit
= ovs_flow_cmd_new_or_set
,
1166 static const struct nla_policy datapath_policy
[OVS_DP_ATTR_MAX
+ 1] = {
1167 #ifdef HAVE_NLA_NUL_STRING
1168 [OVS_DP_ATTR_NAME
] = { .type
= NLA_NUL_STRING
, .len
= IFNAMSIZ
- 1 },
1170 [OVS_DP_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
1171 [OVS_DP_ATTR_IPV4_FRAGS
] = { .type
= NLA_U32
},
1174 static struct genl_family dp_datapath_genl_family
= {
1175 .id
= GENL_ID_GENERATE
,
1176 .hdrsize
= sizeof(struct ovs_header
),
1177 .name
= OVS_DATAPATH_FAMILY
,
1179 .maxattr
= OVS_DP_ATTR_MAX
1182 static struct genl_multicast_group dp_datapath_multicast_group
= {
1183 .name
= OVS_DATAPATH_MCGROUP
1186 static int ovs_dp_cmd_fill_info(struct datapath
*dp
, struct sk_buff
*skb
,
1187 u32 pid
, u32 seq
, u32 flags
, u8 cmd
)
1189 struct ovs_header
*ovs_header
;
1193 ovs_header
= genlmsg_put(skb
, pid
, seq
, &dp_datapath_genl_family
,
1198 ovs_header
->dp_ifindex
= get_dpifindex(dp
);
1201 err
= nla_put_string(skb
, OVS_DP_ATTR_NAME
, dp_name(dp
));
1204 goto nla_put_failure
;
1206 nla
= nla_reserve(skb
, OVS_DP_ATTR_STATS
, sizeof(struct ovs_dp_stats
));
1208 goto nla_put_failure
;
1209 get_dp_stats(dp
, nla_data(nla
));
1211 NLA_PUT_U32(skb
, OVS_DP_ATTR_IPV4_FRAGS
,
1212 dp
->drop_frags
? OVS_DP_FRAG_DROP
: OVS_DP_FRAG_ZERO
);
1214 return genlmsg_end(skb
, ovs_header
);
1217 genlmsg_cancel(skb
, ovs_header
);
1222 static struct sk_buff
*ovs_dp_cmd_build_info(struct datapath
*dp
, u32 pid
,
1225 struct sk_buff
*skb
;
1228 skb
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1230 return ERR_PTR(-ENOMEM
);
1232 retval
= ovs_dp_cmd_fill_info(dp
, skb
, pid
, seq
, 0, cmd
);
1235 return ERR_PTR(retval
);
1240 static int ovs_dp_cmd_validate(struct nlattr
*a
[OVS_DP_ATTR_MAX
+ 1])
1242 if (a
[OVS_DP_ATTR_IPV4_FRAGS
]) {
1243 u32 frags
= nla_get_u32(a
[OVS_DP_ATTR_IPV4_FRAGS
]);
1245 if (frags
!= OVS_DP_FRAG_ZERO
&& frags
!= OVS_DP_FRAG_DROP
)
1249 return CHECK_NUL_STRING(a
[OVS_DP_ATTR_NAME
], IFNAMSIZ
- 1);
1252 /* Called with genl_mutex and optionally with RTNL lock also. */
1253 static struct datapath
*lookup_datapath(struct ovs_header
*ovs_header
, struct nlattr
*a
[OVS_DP_ATTR_MAX
+ 1])
1255 struct datapath
*dp
;
1257 if (!a
[OVS_DP_ATTR_NAME
])
1258 dp
= get_dp(ovs_header
->dp_ifindex
);
1260 struct vport
*vport
;
1263 vport
= vport_locate(nla_data(a
[OVS_DP_ATTR_NAME
]));
1264 dp
= vport
&& vport
->port_no
== OVSP_LOCAL
? vport
->dp
: NULL
;
1267 return dp
? dp
: ERR_PTR(-ENODEV
);
1270 /* Called with genl_mutex. */
1271 static void change_datapath(struct datapath
*dp
, struct nlattr
*a
[OVS_DP_ATTR_MAX
+ 1])
1273 if (a
[OVS_DP_ATTR_IPV4_FRAGS
])
1274 dp
->drop_frags
= nla_get_u32(a
[OVS_DP_ATTR_IPV4_FRAGS
]) == OVS_DP_FRAG_DROP
;
1277 static int ovs_dp_cmd_new(struct sk_buff
*skb
, struct genl_info
*info
)
1279 struct nlattr
**a
= info
->attrs
;
1280 struct vport_parms parms
;
1281 struct sk_buff
*reply
;
1282 struct datapath
*dp
;
1283 struct vport
*vport
;
1287 if (!a
[OVS_DP_ATTR_NAME
])
1290 err
= ovs_dp_cmd_validate(a
);
1296 if (!try_module_get(THIS_MODULE
))
1297 goto err_unlock_rtnl
;
1300 dp
= kzalloc(sizeof(*dp
), GFP_KERNEL
);
1302 goto err_put_module
;
1303 INIT_LIST_HEAD(&dp
->port_list
);
1305 /* Initialize kobject for bridge. This will be added as
1306 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1307 dp
->ifobj
.kset
= NULL
;
1308 kobject_init(&dp
->ifobj
, &dp_ktype
);
1310 /* Allocate table. */
1312 rcu_assign_pointer(dp
->table
, flow_tbl_alloc(TBL_MIN_BUCKETS
));
1317 dp
->stats_percpu
= alloc_percpu(struct dp_stats_percpu
);
1318 if (!dp
->stats_percpu
) {
1320 goto err_destroy_table
;
1323 change_datapath(dp
, a
);
1325 /* Set up our datapath device. */
1326 parms
.name
= nla_data(a
[OVS_DP_ATTR_NAME
]);
1327 parms
.type
= OVS_VPORT_TYPE_INTERNAL
;
1328 parms
.options
= NULL
;
1330 parms
.port_no
= OVSP_LOCAL
;
1331 if (a
[OVS_DP_ATTR_UPCALL_PID
])
1332 parms
.upcall_pid
= nla_get_u32(a
[OVS_DP_ATTR_UPCALL_PID
]);
1334 parms
.upcall_pid
= NETLINK_CB(skb
).pid
;
1336 vport
= new_vport(&parms
);
1337 if (IS_ERR(vport
)) {
1338 err
= PTR_ERR(vport
);
1342 goto err_destroy_percpu
;
1345 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_pid
, info
->snd_seq
, OVS_DP_CMD_NEW
);
1346 err
= PTR_ERR(reply
);
1348 goto err_destroy_local_port
;
1350 list_add_tail(&dp
->list_node
, &dps
);
1351 dp_sysfs_add_dp(dp
);
1355 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1356 dp_datapath_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1359 err_destroy_local_port
:
1360 dp_detach_port(get_vport_protected(dp
, OVSP_LOCAL
));
1362 free_percpu(dp
->stats_percpu
);
1364 flow_tbl_destroy(get_table_protected(dp
));
1368 module_put(THIS_MODULE
);
1375 static int ovs_dp_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1377 struct vport
*vport
, *next_vport
;
1378 struct sk_buff
*reply
;
1379 struct datapath
*dp
;
1382 err
= ovs_dp_cmd_validate(info
->attrs
);
1387 dp
= lookup_datapath(info
->userhdr
, info
->attrs
);
1392 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_pid
, info
->snd_seq
, OVS_DP_CMD_DEL
);
1393 err
= PTR_ERR(reply
);
1397 list_for_each_entry_safe (vport
, next_vport
, &dp
->port_list
, node
)
1398 if (vport
->port_no
!= OVSP_LOCAL
)
1399 dp_detach_port(vport
);
1401 dp_sysfs_del_dp(dp
);
1402 list_del(&dp
->list_node
);
1403 dp_detach_port(get_vport_protected(dp
, OVSP_LOCAL
));
1405 /* rtnl_unlock() will wait until all the references to devices that
1406 * are pending unregistration have been dropped. We do it here to
1407 * ensure that any internal devices (which contain DP pointers) are
1408 * fully destroyed before freeing the datapath.
1412 call_rcu(&dp
->rcu
, destroy_dp_rcu
);
1413 module_put(THIS_MODULE
);
1415 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1416 dp_datapath_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1426 static int ovs_dp_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
1428 struct sk_buff
*reply
;
1429 struct datapath
*dp
;
1432 err
= ovs_dp_cmd_validate(info
->attrs
);
1436 dp
= lookup_datapath(info
->userhdr
, info
->attrs
);
1440 change_datapath(dp
, info
->attrs
);
1442 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_pid
, info
->snd_seq
, OVS_DP_CMD_NEW
);
1443 if (IS_ERR(reply
)) {
1444 err
= PTR_ERR(reply
);
1445 netlink_set_err(INIT_NET_GENL_SOCK
, 0,
1446 dp_datapath_multicast_group
.id
, err
);
1450 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1451 dp_datapath_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1455 static int ovs_dp_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1457 struct sk_buff
*reply
;
1458 struct datapath
*dp
;
1461 err
= ovs_dp_cmd_validate(info
->attrs
);
1465 dp
= lookup_datapath(info
->userhdr
, info
->attrs
);
1469 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_pid
, info
->snd_seq
, OVS_DP_CMD_NEW
);
1471 return PTR_ERR(reply
);
1473 return genlmsg_reply(reply
, info
);
1476 static int ovs_dp_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1478 struct datapath
*dp
;
1479 int skip
= cb
->args
[0];
1482 list_for_each_entry (dp
, &dps
, list_node
) {
1485 if (ovs_dp_cmd_fill_info(dp
, skb
, NETLINK_CB(cb
->skb
).pid
,
1486 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
1487 OVS_DP_CMD_NEW
) < 0)
1497 static struct genl_ops dp_datapath_genl_ops
[] = {
1498 { .cmd
= OVS_DP_CMD_NEW
,
1499 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1500 .policy
= datapath_policy
,
1501 .doit
= ovs_dp_cmd_new
1503 { .cmd
= OVS_DP_CMD_DEL
,
1504 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1505 .policy
= datapath_policy
,
1506 .doit
= ovs_dp_cmd_del
1508 { .cmd
= OVS_DP_CMD_GET
,
1509 .flags
= 0, /* OK for unprivileged users. */
1510 .policy
= datapath_policy
,
1511 .doit
= ovs_dp_cmd_get
,
1512 .dumpit
= ovs_dp_cmd_dump
1514 { .cmd
= OVS_DP_CMD_SET
,
1515 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1516 .policy
= datapath_policy
,
1517 .doit
= ovs_dp_cmd_set
,
1521 static const struct nla_policy vport_policy
[OVS_VPORT_ATTR_MAX
+ 1] = {
1522 #ifdef HAVE_NLA_NUL_STRING
1523 [OVS_VPORT_ATTR_NAME
] = { .type
= NLA_NUL_STRING
, .len
= IFNAMSIZ
- 1 },
1524 [OVS_VPORT_ATTR_STATS
] = { .len
= sizeof(struct ovs_vport_stats
) },
1525 [OVS_VPORT_ATTR_ADDRESS
] = { .len
= ETH_ALEN
},
1527 [OVS_VPORT_ATTR_STATS
] = { .minlen
= sizeof(struct ovs_vport_stats
) },
1528 [OVS_VPORT_ATTR_ADDRESS
] = { .minlen
= ETH_ALEN
},
1530 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NLA_U32
},
1531 [OVS_VPORT_ATTR_TYPE
] = { .type
= NLA_U32
},
1532 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
1533 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NLA_NESTED
},
1536 static struct genl_family dp_vport_genl_family
= {
1537 .id
= GENL_ID_GENERATE
,
1538 .hdrsize
= sizeof(struct ovs_header
),
1539 .name
= OVS_VPORT_FAMILY
,
1541 .maxattr
= OVS_VPORT_ATTR_MAX
1544 struct genl_multicast_group dp_vport_multicast_group
= {
1545 .name
= OVS_VPORT_MCGROUP
1548 /* Called with RTNL lock or RCU read lock. */
1549 static int ovs_vport_cmd_fill_info(struct vport
*vport
, struct sk_buff
*skb
,
1550 u32 pid
, u32 seq
, u32 flags
, u8 cmd
)
1552 struct ovs_header
*ovs_header
;
1557 ovs_header
= genlmsg_put(skb
, pid
, seq
, &dp_vport_genl_family
,
1562 ovs_header
->dp_ifindex
= get_dpifindex(vport
->dp
);
1564 NLA_PUT_U32(skb
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
1565 NLA_PUT_U32(skb
, OVS_VPORT_ATTR_TYPE
, vport_get_type(vport
));
1566 NLA_PUT_STRING(skb
, OVS_VPORT_ATTR_NAME
, vport_get_name(vport
));
1567 NLA_PUT_U32(skb
, OVS_VPORT_ATTR_UPCALL_PID
, vport
->upcall_pid
);
1569 nla
= nla_reserve(skb
, OVS_VPORT_ATTR_STATS
, sizeof(struct ovs_vport_stats
));
1571 goto nla_put_failure
;
1573 vport_get_stats(vport
, nla_data(nla
));
1575 NLA_PUT(skb
, OVS_VPORT_ATTR_ADDRESS
, ETH_ALEN
, vport_get_addr(vport
));
1577 err
= vport_get_options(vport
, skb
);
1578 if (err
== -EMSGSIZE
)
1581 ifindex
= vport_get_ifindex(vport
);
1583 NLA_PUT_U32(skb
, OVS_VPORT_ATTR_IFINDEX
, ifindex
);
1585 return genlmsg_end(skb
, ovs_header
);
1590 genlmsg_cancel(skb
, ovs_header
);
1594 /* Called with RTNL lock or RCU read lock. */
1595 struct sk_buff
*ovs_vport_cmd_build_info(struct vport
*vport
, u32 pid
,
1598 struct sk_buff
*skb
;
1601 skb
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_ATOMIC
);
1603 return ERR_PTR(-ENOMEM
);
1605 retval
= ovs_vport_cmd_fill_info(vport
, skb
, pid
, seq
, 0, cmd
);
1608 return ERR_PTR(retval
);
1613 static int ovs_vport_cmd_validate(struct nlattr
*a
[OVS_VPORT_ATTR_MAX
+ 1])
1615 return CHECK_NUL_STRING(a
[OVS_VPORT_ATTR_NAME
], IFNAMSIZ
- 1);
1618 /* Called with RTNL lock or RCU read lock. */
1619 static struct vport
*lookup_vport(struct ovs_header
*ovs_header
,
1620 struct nlattr
*a
[OVS_VPORT_ATTR_MAX
+ 1])
1622 struct datapath
*dp
;
1623 struct vport
*vport
;
1625 if (a
[OVS_VPORT_ATTR_NAME
]) {
1626 vport
= vport_locate(nla_data(a
[OVS_VPORT_ATTR_NAME
]));
1628 return ERR_PTR(-ENODEV
);
1630 } else if (a
[OVS_VPORT_ATTR_PORT_NO
]) {
1631 u32 port_no
= nla_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1633 if (port_no
>= DP_MAX_PORTS
)
1634 return ERR_PTR(-EFBIG
);
1636 dp
= get_dp(ovs_header
->dp_ifindex
);
1638 return ERR_PTR(-ENODEV
);
1640 vport
= get_vport_protected(dp
, port_no
);
1642 return ERR_PTR(-ENOENT
);
1645 return ERR_PTR(-EINVAL
);
1648 /* Called with RTNL lock. */
1649 static int change_vport(struct vport
*vport
, struct nlattr
*a
[OVS_VPORT_ATTR_MAX
+ 1])
1653 if (a
[OVS_VPORT_ATTR_STATS
])
1654 vport_set_stats(vport
, nla_data(a
[OVS_VPORT_ATTR_STATS
]));
1656 if (a
[OVS_VPORT_ATTR_ADDRESS
])
1657 err
= vport_set_addr(vport
, nla_data(a
[OVS_VPORT_ATTR_ADDRESS
]));
1662 static int ovs_vport_cmd_new(struct sk_buff
*skb
, struct genl_info
*info
)
1664 struct nlattr
**a
= info
->attrs
;
1665 struct ovs_header
*ovs_header
= info
->userhdr
;
1666 struct vport_parms parms
;
1667 struct sk_buff
*reply
;
1668 struct vport
*vport
;
1669 struct datapath
*dp
;
1674 if (!a
[OVS_VPORT_ATTR_NAME
] || !a
[OVS_VPORT_ATTR_TYPE
])
1677 err
= ovs_vport_cmd_validate(a
);
1682 dp
= get_dp(ovs_header
->dp_ifindex
);
1687 if (a
[OVS_VPORT_ATTR_PORT_NO
]) {
1688 port_no
= nla_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1691 if (port_no
>= DP_MAX_PORTS
)
1694 vport
= get_vport_protected(dp
, port_no
);
1699 for (port_no
= 1; ; port_no
++) {
1700 if (port_no
>= DP_MAX_PORTS
) {
1704 vport
= get_vport_protected(dp
, port_no
);
1710 parms
.name
= nla_data(a
[OVS_VPORT_ATTR_NAME
]);
1711 parms
.type
= nla_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
1712 parms
.options
= a
[OVS_VPORT_ATTR_OPTIONS
];
1714 parms
.port_no
= port_no
;
1715 if (a
[OVS_VPORT_ATTR_UPCALL_PID
])
1716 parms
.upcall_pid
= nla_get_u32(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
1718 parms
.upcall_pid
= NETLINK_CB(skb
).pid
;
1720 vport
= new_vport(&parms
);
1721 err
= PTR_ERR(vport
);
1725 dp_sysfs_add_if(vport
);
1727 err
= change_vport(vport
, a
);
1729 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_pid
,
1730 info
->snd_seq
, OVS_VPORT_CMD_NEW
);
1732 err
= PTR_ERR(reply
);
1735 dp_detach_port(vport
);
1738 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1739 dp_vport_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1748 static int ovs_vport_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
1750 struct nlattr
**a
= info
->attrs
;
1751 struct sk_buff
*reply
;
1752 struct vport
*vport
;
1755 err
= ovs_vport_cmd_validate(a
);
1760 vport
= lookup_vport(info
->userhdr
, a
);
1761 err
= PTR_ERR(vport
);
1766 if (a
[OVS_VPORT_ATTR_OPTIONS
])
1767 err
= vport_set_options(vport
, a
[OVS_VPORT_ATTR_OPTIONS
]);
1769 err
= change_vport(vport
, a
);
1770 if (!err
&& a
[OVS_VPORT_ATTR_UPCALL_PID
])
1771 vport
->upcall_pid
= nla_get_u32(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
1773 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_pid
, info
->snd_seq
,
1775 if (IS_ERR(reply
)) {
1776 err
= PTR_ERR(reply
);
1777 netlink_set_err(INIT_NET_GENL_SOCK
, 0,
1778 dp_vport_multicast_group
.id
, err
);
1782 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1783 dp_vport_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1791 static int ovs_vport_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1793 struct nlattr
**a
= info
->attrs
;
1794 struct sk_buff
*reply
;
1795 struct vport
*vport
;
1798 err
= ovs_vport_cmd_validate(a
);
1803 vport
= lookup_vport(info
->userhdr
, a
);
1804 err
= PTR_ERR(vport
);
1808 if (vport
->port_no
== OVSP_LOCAL
) {
1813 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_pid
, info
->snd_seq
,
1815 err
= PTR_ERR(reply
);
1819 dp_detach_port(vport
);
1821 genl_notify(reply
, genl_info_net(info
), info
->snd_pid
,
1822 dp_vport_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1830 static int ovs_vport_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1832 struct nlattr
**a
= info
->attrs
;
1833 struct ovs_header
*ovs_header
= info
->userhdr
;
1834 struct sk_buff
*reply
;
1835 struct vport
*vport
;
1838 err
= ovs_vport_cmd_validate(a
);
1843 vport
= lookup_vport(ovs_header
, a
);
1844 err
= PTR_ERR(vport
);
1848 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_pid
, info
->snd_seq
,
1850 err
= PTR_ERR(reply
);
1856 return genlmsg_reply(reply
, info
);
1864 static int ovs_vport_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1866 struct ovs_header
*ovs_header
= genlmsg_data(nlmsg_data(cb
->nlh
));
1867 struct datapath
*dp
;
1871 dp
= get_dp(ovs_header
->dp_ifindex
);
1876 for (port_no
= cb
->args
[0]; port_no
< DP_MAX_PORTS
; port_no
++) {
1877 struct vport
*vport
;
1879 vport
= get_vport_protected(dp
, port_no
);
1883 if (ovs_vport_cmd_fill_info(vport
, skb
, NETLINK_CB(cb
->skb
).pid
,
1884 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
1885 OVS_VPORT_CMD_NEW
) < 0)
1890 cb
->args
[0] = port_no
;
1896 static struct genl_ops dp_vport_genl_ops
[] = {
1897 { .cmd
= OVS_VPORT_CMD_NEW
,
1898 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1899 .policy
= vport_policy
,
1900 .doit
= ovs_vport_cmd_new
1902 { .cmd
= OVS_VPORT_CMD_DEL
,
1903 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1904 .policy
= vport_policy
,
1905 .doit
= ovs_vport_cmd_del
1907 { .cmd
= OVS_VPORT_CMD_GET
,
1908 .flags
= 0, /* OK for unprivileged users. */
1909 .policy
= vport_policy
,
1910 .doit
= ovs_vport_cmd_get
,
1911 .dumpit
= ovs_vport_cmd_dump
1913 { .cmd
= OVS_VPORT_CMD_SET
,
1914 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1915 .policy
= vport_policy
,
1916 .doit
= ovs_vport_cmd_set
,
/* Bundles one genetlink family with its operation table and (optional)
 * multicast group so registration/unregistration can be table-driven.
 * NOTE(review): the n_ops field was lost in extraction; its presence is
 * implied by the three-element initializers in dp_genl_families and the
 * registration call — verify against VCS. */
struct genl_family_and_ops {
	struct genl_family *family;
	struct genl_ops *ops;
	int n_ops;
	struct genl_multicast_group *group;
};
1927 static const struct genl_family_and_ops dp_genl_families
[] = {
1928 { &dp_datapath_genl_family
,
1929 dp_datapath_genl_ops
, ARRAY_SIZE(dp_datapath_genl_ops
),
1930 &dp_datapath_multicast_group
},
1931 { &dp_vport_genl_family
,
1932 dp_vport_genl_ops
, ARRAY_SIZE(dp_vport_genl_ops
),
1933 &dp_vport_multicast_group
},
1934 { &dp_flow_genl_family
,
1935 dp_flow_genl_ops
, ARRAY_SIZE(dp_flow_genl_ops
),
1936 &dp_flow_multicast_group
},
1937 { &dp_packet_genl_family
,
1938 dp_packet_genl_ops
, ARRAY_SIZE(dp_packet_genl_ops
),
1942 static void dp_unregister_genl(int n_families
)
1946 for (i
= 0; i
< n_families
; i
++)
1947 genl_unregister_family(dp_genl_families
[i
].family
);
1950 static int dp_register_genl(void)
1957 for (i
= 0; i
< ARRAY_SIZE(dp_genl_families
); i
++) {
1958 const struct genl_family_and_ops
*f
= &dp_genl_families
[i
];
1960 err
= genl_register_family_with_ops(f
->family
, f
->ops
,
1967 err
= genl_register_mc_group(f
->family
, f
->group
);
1976 dp_unregister_genl(n_registered
);
1980 static int __init
dp_init(void)
1982 struct sk_buff
*dummy_skb
;
1985 BUILD_BUG_ON(sizeof(struct ovs_skb_cb
) > sizeof(dummy_skb
->cb
));
1987 printk("Open vSwitch %s, built "__DATE__
" "__TIME__
"\n", VERSION BUILDNR
);
1995 goto error_tnl_exit
;
1999 goto error_flow_exit
;
2001 err
= register_netdevice_notifier(&dp_device_notifier
);
2003 goto error_vport_exit
;
2005 err
= dp_register_genl();
2007 goto error_unreg_notifier
;
2011 error_unreg_notifier
:
2012 unregister_netdevice_notifier(&dp_device_notifier
);
2023 static void dp_cleanup(void)
2026 dp_unregister_genl(ARRAY_SIZE(dp_genl_families
));
2027 unregister_netdevice_notifier(&dp_device_notifier
);
/* Module entry/exit points and metadata. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");