2 * Copyright (c) 2007-2012 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/init.h>
22 #include <linux/module.h>
23 #include <linux/if_arp.h>
24 #include <linux/if_vlan.h>
27 #include <linux/jhash.h>
28 #include <linux/delay.h>
29 #include <linux/time.h>
30 #include <linux/etherdevice.h>
31 #include <linux/genetlink.h>
32 #include <linux/kernel.h>
33 #include <linux/kthread.h>
34 #include <linux/mutex.h>
35 #include <linux/percpu.h>
36 #include <linux/rcupdate.h>
37 #include <linux/tcp.h>
38 #include <linux/udp.h>
39 #include <linux/version.h>
40 #include <linux/ethtool.h>
41 #include <linux/wait.h>
42 #include <asm/div64.h>
43 #include <linux/highmem.h>
44 #include <linux/netfilter_bridge.h>
45 #include <linux/netfilter_ipv4.h>
46 #include <linux/inetdevice.h>
47 #include <linux/list.h>
48 #include <linux/openvswitch.h>
49 #include <linux/rculist.h>
50 #include <linux/dmi.h>
51 #include <net/genetlink.h>
52 #include <net/net_namespace.h>
53 #include <net/netns/generic.h>
58 #include "genl_exec.h"
61 #include "vport-internal_dev.h"
63 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
64 LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)
65 #error Kernels before 2.6.18 or after 3.8 are not supported by this version of Open vSwitch.
68 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
69 static void rehash_flow_table(struct work_struct
*work
);
70 static DECLARE_DELAYED_WORK(rehash_flow_wq
, rehash_flow_table
);
72 int ovs_net_id __read_mostly
;
77 * Writes to device state (add/remove datapath, port, set operations on vports,
78 * etc.) are protected by RTNL.
80 * Writes to other state (flow table modifications, set miscellaneous datapath
81 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
84 * Reads are protected by RCU.
86 * There are a few special cases (mostly stats) that have their own
87 * synchronization but they nest under all of the above and don't interact with
91 static struct vport
*new_vport(const struct vport_parms
*);
92 static int queue_gso_packets(struct net
*, int dp_ifindex
, struct sk_buff
*,
93 const struct dp_upcall_info
*);
94 static int queue_userspace_packet(struct net
*, int dp_ifindex
,
96 const struct dp_upcall_info
*);
98 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
99 static struct datapath
*get_dp(struct net
*net
, int dp_ifindex
)
101 struct datapath
*dp
= NULL
;
102 struct net_device
*dev
;
105 dev
= dev_get_by_index_rcu(net
, dp_ifindex
);
107 struct vport
*vport
= ovs_internal_dev_get_vport(dev
);
116 /* Must be called with rcu_read_lock or RTNL lock. */
117 const char *ovs_dp_name(const struct datapath
*dp
)
119 struct vport
*vport
= ovs_vport_rtnl_rcu(dp
, OVSP_LOCAL
);
120 return vport
->ops
->get_name(vport
);
123 static int get_dpifindex(struct datapath
*dp
)
130 local
= ovs_vport_rcu(dp
, OVSP_LOCAL
);
132 ifindex
= local
->ops
->get_ifindex(local
);
141 static void destroy_dp_rcu(struct rcu_head
*rcu
)
143 struct datapath
*dp
= container_of(rcu
, struct datapath
, rcu
);
145 ovs_flow_tbl_destroy((__force
struct flow_table
*)dp
->table
);
146 free_percpu(dp
->stats_percpu
);
147 release_net(ovs_dp_get_net(dp
));
152 static struct hlist_head
*vport_hash_bucket(const struct datapath
*dp
,
155 return &dp
->ports
[port_no
& (DP_VPORT_HASH_BUCKETS
- 1)];
158 struct vport
*ovs_lookup_vport(const struct datapath
*dp
, u16 port_no
)
161 struct hlist_node
*n
;
162 struct hlist_head
*head
;
164 head
= vport_hash_bucket(dp
, port_no
);
165 hlist_for_each_entry_rcu(vport
, n
, head
, dp_hash_node
) {
166 if (vport
->port_no
== port_no
)
172 /* Called with RTNL lock and genl_lock. */
173 static struct vport
*new_vport(const struct vport_parms
*parms
)
177 vport
= ovs_vport_add(parms
);
178 if (!IS_ERR(vport
)) {
179 struct datapath
*dp
= parms
->dp
;
180 struct hlist_head
*head
= vport_hash_bucket(dp
, vport
->port_no
);
182 hlist_add_head_rcu(&vport
->dp_hash_node
, head
);
187 /* Called with RTNL lock. */
188 void ovs_dp_detach_port(struct vport
*p
)
192 /* First drop references to device. */
193 hlist_del_rcu(&p
->dp_hash_node
);
195 /* Then destroy it. */
199 /* Must be called with rcu_read_lock. */
200 void ovs_dp_process_received_packet(struct vport
*p
, struct sk_buff
*skb
)
202 struct datapath
*dp
= p
->dp
;
203 struct sw_flow
*flow
;
204 struct dp_stats_percpu
*stats
;
208 stats
= this_cpu_ptr(dp
->stats_percpu
);
210 if (!OVS_CB(skb
)->flow
) {
211 struct sw_flow_key key
;
214 /* Extract flow from 'skb' into 'key'. */
215 error
= ovs_flow_extract(skb
, p
->port_no
, &key
, &key_len
);
216 if (unlikely(error
)) {
222 flow
= ovs_flow_tbl_lookup(rcu_dereference(dp
->table
),
224 if (unlikely(!flow
)) {
225 struct dp_upcall_info upcall
;
227 upcall
.cmd
= OVS_PACKET_CMD_MISS
;
229 upcall
.userdata
= NULL
;
230 upcall
.portid
= p
->upcall_portid
;
231 ovs_dp_upcall(dp
, skb
, &upcall
);
233 stats_counter
= &stats
->n_missed
;
237 OVS_CB(skb
)->flow
= flow
;
240 stats_counter
= &stats
->n_hit
;
241 ovs_flow_used(OVS_CB(skb
)->flow
, skb
);
242 ovs_execute_actions(dp
, skb
);
245 /* Update datapath statistics. */
246 u64_stats_update_begin(&stats
->sync
);
248 u64_stats_update_end(&stats
->sync
);
251 static struct genl_family dp_packet_genl_family
= {
252 .id
= GENL_ID_GENERATE
,
253 .hdrsize
= sizeof(struct ovs_header
),
254 .name
= OVS_PACKET_FAMILY
,
255 .version
= OVS_PACKET_VERSION
,
256 .maxattr
= OVS_PACKET_ATTR_MAX
,
260 int ovs_dp_upcall(struct datapath
*dp
, struct sk_buff
*skb
,
261 const struct dp_upcall_info
*upcall_info
)
263 struct dp_stats_percpu
*stats
;
267 if (upcall_info
->portid
== 0) {
272 dp_ifindex
= get_dpifindex(dp
);
278 forward_ip_summed(skb
, true);
280 if (!skb_is_gso(skb
))
281 err
= queue_userspace_packet(ovs_dp_get_net(dp
), dp_ifindex
, skb
, upcall_info
);
283 err
= queue_gso_packets(ovs_dp_get_net(dp
), dp_ifindex
, skb
, upcall_info
);
290 stats
= this_cpu_ptr(dp
->stats_percpu
);
292 u64_stats_update_begin(&stats
->sync
);
294 u64_stats_update_end(&stats
->sync
);
299 static int queue_gso_packets(struct net
*net
, int dp_ifindex
,
301 const struct dp_upcall_info
*upcall_info
)
303 unsigned short gso_type
= skb_shinfo(skb
)->gso_type
;
304 struct dp_upcall_info later_info
;
305 struct sw_flow_key later_key
;
306 struct sk_buff
*segs
, *nskb
;
309 segs
= skb_gso_segment(skb
, NETIF_F_SG
| NETIF_F_HW_CSUM
);
311 return PTR_ERR(segs
);
313 /* Queue all of the segments. */
316 err
= queue_userspace_packet(net
, dp_ifindex
, skb
, upcall_info
);
320 if (skb
== segs
&& gso_type
& SKB_GSO_UDP
) {
321 /* The initial flow key extracted by ovs_flow_extract()
322 * in this case is for a first fragment, so we need to
323 * properly mark later fragments.
325 later_key
= *upcall_info
->key
;
326 later_key
.ip
.frag
= OVS_FRAG_TYPE_LATER
;
328 later_info
= *upcall_info
;
329 later_info
.key
= &later_key
;
330 upcall_info
= &later_info
;
332 } while ((skb
= skb
->next
));
334 /* Free all of the segments. */
342 } while ((skb
= nskb
));
346 static int queue_userspace_packet(struct net
*net
, int dp_ifindex
,
348 const struct dp_upcall_info
*upcall_info
)
350 struct ovs_header
*upcall
;
351 struct sk_buff
*nskb
= NULL
;
352 struct sk_buff
*user_skb
; /* to be queued to userspace */
357 if (vlan_tx_tag_present(skb
)) {
358 nskb
= skb_clone(skb
, GFP_ATOMIC
);
362 err
= vlan_deaccel_tag(nskb
);
369 if (nla_attr_size(skb
->len
) > USHRT_MAX
) {
374 len
= sizeof(struct ovs_header
);
375 len
+= nla_total_size(skb
->len
);
376 len
+= nla_total_size(FLOW_BUFSIZE
);
377 if (upcall_info
->cmd
== OVS_PACKET_CMD_ACTION
)
378 len
+= nla_total_size(8);
380 user_skb
= genlmsg_new(len
, GFP_ATOMIC
);
386 upcall
= genlmsg_put(user_skb
, 0, 0, &dp_packet_genl_family
,
387 0, upcall_info
->cmd
);
388 upcall
->dp_ifindex
= dp_ifindex
;
390 nla
= nla_nest_start(user_skb
, OVS_PACKET_ATTR_KEY
);
391 ovs_flow_to_nlattrs(upcall_info
->key
, user_skb
);
392 nla_nest_end(user_skb
, nla
);
394 if (upcall_info
->userdata
)
395 nla_put_u64(user_skb
, OVS_PACKET_ATTR_USERDATA
,
396 nla_get_u64(upcall_info
->userdata
));
398 nla
= __nla_reserve(user_skb
, OVS_PACKET_ATTR_PACKET
, skb
->len
);
400 skb_copy_and_csum_dev(skb
, nla_data(nla
));
402 err
= genlmsg_unicast(net
, user_skb
, upcall_info
->portid
);
409 /* Called with genl_mutex. */
410 static int flush_flows(struct datapath
*dp
)
412 struct flow_table
*old_table
;
413 struct flow_table
*new_table
;
415 old_table
= genl_dereference(dp
->table
);
416 new_table
= ovs_flow_tbl_alloc(TBL_MIN_BUCKETS
);
420 rcu_assign_pointer(dp
->table
, new_table
);
422 ovs_flow_tbl_deferred_destroy(old_table
);
426 static struct nlattr
*reserve_sfa_size(struct sw_flow_actions
**sfa
, int attr_len
)
429 struct sw_flow_actions
*acts
;
431 int req_size
= NLA_ALIGN(attr_len
);
432 int next_offset
= offsetof(struct sw_flow_actions
, actions
) +
435 if (req_size
<= (ksize(*sfa
) - next_offset
))
438 new_acts_size
= ksize(*sfa
) * 2;
440 if (new_acts_size
> MAX_ACTIONS_BUFSIZE
) {
441 if ((MAX_ACTIONS_BUFSIZE
- next_offset
) < req_size
)
442 return ERR_PTR(-EMSGSIZE
);
443 new_acts_size
= MAX_ACTIONS_BUFSIZE
;
446 acts
= ovs_flow_actions_alloc(new_acts_size
);
450 memcpy(acts
->actions
, (*sfa
)->actions
, (*sfa
)->actions_len
);
451 acts
->actions_len
= (*sfa
)->actions_len
;
456 (*sfa
)->actions_len
+= req_size
;
457 return (struct nlattr
*) ((unsigned char *)(*sfa
) + next_offset
);
460 static int add_action(struct sw_flow_actions
**sfa
, int attrtype
, void *data
, int len
)
464 a
= reserve_sfa_size(sfa
, nla_attr_size(len
));
468 a
->nla_type
= attrtype
;
469 a
->nla_len
= nla_attr_size(len
);
472 memcpy(nla_data(a
), data
, len
);
473 memset((unsigned char *) a
+ a
->nla_len
, 0, nla_padlen(len
));
478 static inline int add_nested_action_start(struct sw_flow_actions
**sfa
, int attrtype
)
480 int used
= (*sfa
)->actions_len
;
483 err
= add_action(sfa
, attrtype
, NULL
, 0);
490 static inline void add_nested_action_end(struct sw_flow_actions
*sfa
, int st_offset
)
492 struct nlattr
*a
= (struct nlattr
*) ((unsigned char *)sfa
->actions
+ st_offset
);
494 a
->nla_len
= sfa
->actions_len
- st_offset
;
497 static int validate_and_copy_actions(const struct nlattr
*attr
,
498 const struct sw_flow_key
*key
, int depth
,
499 struct sw_flow_actions
**sfa
);
501 static int validate_and_copy_sample(const struct nlattr
*attr
,
502 const struct sw_flow_key
*key
, int depth
,
503 struct sw_flow_actions
**sfa
)
505 const struct nlattr
*attrs
[OVS_SAMPLE_ATTR_MAX
+ 1];
506 const struct nlattr
*probability
, *actions
;
507 const struct nlattr
*a
;
508 int rem
, start
, err
, st_acts
;
510 memset(attrs
, 0, sizeof(attrs
));
511 nla_for_each_nested(a
, attr
, rem
) {
512 int type
= nla_type(a
);
513 if (!type
|| type
> OVS_SAMPLE_ATTR_MAX
|| attrs
[type
])
520 probability
= attrs
[OVS_SAMPLE_ATTR_PROBABILITY
];
521 if (!probability
|| nla_len(probability
) != sizeof(u32
))
524 actions
= attrs
[OVS_SAMPLE_ATTR_ACTIONS
];
525 if (!actions
|| (nla_len(actions
) && nla_len(actions
) < NLA_HDRLEN
))
528 /* validation done, copy sample action. */
529 start
= add_nested_action_start(sfa
, OVS_ACTION_ATTR_SAMPLE
);
532 err
= add_action(sfa
, OVS_SAMPLE_ATTR_PROBABILITY
, nla_data(probability
), sizeof(u32
));
535 st_acts
= add_nested_action_start(sfa
, OVS_SAMPLE_ATTR_ACTIONS
);
539 err
= validate_and_copy_actions(actions
, key
, depth
+ 1, sfa
);
543 add_nested_action_end(*sfa
, st_acts
);
544 add_nested_action_end(*sfa
, start
);
549 static int validate_tp_port(const struct sw_flow_key
*flow_key
)
551 if (flow_key
->eth
.type
== htons(ETH_P_IP
)) {
552 if (flow_key
->ipv4
.tp
.src
|| flow_key
->ipv4
.tp
.dst
)
554 } else if (flow_key
->eth
.type
== htons(ETH_P_IPV6
)) {
555 if (flow_key
->ipv6
.tp
.src
|| flow_key
->ipv6
.tp
.dst
)
562 static int validate_and_copy_set_tun(const struct nlattr
*attr
,
563 struct sw_flow_actions
**sfa
)
565 struct ovs_key_ipv4_tunnel tun_key
;
568 err
= ipv4_tun_from_nlattr(nla_data(attr
), &tun_key
);
572 start
= add_nested_action_start(sfa
, OVS_ACTION_ATTR_SET
);
576 err
= add_action(sfa
, OVS_KEY_ATTR_IPV4_TUNNEL
, &tun_key
, sizeof(tun_key
));
577 add_nested_action_end(*sfa
, start
);
582 static int validate_set(const struct nlattr
*a
,
583 const struct sw_flow_key
*flow_key
,
584 struct sw_flow_actions
**sfa
,
587 const struct nlattr
*ovs_key
= nla_data(a
);
588 int key_type
= nla_type(ovs_key
);
590 /* There can be only one key in an action */
591 if (nla_total_size(nla_len(ovs_key
)) != nla_len(a
))
594 if (key_type
> OVS_KEY_ATTR_MAX
||
595 (ovs_key_lens
[key_type
] != nla_len(ovs_key
) &&
596 ovs_key_lens
[key_type
] != -1))
600 const struct ovs_key_ipv4
*ipv4_key
;
601 const struct ovs_key_ipv6
*ipv6_key
;
604 case OVS_KEY_ATTR_PRIORITY
:
605 case OVS_KEY_ATTR_TUN_ID
:
606 case OVS_KEY_ATTR_ETHERNET
:
609 case OVS_KEY_ATTR_SKB_MARK
:
610 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
611 if (nla_get_u32(ovs_key
) != 0)
616 case OVS_KEY_ATTR_TUNNEL
:
618 err
= validate_and_copy_set_tun(a
, sfa
);
623 case OVS_KEY_ATTR_IPV4
:
624 if (flow_key
->eth
.type
!= htons(ETH_P_IP
))
627 if (!flow_key
->ip
.proto
)
630 ipv4_key
= nla_data(ovs_key
);
631 if (ipv4_key
->ipv4_proto
!= flow_key
->ip
.proto
)
634 if (ipv4_key
->ipv4_frag
!= flow_key
->ip
.frag
)
639 case OVS_KEY_ATTR_IPV6
:
640 if (flow_key
->eth
.type
!= htons(ETH_P_IPV6
))
643 if (!flow_key
->ip
.proto
)
646 ipv6_key
= nla_data(ovs_key
);
647 if (ipv6_key
->ipv6_proto
!= flow_key
->ip
.proto
)
650 if (ipv6_key
->ipv6_frag
!= flow_key
->ip
.frag
)
653 if (ntohl(ipv6_key
->ipv6_label
) & 0xFFF00000)
658 case OVS_KEY_ATTR_TCP
:
659 if (flow_key
->ip
.proto
!= IPPROTO_TCP
)
662 return validate_tp_port(flow_key
);
664 case OVS_KEY_ATTR_UDP
:
665 if (flow_key
->ip
.proto
!= IPPROTO_UDP
)
668 return validate_tp_port(flow_key
);
677 static int validate_userspace(const struct nlattr
*attr
)
679 static const struct nla_policy userspace_policy
[OVS_USERSPACE_ATTR_MAX
+ 1] = {
680 [OVS_USERSPACE_ATTR_PID
] = {.type
= NLA_U32
},
681 [OVS_USERSPACE_ATTR_USERDATA
] = {.type
= NLA_U64
},
683 struct nlattr
*a
[OVS_USERSPACE_ATTR_MAX
+ 1];
686 error
= nla_parse_nested(a
, OVS_USERSPACE_ATTR_MAX
,
687 attr
, userspace_policy
);
691 if (!a
[OVS_USERSPACE_ATTR_PID
] ||
692 !nla_get_u32(a
[OVS_USERSPACE_ATTR_PID
]))
698 static int copy_action(const struct nlattr
*from
,
699 struct sw_flow_actions
**sfa
)
701 int totlen
= NLA_ALIGN(from
->nla_len
);
704 to
= reserve_sfa_size(sfa
, from
->nla_len
);
708 memcpy(to
, from
, totlen
);
712 static int validate_and_copy_actions(const struct nlattr
*attr
,
713 const struct sw_flow_key
*key
,
715 struct sw_flow_actions
**sfa
)
717 const struct nlattr
*a
;
720 if (depth
>= SAMPLE_ACTION_DEPTH
)
723 nla_for_each_nested(a
, attr
, rem
) {
724 /* Expected argument lengths, (u32)-1 for variable length. */
725 static const u32 action_lens
[OVS_ACTION_ATTR_MAX
+ 1] = {
726 [OVS_ACTION_ATTR_OUTPUT
] = sizeof(u32
),
727 [OVS_ACTION_ATTR_USERSPACE
] = (u32
)-1,
728 [OVS_ACTION_ATTR_PUSH_VLAN
] = sizeof(struct ovs_action_push_vlan
),
729 [OVS_ACTION_ATTR_POP_VLAN
] = 0,
730 [OVS_ACTION_ATTR_SET
] = (u32
)-1,
731 [OVS_ACTION_ATTR_SAMPLE
] = (u32
)-1
733 const struct ovs_action_push_vlan
*vlan
;
734 int type
= nla_type(a
);
737 if (type
> OVS_ACTION_ATTR_MAX
||
738 (action_lens
[type
] != nla_len(a
) &&
739 action_lens
[type
] != (u32
)-1))
744 case OVS_ACTION_ATTR_UNSPEC
:
747 case OVS_ACTION_ATTR_USERSPACE
:
748 err
= validate_userspace(a
);
753 case OVS_ACTION_ATTR_OUTPUT
:
754 if (nla_get_u32(a
) >= DP_MAX_PORTS
)
759 case OVS_ACTION_ATTR_POP_VLAN
:
762 case OVS_ACTION_ATTR_PUSH_VLAN
:
764 if (vlan
->vlan_tpid
!= htons(ETH_P_8021Q
))
766 if (!(vlan
->vlan_tci
& htons(VLAN_TAG_PRESENT
)))
770 case OVS_ACTION_ATTR_SET
:
771 err
= validate_set(a
, key
, sfa
, &skip_copy
);
776 case OVS_ACTION_ATTR_SAMPLE
:
777 err
= validate_and_copy_sample(a
, key
, depth
, sfa
);
787 err
= copy_action(a
, sfa
);
799 static void clear_stats(struct sw_flow
*flow
)
803 flow
->packet_count
= 0;
804 flow
->byte_count
= 0;
807 static int ovs_packet_cmd_execute(struct sk_buff
*skb
, struct genl_info
*info
)
809 struct ovs_header
*ovs_header
= info
->userhdr
;
810 struct nlattr
**a
= info
->attrs
;
811 struct sw_flow_actions
*acts
;
812 struct sk_buff
*packet
;
813 struct sw_flow
*flow
;
821 if (!a
[OVS_PACKET_ATTR_PACKET
] || !a
[OVS_PACKET_ATTR_KEY
] ||
822 !a
[OVS_PACKET_ATTR_ACTIONS
] ||
823 nla_len(a
[OVS_PACKET_ATTR_PACKET
]) < ETH_HLEN
)
826 len
= nla_len(a
[OVS_PACKET_ATTR_PACKET
]);
827 packet
= __dev_alloc_skb(NET_IP_ALIGN
+ len
, GFP_KERNEL
);
831 skb_reserve(packet
, NET_IP_ALIGN
);
833 memcpy(__skb_put(packet
, len
), nla_data(a
[OVS_PACKET_ATTR_PACKET
]), len
);
835 skb_reset_mac_header(packet
);
836 eth
= eth_hdr(packet
);
838 /* Normally, setting the skb 'protocol' field would be handled by a
839 * call to eth_type_trans(), but it assumes there's a sending
840 * device, which we may not have. */
841 if (ntohs(eth
->h_proto
) >= 1536)
842 packet
->protocol
= eth
->h_proto
;
844 packet
->protocol
= htons(ETH_P_802_2
);
846 /* Build an sw_flow for sending this packet. */
847 flow
= ovs_flow_alloc();
852 err
= ovs_flow_extract(packet
, -1, &flow
->key
, &key_len
);
856 err
= ovs_flow_metadata_from_nlattrs(flow
, key_len
, a
[OVS_PACKET_ATTR_KEY
]);
859 acts
= ovs_flow_actions_alloc(nla_len(a
[OVS_PACKET_ATTR_ACTIONS
]));
864 err
= validate_and_copy_actions(a
[OVS_PACKET_ATTR_ACTIONS
], &flow
->key
, 0, &acts
);
865 rcu_assign_pointer(flow
->sf_acts
, acts
);
869 OVS_CB(packet
)->flow
= flow
;
870 packet
->priority
= flow
->key
.phy
.priority
;
871 skb_set_mark(packet
, flow
->key
.phy
.skb_mark
);
874 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
880 err
= ovs_execute_actions(dp
, packet
);
897 static const struct nla_policy packet_policy
[OVS_PACKET_ATTR_MAX
+ 1] = {
898 [OVS_PACKET_ATTR_PACKET
] = { .type
= NLA_UNSPEC
},
899 [OVS_PACKET_ATTR_KEY
] = { .type
= NLA_NESTED
},
900 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NLA_NESTED
},
903 static struct genl_ops dp_packet_genl_ops
[] = {
904 { .cmd
= OVS_PACKET_CMD_EXECUTE
,
905 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
906 .policy
= packet_policy
,
907 .doit
= ovs_packet_cmd_execute
911 static void get_dp_stats(struct datapath
*dp
, struct ovs_dp_stats
*stats
)
914 struct flow_table
*table
= genl_dereference(dp
->table
);
916 stats
->n_flows
= ovs_flow_tbl_count(table
);
918 stats
->n_hit
= stats
->n_missed
= stats
->n_lost
= 0;
919 for_each_possible_cpu(i
) {
920 const struct dp_stats_percpu
*percpu_stats
;
921 struct dp_stats_percpu local_stats
;
924 percpu_stats
= per_cpu_ptr(dp
->stats_percpu
, i
);
927 start
= u64_stats_fetch_begin_bh(&percpu_stats
->sync
);
928 local_stats
= *percpu_stats
;
929 } while (u64_stats_fetch_retry_bh(&percpu_stats
->sync
, start
));
931 stats
->n_hit
+= local_stats
.n_hit
;
932 stats
->n_missed
+= local_stats
.n_missed
;
933 stats
->n_lost
+= local_stats
.n_lost
;
937 static const struct nla_policy flow_policy
[OVS_FLOW_ATTR_MAX
+ 1] = {
938 [OVS_FLOW_ATTR_KEY
] = { .type
= NLA_NESTED
},
939 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NLA_NESTED
},
940 [OVS_FLOW_ATTR_CLEAR
] = { .type
= NLA_FLAG
},
943 static struct genl_family dp_flow_genl_family
= {
944 .id
= GENL_ID_GENERATE
,
945 .hdrsize
= sizeof(struct ovs_header
),
946 .name
= OVS_FLOW_FAMILY
,
947 .version
= OVS_FLOW_VERSION
,
948 .maxattr
= OVS_FLOW_ATTR_MAX
,
952 static struct genl_multicast_group ovs_dp_flow_multicast_group
= {
953 .name
= OVS_FLOW_MCGROUP
956 static int actions_to_attr(const struct nlattr
*attr
, int len
, struct sk_buff
*skb
);
957 static int sample_action_to_attr(const struct nlattr
*attr
, struct sk_buff
*skb
)
959 const struct nlattr
*a
;
960 struct nlattr
*start
;
963 start
= nla_nest_start(skb
, OVS_ACTION_ATTR_SAMPLE
);
967 nla_for_each_nested(a
, attr
, rem
) {
968 int type
= nla_type(a
);
969 struct nlattr
*st_sample
;
972 case OVS_SAMPLE_ATTR_PROBABILITY
:
973 if (nla_put(skb
, OVS_SAMPLE_ATTR_PROBABILITY
, sizeof(u32
), nla_data(a
)))
976 case OVS_SAMPLE_ATTR_ACTIONS
:
977 st_sample
= nla_nest_start(skb
, OVS_SAMPLE_ATTR_ACTIONS
);
980 err
= actions_to_attr(nla_data(a
), nla_len(a
), skb
);
983 nla_nest_end(skb
, st_sample
);
988 nla_nest_end(skb
, start
);
992 static int set_action_to_attr(const struct nlattr
*a
, struct sk_buff
*skb
)
994 const struct nlattr
*ovs_key
= nla_data(a
);
995 int key_type
= nla_type(ovs_key
);
996 struct nlattr
*start
;
1000 case OVS_KEY_ATTR_IPV4_TUNNEL
:
1001 start
= nla_nest_start(skb
, OVS_ACTION_ATTR_SET
);
1005 err
= ipv4_tun_to_nlattr(skb
, nla_data(ovs_key
));
1008 nla_nest_end(skb
, start
);
1011 if (nla_put(skb
, OVS_ACTION_ATTR_SET
, nla_len(a
), ovs_key
))
1019 static int actions_to_attr(const struct nlattr
*attr
, int len
, struct sk_buff
*skb
)
1021 const struct nlattr
*a
;
1024 nla_for_each_attr(a
, attr
, len
, rem
) {
1025 int type
= nla_type(a
);
1028 case OVS_ACTION_ATTR_SET
:
1029 err
= set_action_to_attr(a
, skb
);
1034 case OVS_ACTION_ATTR_SAMPLE
:
1035 err
= sample_action_to_attr(a
, skb
);
1040 if (nla_put(skb
, type
, nla_len(a
), nla_data(a
)))
1049 /* Called with genl_lock. */
1050 static int ovs_flow_cmd_fill_info(struct sw_flow
*flow
, struct datapath
*dp
,
1051 struct sk_buff
*skb
, u32 portid
,
1052 u32 seq
, u32 flags
, u8 cmd
)
1054 const int skb_orig_len
= skb
->len
;
1055 const struct sw_flow_actions
*sf_acts
;
1056 struct nlattr
*start
;
1057 struct ovs_flow_stats stats
;
1058 struct ovs_header
*ovs_header
;
1064 sf_acts
= rcu_dereference_protected(flow
->sf_acts
,
1065 lockdep_genl_is_held());
1067 ovs_header
= genlmsg_put(skb
, portid
, seq
, &dp_flow_genl_family
, flags
, cmd
);
1071 ovs_header
->dp_ifindex
= get_dpifindex(dp
);
1073 nla
= nla_nest_start(skb
, OVS_FLOW_ATTR_KEY
);
1075 goto nla_put_failure
;
1076 err
= ovs_flow_to_nlattrs(&flow
->key
, skb
);
1079 nla_nest_end(skb
, nla
);
1081 spin_lock_bh(&flow
->lock
);
1083 stats
.n_packets
= flow
->packet_count
;
1084 stats
.n_bytes
= flow
->byte_count
;
1085 tcp_flags
= flow
->tcp_flags
;
1086 spin_unlock_bh(&flow
->lock
);
1089 nla_put_u64(skb
, OVS_FLOW_ATTR_USED
, ovs_flow_used_time(used
)))
1090 goto nla_put_failure
;
1092 if (stats
.n_packets
&&
1093 nla_put(skb
, OVS_FLOW_ATTR_STATS
,
1094 sizeof(struct ovs_flow_stats
), &stats
))
1095 goto nla_put_failure
;
1098 nla_put_u8(skb
, OVS_FLOW_ATTR_TCP_FLAGS
, tcp_flags
))
1099 goto nla_put_failure
;
1101 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
1102 * this is the first flow to be dumped into 'skb'. This is unusual for
1103 * Netlink but individual action lists can be longer than
1104 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
1105 * The userspace caller can always fetch the actions separately if it
1106 * really wants them. (Most userspace callers in fact don't care.)
1108 * This can only fail for dump operations because the skb is always
1109 * properly sized for single flows.
1111 start
= nla_nest_start(skb
, OVS_FLOW_ATTR_ACTIONS
);
1113 err
= actions_to_attr(sf_acts
->actions
, sf_acts
->actions_len
, skb
);
1114 if (err
< 0 && skb_orig_len
)
1116 nla_nest_end(skb
, start
);
1117 } else if (skb_orig_len
) {
1122 return genlmsg_end(skb
, ovs_header
);
1127 genlmsg_cancel(skb
, ovs_header
);
1131 static struct sk_buff
*ovs_flow_cmd_alloc_info(struct sw_flow
*flow
)
1133 const struct sw_flow_actions
*sf_acts
;
1136 sf_acts
= rcu_dereference_protected(flow
->sf_acts
,
1137 lockdep_genl_is_held());
1139 /* OVS_FLOW_ATTR_KEY */
1140 len
= nla_total_size(FLOW_BUFSIZE
);
1141 /* OVS_FLOW_ATTR_ACTIONS */
1142 len
+= nla_total_size(sf_acts
->actions_len
);
1143 /* OVS_FLOW_ATTR_STATS */
1144 len
+= nla_total_size(sizeof(struct ovs_flow_stats
));
1145 /* OVS_FLOW_ATTR_TCP_FLAGS */
1146 len
+= nla_total_size(1);
1147 /* OVS_FLOW_ATTR_USED */
1148 len
+= nla_total_size(8);
1150 len
+= NLMSG_ALIGN(sizeof(struct ovs_header
));
1152 return genlmsg_new(len
, GFP_KERNEL
);
1155 static struct sk_buff
*ovs_flow_cmd_build_info(struct sw_flow
*flow
,
1156 struct datapath
*dp
,
1157 u32 portid
, u32 seq
, u8 cmd
)
1159 struct sk_buff
*skb
;
1162 skb
= ovs_flow_cmd_alloc_info(flow
);
1164 return ERR_PTR(-ENOMEM
);
1166 retval
= ovs_flow_cmd_fill_info(flow
, dp
, skb
, portid
, seq
, 0, cmd
);
1171 static int ovs_flow_cmd_new_or_set(struct sk_buff
*skb
, struct genl_info
*info
)
1173 struct nlattr
**a
= info
->attrs
;
1174 struct ovs_header
*ovs_header
= info
->userhdr
;
1175 struct sw_flow_key key
;
1176 struct sw_flow
*flow
;
1177 struct sk_buff
*reply
;
1178 struct datapath
*dp
;
1179 struct flow_table
*table
;
1180 struct sw_flow_actions
*acts
= NULL
;
1186 if (!a
[OVS_FLOW_ATTR_KEY
])
1188 error
= ovs_flow_from_nlattrs(&key
, &key_len
, a
[OVS_FLOW_ATTR_KEY
]);
1192 /* Validate actions. */
1193 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
1194 acts
= ovs_flow_actions_alloc(nla_len(a
[OVS_FLOW_ATTR_ACTIONS
]));
1195 error
= PTR_ERR(acts
);
1199 error
= validate_and_copy_actions(a
[OVS_FLOW_ATTR_ACTIONS
], &key
, 0, &acts
);
1202 } else if (info
->genlhdr
->cmd
== OVS_FLOW_CMD_NEW
) {
1207 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1212 table
= genl_dereference(dp
->table
);
1213 flow
= ovs_flow_tbl_lookup(table
, &key
, key_len
);
1215 /* Bail out if we're not allowed to create a new flow. */
1217 if (info
->genlhdr
->cmd
== OVS_FLOW_CMD_SET
)
1220 /* Expand table, if necessary, to make room. */
1221 if (ovs_flow_tbl_need_to_expand(table
)) {
1222 struct flow_table
*new_table
;
1224 new_table
= ovs_flow_tbl_expand(table
);
1225 if (!IS_ERR(new_table
)) {
1226 rcu_assign_pointer(dp
->table
, new_table
);
1227 ovs_flow_tbl_deferred_destroy(table
);
1228 table
= genl_dereference(dp
->table
);
1232 /* Allocate flow. */
1233 flow
= ovs_flow_alloc();
1235 error
= PTR_ERR(flow
);
1240 rcu_assign_pointer(flow
->sf_acts
, acts
);
1242 /* Put flow in bucket. */
1243 ovs_flow_tbl_insert(table
, flow
, &key
, key_len
);
1245 reply
= ovs_flow_cmd_build_info(flow
, dp
, info
->snd_portid
,
1249 /* We found a matching flow. */
1250 struct sw_flow_actions
*old_acts
;
1252 /* Bail out if we're not allowed to modify an existing flow.
1253 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1254 * because Generic Netlink treats the latter as a dump
1255 * request. We also accept NLM_F_EXCL in case that bug ever
1259 if (info
->genlhdr
->cmd
== OVS_FLOW_CMD_NEW
&&
1260 info
->nlhdr
->nlmsg_flags
& (NLM_F_CREATE
| NLM_F_EXCL
))
1263 /* Update actions. */
1264 old_acts
= rcu_dereference_protected(flow
->sf_acts
,
1265 lockdep_genl_is_held());
1266 rcu_assign_pointer(flow
->sf_acts
, acts
);
1267 ovs_flow_deferred_free_acts(old_acts
);
1269 reply
= ovs_flow_cmd_build_info(flow
, dp
, info
->snd_portid
,
1270 info
->snd_seq
, OVS_FLOW_CMD_NEW
);
1273 if (a
[OVS_FLOW_ATTR_CLEAR
]) {
1274 spin_lock_bh(&flow
->lock
);
1276 spin_unlock_bh(&flow
->lock
);
1281 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
1282 ovs_dp_flow_multicast_group
.id
, info
->nlhdr
,
1285 netlink_set_err(GENL_SOCK(sock_net(skb
->sk
)), 0,
1286 ovs_dp_flow_multicast_group
.id
, PTR_ERR(reply
));
1295 static int ovs_flow_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1297 struct nlattr
**a
= info
->attrs
;
1298 struct ovs_header
*ovs_header
= info
->userhdr
;
1299 struct sw_flow_key key
;
1300 struct sk_buff
*reply
;
1301 struct sw_flow
*flow
;
1302 struct datapath
*dp
;
1303 struct flow_table
*table
;
1307 if (!a
[OVS_FLOW_ATTR_KEY
])
1309 err
= ovs_flow_from_nlattrs(&key
, &key_len
, a
[OVS_FLOW_ATTR_KEY
]);
1313 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1317 table
= genl_dereference(dp
->table
);
1318 flow
= ovs_flow_tbl_lookup(table
, &key
, key_len
);
1322 reply
= ovs_flow_cmd_build_info(flow
, dp
, info
->snd_portid
,
1323 info
->snd_seq
, OVS_FLOW_CMD_NEW
);
1325 return PTR_ERR(reply
);
1327 return genlmsg_reply(reply
, info
);
1330 static int ovs_flow_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1332 struct nlattr
**a
= info
->attrs
;
1333 struct ovs_header
*ovs_header
= info
->userhdr
;
1334 struct sw_flow_key key
;
1335 struct sk_buff
*reply
;
1336 struct sw_flow
*flow
;
1337 struct datapath
*dp
;
1338 struct flow_table
*table
;
1342 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1346 if (!a
[OVS_FLOW_ATTR_KEY
])
1347 return flush_flows(dp
);
1349 err
= ovs_flow_from_nlattrs(&key
, &key_len
, a
[OVS_FLOW_ATTR_KEY
]);
1353 table
= genl_dereference(dp
->table
);
1354 flow
= ovs_flow_tbl_lookup(table
, &key
, key_len
);
1358 reply
= ovs_flow_cmd_alloc_info(flow
);
1362 ovs_flow_tbl_remove(table
, flow
);
1364 err
= ovs_flow_cmd_fill_info(flow
, dp
, reply
, info
->snd_portid
,
1365 info
->snd_seq
, 0, OVS_FLOW_CMD_DEL
);
1368 ovs_flow_deferred_free(flow
);
1370 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
1371 ovs_dp_flow_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
1375 static int ovs_flow_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1377 struct ovs_header
*ovs_header
= genlmsg_data(nlmsg_data(cb
->nlh
));
1378 struct datapath
*dp
;
1379 struct flow_table
*table
;
1381 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1385 table
= genl_dereference(dp
->table
);
1388 struct sw_flow
*flow
;
1391 bucket
= cb
->args
[0];
1393 flow
= ovs_flow_tbl_next(table
, &bucket
, &obj
);
1397 if (ovs_flow_cmd_fill_info(flow
, dp
, skb
,
1398 NETLINK_CB(cb
->skb
).portid
,
1399 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
1400 OVS_FLOW_CMD_NEW
) < 0)
1403 cb
->args
[0] = bucket
;
1409 static struct genl_ops dp_flow_genl_ops
[] = {
1410 { .cmd
= OVS_FLOW_CMD_NEW
,
1411 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1412 .policy
= flow_policy
,
1413 .doit
= ovs_flow_cmd_new_or_set
1415 { .cmd
= OVS_FLOW_CMD_DEL
,
1416 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1417 .policy
= flow_policy
,
1418 .doit
= ovs_flow_cmd_del
1420 { .cmd
= OVS_FLOW_CMD_GET
,
1421 .flags
= 0, /* OK for unprivileged users. */
1422 .policy
= flow_policy
,
1423 .doit
= ovs_flow_cmd_get
,
1424 .dumpit
= ovs_flow_cmd_dump
1426 { .cmd
= OVS_FLOW_CMD_SET
,
1427 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1428 .policy
= flow_policy
,
1429 .doit
= ovs_flow_cmd_new_or_set
,
1433 static const struct nla_policy datapath_policy
[OVS_DP_ATTR_MAX
+ 1] = {
1434 #ifdef HAVE_NLA_NUL_STRING
1435 [OVS_DP_ATTR_NAME
] = { .type
= NLA_NUL_STRING
, .len
= IFNAMSIZ
- 1 },
1437 [OVS_DP_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
1440 static struct genl_family dp_datapath_genl_family
= {
1441 .id
= GENL_ID_GENERATE
,
1442 .hdrsize
= sizeof(struct ovs_header
),
1443 .name
= OVS_DATAPATH_FAMILY
,
1444 .version
= OVS_DATAPATH_VERSION
,
1445 .maxattr
= OVS_DP_ATTR_MAX
,
1449 static struct genl_multicast_group ovs_dp_datapath_multicast_group
= {
1450 .name
= OVS_DATAPATH_MCGROUP
1453 static int ovs_dp_cmd_fill_info(struct datapath
*dp
, struct sk_buff
*skb
,
1454 u32 portid
, u32 seq
, u32 flags
, u8 cmd
)
1456 struct ovs_header
*ovs_header
;
1457 struct ovs_dp_stats dp_stats
;
1460 ovs_header
= genlmsg_put(skb
, portid
, seq
, &dp_datapath_genl_family
,
1465 ovs_header
->dp_ifindex
= get_dpifindex(dp
);
1468 err
= nla_put_string(skb
, OVS_DP_ATTR_NAME
, ovs_dp_name(dp
));
1471 goto nla_put_failure
;
1473 get_dp_stats(dp
, &dp_stats
);
1474 if (nla_put(skb
, OVS_DP_ATTR_STATS
, sizeof(struct ovs_dp_stats
), &dp_stats
))
1475 goto nla_put_failure
;
1477 return genlmsg_end(skb
, ovs_header
);
1480 genlmsg_cancel(skb
, ovs_header
);
1485 static struct sk_buff
*ovs_dp_cmd_build_info(struct datapath
*dp
, u32 portid
,
1488 struct sk_buff
*skb
;
1491 skb
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1493 return ERR_PTR(-ENOMEM
);
1495 retval
= ovs_dp_cmd_fill_info(dp
, skb
, portid
, seq
, 0, cmd
);
1498 return ERR_PTR(retval
);
1503 static int ovs_dp_cmd_validate(struct nlattr
*a
[OVS_DP_ATTR_MAX
+ 1])
1505 return CHECK_NUL_STRING(a
[OVS_DP_ATTR_NAME
], IFNAMSIZ
- 1);
1508 /* Called with genl_mutex and optionally with RTNL lock also. */
1509 static struct datapath
*lookup_datapath(struct net
*net
,
1510 struct ovs_header
*ovs_header
,
1511 struct nlattr
*a
[OVS_DP_ATTR_MAX
+ 1])
1513 struct datapath
*dp
;
1515 if (!a
[OVS_DP_ATTR_NAME
])
1516 dp
= get_dp(net
, ovs_header
->dp_ifindex
);
1518 struct vport
*vport
;
1521 vport
= ovs_vport_locate(net
, nla_data(a
[OVS_DP_ATTR_NAME
]));
1522 dp
= vport
&& vport
->port_no
== OVSP_LOCAL
? vport
->dp
: NULL
;
1525 return dp
? dp
: ERR_PTR(-ENODEV
);
1528 static int ovs_dp_cmd_new(struct sk_buff
*skb
, struct genl_info
*info
)
1530 struct nlattr
**a
= info
->attrs
;
1531 struct vport_parms parms
;
1532 struct sk_buff
*reply
;
1533 struct datapath
*dp
;
1534 struct vport
*vport
;
1535 struct ovs_net
*ovs_net
;
1539 if (!a
[OVS_DP_ATTR_NAME
] || !a
[OVS_DP_ATTR_UPCALL_PID
])
1542 err
= ovs_dp_cmd_validate(a
);
1549 dp
= kzalloc(sizeof(*dp
), GFP_KERNEL
);
1551 goto err_unlock_rtnl
;
1553 ovs_dp_set_net(dp
, hold_net(sock_net(skb
->sk
)));
1555 /* Allocate table. */
1557 rcu_assign_pointer(dp
->table
, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS
));
1561 dp
->stats_percpu
= alloc_percpu(struct dp_stats_percpu
);
1562 if (!dp
->stats_percpu
) {
1564 goto err_destroy_table
;
1567 dp
->ports
= kmalloc(DP_VPORT_HASH_BUCKETS
* sizeof(struct hlist_head
),
1571 goto err_destroy_percpu
;
1574 for (i
= 0; i
< DP_VPORT_HASH_BUCKETS
; i
++)
1575 INIT_HLIST_HEAD(&dp
->ports
[i
]);
1577 /* Set up our datapath device. */
1578 parms
.name
= nla_data(a
[OVS_DP_ATTR_NAME
]);
1579 parms
.type
= OVS_VPORT_TYPE_INTERNAL
;
1580 parms
.options
= NULL
;
1582 parms
.port_no
= OVSP_LOCAL
;
1583 parms
.upcall_portid
= nla_get_u32(a
[OVS_DP_ATTR_UPCALL_PID
]);
1585 vport
= new_vport(&parms
);
1586 if (IS_ERR(vport
)) {
1587 err
= PTR_ERR(vport
);
1591 goto err_destroy_ports_array
;
1594 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_portid
,
1595 info
->snd_seq
, OVS_DP_CMD_NEW
);
1596 err
= PTR_ERR(reply
);
1598 goto err_destroy_local_port
;
1600 ovs_net
= net_generic(ovs_dp_get_net(dp
), ovs_net_id
);
1601 list_add_tail(&dp
->list_node
, &ovs_net
->dps
);
1605 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
1606 ovs_dp_datapath_multicast_group
.id
, info
->nlhdr
,
1610 err_destroy_local_port
:
1611 ovs_dp_detach_port(ovs_vport_rtnl(dp
, OVSP_LOCAL
));
1612 err_destroy_ports_array
:
1615 free_percpu(dp
->stats_percpu
);
1617 ovs_flow_tbl_destroy(genl_dereference(dp
->table
));
1619 release_net(ovs_dp_get_net(dp
));
1627 /* Called with genl_mutex. */
1628 static void __dp_destroy(struct datapath
*dp
)
1634 for (i
= 0; i
< DP_VPORT_HASH_BUCKETS
; i
++) {
1635 struct vport
*vport
;
1636 struct hlist_node
*node
, *n
;
1638 hlist_for_each_entry_safe(vport
, node
, n
, &dp
->ports
[i
], dp_hash_node
)
1639 if (vport
->port_no
!= OVSP_LOCAL
)
1640 ovs_dp_detach_port(vport
);
1643 list_del(&dp
->list_node
);
1644 ovs_dp_detach_port(ovs_vport_rtnl(dp
, OVSP_LOCAL
));
1646 /* rtnl_unlock() will wait until all the references to devices that
1647 * are pending unregistration have been dropped. We do it here to
1648 * ensure that any internal devices (which contain DP pointers) are
1649 * fully destroyed before freeing the datapath.
1653 call_rcu(&dp
->rcu
, destroy_dp_rcu
);
1656 static int ovs_dp_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1658 struct sk_buff
*reply
;
1659 struct datapath
*dp
;
1662 err
= ovs_dp_cmd_validate(info
->attrs
);
1666 dp
= lookup_datapath(sock_net(skb
->sk
), info
->userhdr
, info
->attrs
);
1671 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_portid
,
1672 info
->snd_seq
, OVS_DP_CMD_DEL
);
1673 err
= PTR_ERR(reply
);
1679 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
1680 ovs_dp_datapath_multicast_group
.id
, info
->nlhdr
,
1686 static int ovs_dp_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
1688 struct sk_buff
*reply
;
1689 struct datapath
*dp
;
1692 err
= ovs_dp_cmd_validate(info
->attrs
);
1696 dp
= lookup_datapath(sock_net(skb
->sk
), info
->userhdr
, info
->attrs
);
1700 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_portid
,
1701 info
->snd_seq
, OVS_DP_CMD_NEW
);
1702 if (IS_ERR(reply
)) {
1703 err
= PTR_ERR(reply
);
1704 netlink_set_err(GENL_SOCK(sock_net(skb
->sk
)), 0,
1705 ovs_dp_datapath_multicast_group
.id
, err
);
1709 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
1710 ovs_dp_datapath_multicast_group
.id
, info
->nlhdr
,
1716 static int ovs_dp_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1718 struct sk_buff
*reply
;
1719 struct datapath
*dp
;
1722 err
= ovs_dp_cmd_validate(info
->attrs
);
1726 dp
= lookup_datapath(sock_net(skb
->sk
), info
->userhdr
, info
->attrs
);
1730 reply
= ovs_dp_cmd_build_info(dp
, info
->snd_portid
,
1731 info
->snd_seq
, OVS_DP_CMD_NEW
);
1733 return PTR_ERR(reply
);
1735 return genlmsg_reply(reply
, info
);
1738 static int ovs_dp_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1740 struct ovs_net
*ovs_net
= net_generic(sock_net(skb
->sk
), ovs_net_id
);
1741 struct datapath
*dp
;
1742 int skip
= cb
->args
[0];
1745 list_for_each_entry(dp
, &ovs_net
->dps
, list_node
) {
1747 ovs_dp_cmd_fill_info(dp
, skb
, NETLINK_CB(cb
->skb
).portid
,
1748 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
1749 OVS_DP_CMD_NEW
) < 0)
1759 static struct genl_ops dp_datapath_genl_ops
[] = {
1760 { .cmd
= OVS_DP_CMD_NEW
,
1761 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1762 .policy
= datapath_policy
,
1763 .doit
= ovs_dp_cmd_new
1765 { .cmd
= OVS_DP_CMD_DEL
,
1766 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1767 .policy
= datapath_policy
,
1768 .doit
= ovs_dp_cmd_del
1770 { .cmd
= OVS_DP_CMD_GET
,
1771 .flags
= 0, /* OK for unprivileged users. */
1772 .policy
= datapath_policy
,
1773 .doit
= ovs_dp_cmd_get
,
1774 .dumpit
= ovs_dp_cmd_dump
1776 { .cmd
= OVS_DP_CMD_SET
,
1777 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1778 .policy
= datapath_policy
,
1779 .doit
= ovs_dp_cmd_set
,
1783 static const struct nla_policy vport_policy
[OVS_VPORT_ATTR_MAX
+ 1] = {
1784 #ifdef HAVE_NLA_NUL_STRING
1785 [OVS_VPORT_ATTR_NAME
] = { .type
= NLA_NUL_STRING
, .len
= IFNAMSIZ
- 1 },
1786 [OVS_VPORT_ATTR_STATS
] = { .len
= sizeof(struct ovs_vport_stats
) },
1787 [OVS_VPORT_ATTR_ADDRESS
] = { .len
= ETH_ALEN
},
1789 [OVS_VPORT_ATTR_STATS
] = { .minlen
= sizeof(struct ovs_vport_stats
) },
1790 [OVS_VPORT_ATTR_ADDRESS
] = { .minlen
= ETH_ALEN
},
1792 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NLA_U32
},
1793 [OVS_VPORT_ATTR_TYPE
] = { .type
= NLA_U32
},
1794 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
1795 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NLA_NESTED
},
1798 static struct genl_family dp_vport_genl_family
= {
1799 .id
= GENL_ID_GENERATE
,
1800 .hdrsize
= sizeof(struct ovs_header
),
1801 .name
= OVS_VPORT_FAMILY
,
1802 .version
= OVS_VPORT_VERSION
,
1803 .maxattr
= OVS_VPORT_ATTR_MAX
,
1807 struct genl_multicast_group ovs_dp_vport_multicast_group
= {
1808 .name
= OVS_VPORT_MCGROUP
1811 /* Called with RTNL lock or RCU read lock. */
1812 static int ovs_vport_cmd_fill_info(struct vport
*vport
, struct sk_buff
*skb
,
1813 u32 portid
, u32 seq
, u32 flags
, u8 cmd
)
1815 struct ovs_header
*ovs_header
;
1816 struct ovs_vport_stats vport_stats
;
1819 ovs_header
= genlmsg_put(skb
, portid
, seq
, &dp_vport_genl_family
,
1824 ovs_header
->dp_ifindex
= get_dpifindex(vport
->dp
);
1826 if (nla_put_u32(skb
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
) ||
1827 nla_put_u32(skb
, OVS_VPORT_ATTR_TYPE
, vport
->ops
->type
) ||
1828 nla_put_string(skb
, OVS_VPORT_ATTR_NAME
, vport
->ops
->get_name(vport
)) ||
1829 nla_put_u32(skb
, OVS_VPORT_ATTR_UPCALL_PID
, vport
->upcall_portid
))
1830 goto nla_put_failure
;
1832 ovs_vport_get_stats(vport
, &vport_stats
);
1833 if (nla_put(skb
, OVS_VPORT_ATTR_STATS
, sizeof(struct ovs_vport_stats
),
1835 goto nla_put_failure
;
1837 if (nla_put(skb
, OVS_VPORT_ATTR_ADDRESS
, ETH_ALEN
,
1838 vport
->ops
->get_addr(vport
)))
1839 goto nla_put_failure
;
1841 err
= ovs_vport_get_options(vport
, skb
);
1842 if (err
== -EMSGSIZE
)
1845 return genlmsg_end(skb
, ovs_header
);
1850 genlmsg_cancel(skb
, ovs_header
);
1854 /* Called with RTNL lock or RCU read lock. */
1855 struct sk_buff
*ovs_vport_cmd_build_info(struct vport
*vport
, u32 portid
,
1858 struct sk_buff
*skb
;
1861 skb
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_ATOMIC
);
1863 return ERR_PTR(-ENOMEM
);
1865 retval
= ovs_vport_cmd_fill_info(vport
, skb
, portid
, seq
, 0, cmd
);
1868 return ERR_PTR(retval
);
1873 static int ovs_vport_cmd_validate(struct nlattr
*a
[OVS_VPORT_ATTR_MAX
+ 1])
1875 return CHECK_NUL_STRING(a
[OVS_VPORT_ATTR_NAME
], IFNAMSIZ
- 1);
1878 /* Called with RTNL lock or RCU read lock. */
1879 static struct vport
*lookup_vport(struct net
*net
,
1880 struct ovs_header
*ovs_header
,
1881 struct nlattr
*a
[OVS_VPORT_ATTR_MAX
+ 1])
1883 struct datapath
*dp
;
1884 struct vport
*vport
;
1886 if (a
[OVS_VPORT_ATTR_NAME
]) {
1887 vport
= ovs_vport_locate(net
, nla_data(a
[OVS_VPORT_ATTR_NAME
]));
1889 return ERR_PTR(-ENODEV
);
1890 if (ovs_header
->dp_ifindex
&&
1891 ovs_header
->dp_ifindex
!= get_dpifindex(vport
->dp
))
1892 return ERR_PTR(-ENODEV
);
1894 } else if (a
[OVS_VPORT_ATTR_PORT_NO
]) {
1895 u32 port_no
= nla_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1897 if (port_no
>= DP_MAX_PORTS
)
1898 return ERR_PTR(-EFBIG
);
1900 dp
= get_dp(net
, ovs_header
->dp_ifindex
);
1902 return ERR_PTR(-ENODEV
);
1904 vport
= ovs_vport_rtnl_rcu(dp
, port_no
);
1906 return ERR_PTR(-ENODEV
);
1909 return ERR_PTR(-EINVAL
);
1912 /* Called with RTNL lock. */
1913 static int change_vport(struct vport
*vport
,
1914 struct nlattr
*a
[OVS_VPORT_ATTR_MAX
+ 1])
1918 if (a
[OVS_VPORT_ATTR_STATS
])
1919 ovs_vport_set_stats(vport
, nla_data(a
[OVS_VPORT_ATTR_STATS
]));
1921 if (a
[OVS_VPORT_ATTR_ADDRESS
])
1922 err
= ovs_vport_set_addr(vport
, nla_data(a
[OVS_VPORT_ATTR_ADDRESS
]));
1927 static int ovs_vport_cmd_new(struct sk_buff
*skb
, struct genl_info
*info
)
1929 struct nlattr
**a
= info
->attrs
;
1930 struct ovs_header
*ovs_header
= info
->userhdr
;
1931 struct vport_parms parms
;
1932 struct sk_buff
*reply
;
1933 struct vport
*vport
;
1934 struct datapath
*dp
;
1939 if (!a
[OVS_VPORT_ATTR_NAME
] || !a
[OVS_VPORT_ATTR_TYPE
] ||
1940 !a
[OVS_VPORT_ATTR_UPCALL_PID
])
1943 err
= ovs_vport_cmd_validate(a
);
1948 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1953 if (a
[OVS_VPORT_ATTR_PORT_NO
]) {
1954 port_no
= nla_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1957 if (port_no
>= DP_MAX_PORTS
)
1960 vport
= ovs_vport_rtnl(dp
, port_no
);
1965 for (port_no
= 1; ; port_no
++) {
1966 if (port_no
>= DP_MAX_PORTS
) {
1970 vport
= ovs_vport_rtnl(dp
, port_no
);
1976 parms
.name
= nla_data(a
[OVS_VPORT_ATTR_NAME
]);
1977 parms
.type
= nla_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
1978 parms
.options
= a
[OVS_VPORT_ATTR_OPTIONS
];
1980 parms
.port_no
= port_no
;
1981 parms
.upcall_portid
= nla_get_u32(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
1983 vport
= new_vport(&parms
);
1984 err
= PTR_ERR(vport
);
1988 err
= change_vport(vport
, a
);
1990 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_portid
,
1994 err
= PTR_ERR(reply
);
1997 ovs_dp_detach_port(vport
);
2000 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
2001 ovs_dp_vport_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
2009 static int ovs_vport_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
2011 struct nlattr
**a
= info
->attrs
;
2012 struct sk_buff
*reply
;
2013 struct vport
*vport
;
2016 err
= ovs_vport_cmd_validate(a
);
2021 vport
= lookup_vport(sock_net(skb
->sk
), info
->userhdr
, a
);
2022 err
= PTR_ERR(vport
);
2027 if (a
[OVS_VPORT_ATTR_TYPE
] &&
2028 nla_get_u32(a
[OVS_VPORT_ATTR_TYPE
]) != vport
->ops
->type
)
2031 if (!err
&& a
[OVS_VPORT_ATTR_OPTIONS
])
2032 err
= ovs_vport_set_options(vport
, a
[OVS_VPORT_ATTR_OPTIONS
]);
2034 err
= change_vport(vport
, a
);
2037 if (!err
&& a
[OVS_VPORT_ATTR_UPCALL_PID
])
2038 vport
->upcall_portid
= nla_get_u32(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
2040 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_portid
,
2041 info
->snd_seq
, OVS_VPORT_CMD_NEW
);
2042 if (IS_ERR(reply
)) {
2043 netlink_set_err(GENL_SOCK(sock_net(skb
->sk
)), 0,
2044 ovs_dp_vport_multicast_group
.id
, PTR_ERR(reply
));
2048 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
2049 ovs_dp_vport_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
2057 static int ovs_vport_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
2059 struct nlattr
**a
= info
->attrs
;
2060 struct sk_buff
*reply
;
2061 struct vport
*vport
;
2064 err
= ovs_vport_cmd_validate(a
);
2069 vport
= lookup_vport(sock_net(skb
->sk
), info
->userhdr
, a
);
2070 err
= PTR_ERR(vport
);
2074 if (vport
->port_no
== OVSP_LOCAL
) {
2079 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_portid
,
2080 info
->snd_seq
, OVS_VPORT_CMD_DEL
);
2081 err
= PTR_ERR(reply
);
2085 ovs_dp_detach_port(vport
);
2087 genl_notify(reply
, genl_info_net(info
), info
->snd_portid
,
2088 ovs_dp_vport_multicast_group
.id
, info
->nlhdr
, GFP_KERNEL
);
2096 static int ovs_vport_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
2098 struct nlattr
**a
= info
->attrs
;
2099 struct ovs_header
*ovs_header
= info
->userhdr
;
2100 struct sk_buff
*reply
;
2101 struct vport
*vport
;
2104 err
= ovs_vport_cmd_validate(a
);
2109 vport
= lookup_vport(sock_net(skb
->sk
), ovs_header
, a
);
2110 err
= PTR_ERR(vport
);
2114 reply
= ovs_vport_cmd_build_info(vport
, info
->snd_portid
,
2115 info
->snd_seq
, OVS_VPORT_CMD_NEW
);
2116 err
= PTR_ERR(reply
);
2122 return genlmsg_reply(reply
, info
);
2130 static int ovs_vport_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
2132 struct ovs_header
*ovs_header
= genlmsg_data(nlmsg_data(cb
->nlh
));
2133 struct datapath
*dp
;
2134 int bucket
= cb
->args
[0], skip
= cb
->args
[1];
2137 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
2142 for (i
= bucket
; i
< DP_VPORT_HASH_BUCKETS
; i
++) {
2143 struct vport
*vport
;
2144 struct hlist_node
*n
;
2147 hlist_for_each_entry_rcu(vport
, n
, &dp
->ports
[i
], dp_hash_node
) {
2149 ovs_vport_cmd_fill_info(vport
, skb
,
2150 NETLINK_CB(cb
->skb
).portid
,
2153 OVS_VPORT_CMD_NEW
) < 0)
2169 static struct genl_ops dp_vport_genl_ops
[] = {
2170 { .cmd
= OVS_VPORT_CMD_NEW
,
2171 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
2172 .policy
= vport_policy
,
2173 .doit
= ovs_vport_cmd_new
2175 { .cmd
= OVS_VPORT_CMD_DEL
,
2176 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
2177 .policy
= vport_policy
,
2178 .doit
= ovs_vport_cmd_del
2180 { .cmd
= OVS_VPORT_CMD_GET
,
2181 .flags
= 0, /* OK for unprivileged users. */
2182 .policy
= vport_policy
,
2183 .doit
= ovs_vport_cmd_get
,
2184 .dumpit
= ovs_vport_cmd_dump
2186 { .cmd
= OVS_VPORT_CMD_SET
,
2187 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
2188 .policy
= vport_policy
,
2189 .doit
= ovs_vport_cmd_set
,
/*
 * Bundles everything needed to register one Generic Netlink family: the
 * family itself, its operations table with length, and an optional
 * multicast group (NULL if the family has none).
 */
struct genl_family_and_ops {
	struct genl_family *family;
	struct genl_ops *ops;
	int n_ops;
	struct genl_multicast_group *group;
};
2200 static const struct genl_family_and_ops dp_genl_families
[] = {
2201 { &dp_datapath_genl_family
,
2202 dp_datapath_genl_ops
, ARRAY_SIZE(dp_datapath_genl_ops
),
2203 &ovs_dp_datapath_multicast_group
},
2204 { &dp_vport_genl_family
,
2205 dp_vport_genl_ops
, ARRAY_SIZE(dp_vport_genl_ops
),
2206 &ovs_dp_vport_multicast_group
},
2207 { &dp_flow_genl_family
,
2208 dp_flow_genl_ops
, ARRAY_SIZE(dp_flow_genl_ops
),
2209 &ovs_dp_flow_multicast_group
},
2210 { &dp_packet_genl_family
,
2211 dp_packet_genl_ops
, ARRAY_SIZE(dp_packet_genl_ops
),
2215 static void dp_unregister_genl(int n_families
)
2219 for (i
= 0; i
< n_families
; i
++)
2220 genl_unregister_family(dp_genl_families
[i
].family
);
2223 static int dp_register_genl(void)
2230 for (i
= 0; i
< ARRAY_SIZE(dp_genl_families
); i
++) {
2231 const struct genl_family_and_ops
*f
= &dp_genl_families
[i
];
2233 err
= genl_register_family_with_ops(f
->family
, f
->ops
,
2240 err
= genl_register_mc_group(f
->family
, f
->group
);
2249 dp_unregister_genl(n_registered
);
2253 static int __rehash_flow_table(void *dummy
)
2255 struct datapath
*dp
;
2260 struct ovs_net
*ovs_net
= net_generic(net
, ovs_net_id
);
2262 list_for_each_entry(dp
, &ovs_net
->dps
, list_node
) {
2263 struct flow_table
*old_table
= genl_dereference(dp
->table
);
2264 struct flow_table
*new_table
;
2266 new_table
= ovs_flow_tbl_rehash(old_table
);
2267 if (!IS_ERR(new_table
)) {
2268 rcu_assign_pointer(dp
->table
, new_table
);
2269 ovs_flow_tbl_deferred_destroy(old_table
);
2277 static void rehash_flow_table(struct work_struct
*work
)
2279 genl_exec(__rehash_flow_table
, NULL
);
2280 schedule_delayed_work(&rehash_flow_wq
, REHASH_FLOW_INTERVAL
);
2283 static int dp_destroy_all(void *data
)
2285 struct datapath
*dp
, *dp_next
;
2286 struct ovs_net
*ovs_net
= data
;
2288 list_for_each_entry_safe(dp
, dp_next
, &ovs_net
->dps
, list_node
)
2294 static int __net_init
ovs_init_net(struct net
*net
)
2296 struct ovs_net
*ovs_net
= net_generic(net
, ovs_net_id
);
2298 INIT_LIST_HEAD(&ovs_net
->dps
);
2302 static void __net_exit
ovs_exit_net(struct net
*net
)
2304 struct ovs_net
*ovs_net
= net_generic(net
, ovs_net_id
);
2306 genl_exec(dp_destroy_all
, ovs_net
);
2309 static struct pernet_operations ovs_net_ops
= {
2310 .init
= ovs_init_net
,
2311 .exit
= ovs_exit_net
,
2313 .size
= sizeof(struct ovs_net
),
2316 static int __init
dp_init(void)
2320 BUILD_BUG_ON(sizeof(struct ovs_skb_cb
) > FIELD_SIZEOF(struct sk_buff
, cb
));
2322 pr_info("Open vSwitch switching datapath %s, built "__DATE__
" "__TIME__
"\n",
2325 err
= genl_exec_init();
2329 err
= ovs_workqueues_init();
2331 goto error_genl_exec
;
2333 err
= ovs_tnl_init();
2337 err
= ovs_flow_init();
2339 goto error_tnl_exit
;
2341 err
= ovs_vport_init();
2343 goto error_flow_exit
;
2345 err
= register_pernet_device(&ovs_net_ops
);
2347 goto error_vport_exit
;
2349 err
= register_netdevice_notifier(&ovs_dp_device_notifier
);
2351 goto error_netns_exit
;
2353 err
= dp_register_genl();
2355 goto error_unreg_notifier
;
2357 schedule_delayed_work(&rehash_flow_wq
, REHASH_FLOW_INTERVAL
);
2361 error_unreg_notifier
:
2362 unregister_netdevice_notifier(&ovs_dp_device_notifier
);
2364 unregister_pernet_device(&ovs_net_ops
);
2372 ovs_workqueues_exit();
2379 static void dp_cleanup(void)
2381 cancel_delayed_work_sync(&rehash_flow_wq
);
2382 dp_unregister_genl(ARRAY_SIZE(dp_genl_families
));
2383 unregister_netdevice_notifier(&ovs_dp_device_notifier
);
2384 unregister_pernet_device(&ovs_net_ops
);
2389 ovs_workqueues_exit();
2393 module_init(dp_init
);
2394 module_exit(dp_cleanup
);
2396 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2397 MODULE_LICENSE("GPL");
2398 MODULE_VERSION(VERSION
);