2 * Copyright (c) 2007-2014 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/init.h>
22 #include <linux/module.h>
23 #include <linux/if_arp.h>
24 #include <linux/if_vlan.h>
27 #include <linux/jhash.h>
28 #include <linux/delay.h>
29 #include <linux/time.h>
30 #include <linux/etherdevice.h>
31 #include <linux/genetlink.h>
32 #include <linux/kernel.h>
33 #include <linux/kthread.h>
34 #include <linux/mutex.h>
35 #include <linux/percpu.h>
36 #include <linux/rcupdate.h>
37 #include <linux/tcp.h>
38 #include <linux/udp.h>
39 #include <linux/version.h>
40 #include <linux/ethtool.h>
41 #include <linux/wait.h>
42 #include <asm/div64.h>
43 #include <linux/highmem.h>
44 #include <linux/netfilter_bridge.h>
45 #include <linux/netfilter_ipv4.h>
46 #include <linux/inetdevice.h>
47 #include <linux/list.h>
48 #include <linux/openvswitch.h>
49 #include <linux/rculist.h>
50 #include <linux/dmi.h>
51 #include <linux/genetlink.h>
52 #include <net/genetlink.h>
53 #include <net/genetlink.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
59 #include "flow_table.h"
60 #include "flow_netlink.h"
62 #include "vport-internal_dev.h"
63 #include "vport-netdev.h"
65 int ovs_net_id __read_mostly
;
67 static struct genl_family dp_packet_genl_family
;
68 static struct genl_family dp_flow_genl_family
;
69 static struct genl_family dp_datapath_genl_family
;
71 static struct genl_multicast_group ovs_dp_flow_multicast_group
= {
72 .name
= OVS_FLOW_MCGROUP
75 static struct genl_multicast_group ovs_dp_datapath_multicast_group
= {
76 .name
= OVS_DATAPATH_MCGROUP
79 struct genl_multicast_group ovs_dp_vport_multicast_group
= {
80 .name
= OVS_VPORT_MCGROUP
83 /* Check if need to build a reply message.
84 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
85 static bool ovs_must_notify(struct genl_info
*info
,
86 const struct genl_multicast_group
*grp
)
88 return info
->nlhdr
->nlmsg_flags
& NLM_F_ECHO
||
89 netlink_has_listeners(genl_info_net(info
)->genl_sock
, GROUP_ID(grp
));
92 static void ovs_notify(struct genl_family
*family
, struct genl_multicast_group
*grp
,
93 struct sk_buff
*skb
, struct genl_info
*info
)
95 genl_notify(family
, skb
, genl_info_net(info
),
96 info
->snd_portid
, GROUP_ID(grp
), info
->nlhdr
, GFP_KERNEL
);
102 * All writes e.g. Writes to device state (add/remove datapath, port, set
103 * operations on vports, etc.), Writes to other state (flow table
104 * modifications, set miscellaneous datapath parameters, etc.) are protected
107 * Reads are protected by RCU.
109 * There are a few special cases (mostly stats) that have their own
110 * synchronization but they nest under all of above and don't interact with
113 * The RTNL lock nests inside ovs_mutex.
116 static DEFINE_MUTEX(ovs_mutex
);
120 mutex_lock(&ovs_mutex
);
123 void ovs_unlock(void)
125 mutex_unlock(&ovs_mutex
);
128 #ifdef CONFIG_LOCKDEP
129 int lockdep_ovsl_is_held(void)
132 return lockdep_is_held(&ovs_mutex
);
138 static int queue_gso_packets(struct datapath
*dp
, struct sk_buff
*,
139 const struct dp_upcall_info
*);
140 static int queue_userspace_packet(struct datapath
*dp
, struct sk_buff
*,
141 const struct dp_upcall_info
*);
143 /* Must be called with rcu_read_lock. */
144 static struct datapath
*get_dp_rcu(struct net
*net
, int dp_ifindex
)
146 struct net_device
*dev
= dev_get_by_index_rcu(net
, dp_ifindex
);
149 struct vport
*vport
= ovs_internal_dev_get_vport(dev
);
157 /* The caller must hold either ovs_mutex or rcu_read_lock to keep the
158 * returned dp pointer valid. */
159 static inline struct datapath
*get_dp(struct net
*net
, int dp_ifindex
)
163 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
165 dp
= get_dp_rcu(net
, dp_ifindex
);
171 /* Must be called with rcu_read_lock or ovs_mutex. */
172 const char *ovs_dp_name(const struct datapath
*dp
)
174 struct vport
*vport
= ovs_vport_ovsl_rcu(dp
, OVSP_LOCAL
);
175 return vport
->ops
->get_name(vport
);
178 static int get_dpifindex(struct datapath
*dp
)
185 local
= ovs_vport_rcu(dp
, OVSP_LOCAL
);
187 ifindex
= netdev_vport_priv(local
)->dev
->ifindex
;
196 static void destroy_dp_rcu(struct rcu_head
*rcu
)
198 struct datapath
*dp
= container_of(rcu
, struct datapath
, rcu
);
200 ovs_flow_tbl_destroy(&dp
->table
);
201 free_percpu(dp
->stats_percpu
);
202 release_net(ovs_dp_get_net(dp
));
207 static struct hlist_head
*vport_hash_bucket(const struct datapath
*dp
,
210 return &dp
->ports
[port_no
& (DP_VPORT_HASH_BUCKETS
- 1)];
213 /* Called with ovs_mutex or RCU read lock. */
214 struct vport
*ovs_lookup_vport(const struct datapath
*dp
, u16 port_no
)
217 struct hlist_head
*head
;
219 head
= vport_hash_bucket(dp
, port_no
);
220 hlist_for_each_entry_rcu(vport
, head
, dp_hash_node
) {
221 if (vport
->port_no
== port_no
)
227 /* Called with ovs_mutex. */
228 static struct vport
*new_vport(const struct vport_parms
*parms
)
232 vport
= ovs_vport_add(parms
);
233 if (!IS_ERR(vport
)) {
234 struct datapath
*dp
= parms
->dp
;
235 struct hlist_head
*head
= vport_hash_bucket(dp
, vport
->port_no
);
237 hlist_add_head_rcu(&vport
->dp_hash_node
, head
);
242 void ovs_dp_detach_port(struct vport
*p
)
246 /* First drop references to device. */
247 hlist_del_rcu(&p
->dp_hash_node
);
249 /* Then destroy it. */
253 /* Must be called with rcu_read_lock. */
254 void ovs_dp_process_packet(struct sk_buff
*skb
, bool recirc
)
256 const struct vport
*p
= OVS_CB(skb
)->input_vport
;
257 struct sw_flow_key
*pkt_key
= OVS_CB(skb
)->pkt_key
;
258 struct datapath
*dp
= p
->dp
;
259 struct sw_flow
*flow
;
260 struct sw_flow_actions
*sf_acts
;
261 struct dp_stats_percpu
*stats
;
265 stats
= this_cpu_ptr(dp
->stats_percpu
);
268 flow
= ovs_flow_tbl_lookup_stats(&dp
->table
, pkt_key
, skb_get_hash(skb
),
270 if (unlikely(!flow
)) {
271 struct dp_upcall_info upcall
;
273 upcall
.cmd
= OVS_PACKET_CMD_MISS
;
274 upcall
.userdata
= NULL
;
275 upcall
.portid
= ovs_vport_find_upcall_portid(p
, skb
);
276 upcall
.egress_tun_info
= NULL
;
277 ovs_dp_upcall(dp
, skb
, &upcall
);
279 stats_counter
= &stats
->n_missed
;
283 ovs_flow_stats_update(flow
, pkt_key
->tp
.flags
, skb
);
285 sf_acts
= rcu_dereference(flow
->sf_acts
);
286 ovs_execute_actions(dp
, skb
, sf_acts
, recirc
);
287 stats_counter
= &stats
->n_hit
;
290 /* Update datapath statistics. */
291 u64_stats_update_begin(&stats
->sync
);
293 stats
->n_mask_hit
+= n_mask_hit
;
294 u64_stats_update_end(&stats
->sync
);
297 int ovs_dp_upcall(struct datapath
*dp
, struct sk_buff
*skb
,
298 const struct dp_upcall_info
*upcall_info
)
300 struct dp_stats_percpu
*stats
;
303 BUG_ON(!OVS_CB(skb
)->pkt_key
);
305 if (upcall_info
->portid
== 0) {
310 if (!skb_is_gso(skb
))
311 err
= queue_userspace_packet(dp
, skb
, upcall_info
);
313 err
= queue_gso_packets(dp
, skb
, upcall_info
);
320 stats
= this_cpu_ptr(dp
->stats_percpu
);
322 u64_stats_update_begin(&stats
->sync
);
324 u64_stats_update_end(&stats
->sync
);
329 static int queue_gso_packets(struct datapath
*dp
, struct sk_buff
*skb
,
330 const struct dp_upcall_info
*upcall_info
)
332 unsigned short gso_type
= skb_shinfo(skb
)->gso_type
;
333 struct sw_flow_key later_key
;
334 struct sk_buff
*segs
, *nskb
;
337 segs
= __skb_gso_segment(skb
, NETIF_F_SG
, false);
339 return PTR_ERR(segs
);
341 if (gso_type
& SKB_GSO_UDP
) {
342 /* The initial flow key extracted by ovs_flow_key_extract()
343 * in this case is for a first fragment, so we need to
344 * properly mark later fragments.
346 later_key
= *OVS_CB(skb
)->pkt_key
;
347 later_key
.ip
.frag
= OVS_FRAG_TYPE_LATER
;
350 /* Queue all of the segments. */
353 if (gso_type
& SKB_GSO_UDP
&& skb
!= segs
)
354 OVS_CB(skb
)->pkt_key
= &later_key
;
356 err
= queue_userspace_packet(dp
, skb
, upcall_info
);
360 } while ((skb
= skb
->next
));
362 /* Free all of the segments. */
370 } while ((skb
= nskb
));
374 static size_t upcall_msg_size(const struct dp_upcall_info
*upcall_info
,
377 size_t size
= NLMSG_ALIGN(sizeof(struct ovs_header
))
378 + nla_total_size(hdrlen
) /* OVS_PACKET_ATTR_PACKET */
379 + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
381 /* OVS_PACKET_ATTR_USERDATA */
382 if (upcall_info
->userdata
)
383 size
+= NLA_ALIGN(upcall_info
->userdata
->nla_len
);
385 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
386 if (upcall_info
->egress_tun_info
)
387 size
+= nla_total_size(ovs_tun_key_attr_size());
392 static int queue_userspace_packet(struct datapath
*dp
, struct sk_buff
*skb
,
393 const struct dp_upcall_info
*upcall_info
)
395 struct ovs_header
*upcall
;
396 struct sk_buff
*nskb
= NULL
;
397 struct sk_buff
*user_skb
; /* to be queued to userspace */
398 struct sw_flow_key
*pkt_key
= OVS_CB(skb
)->pkt_key
;
400 struct genl_info info
= {
401 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0)
402 .dst_sk
= ovs_dp_get_net(dp
)->genl_sock
,
404 .snd_portid
= upcall_info
->portid
,
410 dp_ifindex
= get_dpifindex(dp
);
414 if (vlan_tx_tag_present(skb
)) {
415 nskb
= skb_clone(skb
, GFP_ATOMIC
);
419 nskb
= __vlan_put_tag(nskb
, nskb
->vlan_proto
, vlan_tx_tag_get(nskb
));
423 vlan_set_tci(nskb
, 0);
428 if (nla_attr_size(skb
->len
) > USHRT_MAX
) {
433 /* Complete checksum if needed */
434 if (skb
->ip_summed
== CHECKSUM_PARTIAL
&&
435 (err
= skb_checksum_help(skb
)))
438 /* Older versions of OVS user space enforce alignment of the last
439 * Netlink attribute to NLA_ALIGNTO which would require extensive
440 * padding logic. Only perform zerocopy if padding is not required.
442 if (dp
->user_features
& OVS_DP_F_UNALIGNED
)
443 hlen
= skb_zerocopy_headlen(skb
);
447 len
= upcall_msg_size(upcall_info
, hlen
);
448 user_skb
= genlmsg_new_unicast(len
, &info
, GFP_ATOMIC
);
454 upcall
= genlmsg_put(user_skb
, 0, 0, &dp_packet_genl_family
,
455 0, upcall_info
->cmd
);
456 upcall
->dp_ifindex
= dp_ifindex
;
458 nla
= nla_nest_start(user_skb
, OVS_PACKET_ATTR_KEY
);
459 err
= ovs_nla_put_flow(dp
, pkt_key
, pkt_key
, user_skb
);
461 nla_nest_end(user_skb
, nla
);
463 if (upcall_info
->userdata
)
464 __nla_put(user_skb
, OVS_PACKET_ATTR_USERDATA
,
465 nla_len(upcall_info
->userdata
),
466 nla_data(upcall_info
->userdata
));
468 if (upcall_info
->egress_tun_info
) {
469 nla
= nla_nest_start(user_skb
, OVS_PACKET_ATTR_EGRESS_TUN_KEY
);
470 err
= ovs_nla_put_egress_tunnel_key(user_skb
,
471 upcall_info
->egress_tun_info
);
473 nla_nest_end(user_skb
, nla
);
476 /* Only reserve room for attribute header, packet data is added
477 * in skb_zerocopy() */
478 if (!(nla
= nla_reserve(user_skb
, OVS_PACKET_ATTR_PACKET
, 0))) {
482 nla
->nla_len
= nla_attr_size(skb
->len
);
484 err
= skb_zerocopy(user_skb
, skb
, skb
->len
, hlen
);
488 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
489 if (!(dp
->user_features
& OVS_DP_F_UNALIGNED
)) {
490 size_t plen
= NLA_ALIGN(user_skb
->len
) - user_skb
->len
;
493 memset(skb_put(user_skb
, plen
), 0, plen
);
496 ((struct nlmsghdr
*) user_skb
->data
)->nlmsg_len
= user_skb
->len
;
498 err
= genlmsg_unicast(ovs_dp_get_net(dp
), user_skb
, upcall_info
->portid
);
506 static int ovs_packet_cmd_execute(struct sk_buff
*skb
, struct genl_info
*info
)
508 struct ovs_header
*ovs_header
= info
->userhdr
;
509 struct nlattr
**a
= info
->attrs
;
510 struct sw_flow_actions
*acts
;
511 struct sk_buff
*packet
;
512 struct sw_flow
*flow
;
513 struct sw_flow_actions
*sf_acts
;
516 struct vport
*input_vport
;
521 if (!a
[OVS_PACKET_ATTR_PACKET
] || !a
[OVS_PACKET_ATTR_KEY
] ||
522 !a
[OVS_PACKET_ATTR_ACTIONS
])
525 len
= nla_len(a
[OVS_PACKET_ATTR_PACKET
]);
526 packet
= __dev_alloc_skb(NET_IP_ALIGN
+ len
, GFP_KERNEL
);
530 skb_reserve(packet
, NET_IP_ALIGN
);
532 nla_memcpy(__skb_put(packet
, len
), a
[OVS_PACKET_ATTR_PACKET
], len
);
534 skb_reset_mac_header(packet
);
535 eth
= eth_hdr(packet
);
537 /* Normally, setting the skb 'protocol' field would be handled by a
538 * call to eth_type_trans(), but it assumes there's a sending
539 * device, which we may not have. */
540 if (ntohs(eth
->h_proto
) >= ETH_P_802_3_MIN
)
541 packet
->protocol
= eth
->h_proto
;
543 packet
->protocol
= htons(ETH_P_802_2
);
545 /* Build an sw_flow for sending this packet. */
546 flow
= ovs_flow_alloc();
551 err
= ovs_flow_key_extract_userspace(a
[OVS_PACKET_ATTR_KEY
], packet
,
556 err
= ovs_nla_copy_actions(a
[OVS_PACKET_ATTR_ACTIONS
],
561 rcu_assign_pointer(flow
->sf_acts
, acts
);
562 OVS_CB(packet
)->pkt_key
= &flow
->key
;
563 OVS_CB(skb
)->egress_tun_info
= NULL
;
564 packet
->priority
= flow
->key
.phy
.priority
;
565 packet
->mark
= flow
->key
.phy
.skb_mark
;
568 dp
= get_dp_rcu(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
573 input_vport
= ovs_vport_rcu(dp
, flow
->key
.phy
.in_port
);
575 input_vport
= ovs_vport_rcu(dp
, OVSP_LOCAL
);
580 OVS_CB(packet
)->input_vport
= input_vport
;
581 sf_acts
= rcu_dereference(flow
->sf_acts
);
584 err
= ovs_execute_actions(dp
, packet
, sf_acts
, false);
588 ovs_flow_free(flow
, false);
594 ovs_flow_free(flow
, false);
601 static const struct nla_policy packet_policy
[OVS_PACKET_ATTR_MAX
+ 1] = {
602 [OVS_PACKET_ATTR_PACKET
] = { .len
= ETH_HLEN
},
603 [OVS_PACKET_ATTR_KEY
] = { .type
= NLA_NESTED
},
604 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NLA_NESTED
},
607 static struct genl_ops dp_packet_genl_ops
[] = {
608 { .cmd
= OVS_PACKET_CMD_EXECUTE
,
609 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
610 .policy
= packet_policy
,
611 .doit
= ovs_packet_cmd_execute
615 static struct genl_family dp_packet_genl_family
= {
616 .id
= GENL_ID_GENERATE
,
617 .hdrsize
= sizeof(struct ovs_header
),
618 .name
= OVS_PACKET_FAMILY
,
619 .version
= OVS_PACKET_VERSION
,
620 .maxattr
= OVS_PACKET_ATTR_MAX
,
622 .parallel_ops
= true,
623 .ops
= dp_packet_genl_ops
,
624 .n_ops
= ARRAY_SIZE(dp_packet_genl_ops
),
627 static void get_dp_stats(struct datapath
*dp
, struct ovs_dp_stats
*stats
,
628 struct ovs_dp_megaflow_stats
*mega_stats
)
632 memset(mega_stats
, 0, sizeof(*mega_stats
));
634 stats
->n_flows
= ovs_flow_tbl_count(&dp
->table
);
635 mega_stats
->n_masks
= ovs_flow_tbl_num_masks(&dp
->table
);
637 stats
->n_hit
= stats
->n_missed
= stats
->n_lost
= 0;
639 for_each_possible_cpu(i
) {
640 const struct dp_stats_percpu
*percpu_stats
;
641 struct dp_stats_percpu local_stats
;
644 percpu_stats
= per_cpu_ptr(dp
->stats_percpu
, i
);
647 start
= u64_stats_fetch_begin_irq(&percpu_stats
->sync
);
648 local_stats
= *percpu_stats
;
649 } while (u64_stats_fetch_retry_irq(&percpu_stats
->sync
, start
));
651 stats
->n_hit
+= local_stats
.n_hit
;
652 stats
->n_missed
+= local_stats
.n_missed
;
653 stats
->n_lost
+= local_stats
.n_lost
;
654 mega_stats
->n_mask_hit
+= local_stats
.n_mask_hit
;
658 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions
*acts
)
660 return NLMSG_ALIGN(sizeof(struct ovs_header
))
661 + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
662 + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
663 + nla_total_size(sizeof(struct ovs_flow_stats
)) /* OVS_FLOW_ATTR_STATS */
664 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
665 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
666 + nla_total_size(acts
->actions_len
); /* OVS_FLOW_ATTR_ACTIONS */
669 /* Called with ovs_mutex or RCU read lock. */
670 static int ovs_flow_cmd_fill_match(struct datapath
*dp
,
671 const struct sw_flow
*flow
,
678 nla
= nla_nest_start(skb
, OVS_FLOW_ATTR_KEY
);
682 err
= ovs_nla_put_flow(dp
, &flow
->unmasked_key
,
683 &flow
->unmasked_key
, skb
);
686 nla_nest_end(skb
, nla
);
688 /* Fill flow mask. */
689 nla
= nla_nest_start(skb
, OVS_FLOW_ATTR_MASK
);
693 err
= ovs_nla_put_flow(dp
, &flow
->key
, &flow
->mask
->key
, skb
);
696 nla_nest_end(skb
, nla
);
701 /* Called with ovs_mutex or RCU read lock. */
702 static int ovs_flow_cmd_fill_stats(const struct sw_flow
*flow
,
705 struct ovs_flow_stats stats
;
709 ovs_flow_stats_get(flow
, &stats
, &used
, &tcp_flags
);
712 nla_put_u64(skb
, OVS_FLOW_ATTR_USED
, ovs_flow_used_time(used
)))
715 if (stats
.n_packets
&&
716 nla_put(skb
, OVS_FLOW_ATTR_STATS
, sizeof(struct ovs_flow_stats
), &stats
))
719 if ((u8
)ntohs(tcp_flags
) &&
720 nla_put_u8(skb
, OVS_FLOW_ATTR_TCP_FLAGS
, (u8
)ntohs(tcp_flags
)))
726 /* Called with ovs_mutex or RCU read lock. */
727 static int ovs_flow_cmd_fill_actions(const struct sw_flow
*flow
,
728 struct sk_buff
*skb
, int skb_orig_len
)
730 struct nlattr
*start
;
733 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
734 * this is the first flow to be dumped into 'skb'. This is unusual for
735 * Netlink but individual action lists can be longer than
736 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
737 * The userspace caller can always fetch the actions separately if it
738 * really wants them. (Most userspace callers in fact don't care.)
740 * This can only fail for dump operations because the skb is always
741 * properly sized for single flows.
743 start
= nla_nest_start(skb
, OVS_FLOW_ATTR_ACTIONS
);
745 const struct sw_flow_actions
*sf_acts
;
747 sf_acts
= rcu_dereference_ovsl(flow
->sf_acts
);
748 err
= ovs_nla_put_actions(sf_acts
->actions
,
749 sf_acts
->actions_len
, skb
);
752 nla_nest_end(skb
, start
);
757 nla_nest_cancel(skb
, start
);
759 } else if (skb_orig_len
) {
766 /* Called with ovs_mutex or RCU read lock. */
767 static int ovs_flow_cmd_fill_info(struct datapath
*dp
,
768 const struct sw_flow
*flow
, int dp_ifindex
,
769 struct sk_buff
*skb
, u32 portid
,
770 u32 seq
, u32 flags
, u8 cmd
)
772 const int skb_orig_len
= skb
->len
;
773 struct ovs_header
*ovs_header
;
776 ovs_header
= genlmsg_put(skb
, portid
, seq
, &dp_flow_genl_family
, flags
, cmd
);
779 ovs_header
->dp_ifindex
= dp_ifindex
;
781 err
= ovs_flow_cmd_fill_match(dp
, flow
, skb
);
785 err
= ovs_flow_cmd_fill_stats(flow
, skb
);
789 err
= ovs_flow_cmd_fill_actions(flow
, skb
, skb_orig_len
);
793 return genlmsg_end(skb
, ovs_header
);
796 genlmsg_cancel(skb
, ovs_header
);
800 /* May not be called with RCU read lock. */
801 static struct sk_buff
*ovs_flow_cmd_alloc_info(const struct sw_flow_actions
*acts
,
802 struct genl_info
*info
,
807 if (!always
&& !ovs_must_notify(info
, &ovs_dp_flow_multicast_group
))
810 skb
= genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts
), info
, GFP_KERNEL
);
813 return ERR_PTR(-ENOMEM
);
818 /* Called with ovs_mutex. */
819 static struct sk_buff
*ovs_flow_cmd_build_info(struct datapath
*dp
,
820 const struct sw_flow
*flow
,
822 struct genl_info
*info
, u8 cmd
,
828 skb
= ovs_flow_cmd_alloc_info(ovsl_dereference(flow
->sf_acts
), info
,
830 if (!skb
|| IS_ERR(skb
))
833 retval
= ovs_flow_cmd_fill_info(dp
, flow
, dp_ifindex
, skb
,
834 info
->snd_portid
, info
->snd_seq
, 0,
840 static int ovs_flow_cmd_new(struct sk_buff
*skb
, struct genl_info
*info
)
842 struct nlattr
**a
= info
->attrs
;
843 struct ovs_header
*ovs_header
= info
->userhdr
;
844 struct sw_flow
*flow
, *new_flow
;
845 struct sw_flow_mask mask
;
846 struct sk_buff
*reply
;
848 struct sw_flow_actions
*acts
;
849 struct sw_flow_match match
;
852 /* Must have key and actions. */
854 if (!a
[OVS_FLOW_ATTR_KEY
]) {
855 OVS_NLERR("Flow key attribute not present in new flow.\n");
858 if (!a
[OVS_FLOW_ATTR_ACTIONS
]) {
859 OVS_NLERR("Flow actions attribute not present in new flow.\n");
863 /* Most of the time we need to allocate a new flow, do it before
865 new_flow
= ovs_flow_alloc();
866 if (IS_ERR(new_flow
)) {
867 error
= PTR_ERR(new_flow
);
872 ovs_match_init(&match
, &new_flow
->unmasked_key
, &mask
);
873 error
= ovs_nla_get_match(&match
,
874 a
[OVS_FLOW_ATTR_KEY
], a
[OVS_FLOW_ATTR_MASK
]);
878 ovs_flow_mask_key(&new_flow
->key
, &new_flow
->unmasked_key
, &mask
);
880 /* Validate actions. */
881 error
= ovs_nla_copy_actions(a
[OVS_FLOW_ATTR_ACTIONS
], &new_flow
->key
,
884 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
888 reply
= ovs_flow_cmd_alloc_info(acts
, info
, false);
890 error
= PTR_ERR(reply
);
895 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
900 /* Check if this is a duplicate flow */
901 flow
= ovs_flow_tbl_lookup(&dp
->table
, &new_flow
->unmasked_key
);
903 rcu_assign_pointer(new_flow
->sf_acts
, acts
);
905 /* Put flow in bucket. */
906 error
= ovs_flow_tbl_insert(&dp
->table
, new_flow
, &mask
);
907 if (unlikely(error
)) {
912 if (unlikely(reply
)) {
913 error
= ovs_flow_cmd_fill_info(dp
, new_flow
,
914 ovs_header
->dp_ifindex
,
915 reply
, info
->snd_portid
,
922 struct sw_flow_actions
*old_acts
;
924 /* Bail out if we're not allowed to modify an existing flow.
925 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
926 * because Generic Netlink treats the latter as a dump
927 * request. We also accept NLM_F_EXCL in case that bug ever
930 if (unlikely(info
->nlhdr
->nlmsg_flags
& (NLM_F_CREATE
935 /* The unmasked key has to be the same for flow updates. */
936 if (unlikely(!ovs_flow_cmp_unmasked_key(flow
, &match
))) {
937 /* Look for any overlapping flow. */
938 flow
= ovs_flow_tbl_lookup_exact(&dp
->table
, &match
);
944 /* Update actions. */
945 old_acts
= ovsl_dereference(flow
->sf_acts
);
946 rcu_assign_pointer(flow
->sf_acts
, acts
);
948 if (unlikely(reply
)) {
949 error
= ovs_flow_cmd_fill_info(dp
, flow
,
950 ovs_header
->dp_ifindex
,
951 reply
, info
->snd_portid
,
958 ovs_nla_free_flow_actions(old_acts
);
959 ovs_flow_free(new_flow
, false);
963 ovs_notify(&dp_flow_genl_family
, &ovs_dp_flow_multicast_group
, reply
, info
);
972 ovs_flow_free(new_flow
, false);
977 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
978 static struct sw_flow_actions
*get_flow_actions(const struct nlattr
*a
,
979 const struct sw_flow_key
*key
,
980 const struct sw_flow_mask
*mask
)
982 struct sw_flow_actions
*acts
;
983 struct sw_flow_key masked_key
;
986 ovs_flow_mask_key(&masked_key
, key
, mask
);
987 error
= ovs_nla_copy_actions(a
, &masked_key
, &acts
);
989 OVS_NLERR("Actions may not be safe on all matching packets.\n");
990 return ERR_PTR(error
);
996 static int ovs_flow_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
998 struct nlattr
**a
= info
->attrs
;
999 struct ovs_header
*ovs_header
= info
->userhdr
;
1000 struct sw_flow_key key
;
1001 struct sw_flow
*flow
;
1002 struct sw_flow_mask mask
;
1003 struct sk_buff
*reply
= NULL
;
1004 struct datapath
*dp
;
1005 struct sw_flow_actions
*old_acts
= NULL
, *acts
= NULL
;
1006 struct sw_flow_match match
;
1011 if (!a
[OVS_FLOW_ATTR_KEY
]) {
1012 OVS_NLERR("Flow key attribute not present in set flow.\n");
1016 ovs_match_init(&match
, &key
, &mask
);
1017 error
= ovs_nla_get_match(&match
,
1018 a
[OVS_FLOW_ATTR_KEY
], a
[OVS_FLOW_ATTR_MASK
]);
1022 /* Validate actions. */
1023 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
1024 acts
= get_flow_actions(a
[OVS_FLOW_ATTR_ACTIONS
], &key
, &mask
);
1026 error
= PTR_ERR(acts
);
1030 /* Can allocate before locking if have acts. */
1031 reply
= ovs_flow_cmd_alloc_info(acts
, info
, false);
1032 if (IS_ERR(reply
)) {
1033 error
= PTR_ERR(reply
);
1034 goto err_kfree_acts
;
1039 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1040 if (unlikely(!dp
)) {
1042 goto err_unlock_ovs
;
1044 /* Check that the flow exists. */
1045 flow
= ovs_flow_tbl_lookup_exact(&dp
->table
, &match
);
1046 if (unlikely(!flow
)) {
1048 goto err_unlock_ovs
;
1051 /* Update actions, if present. */
1053 old_acts
= ovsl_dereference(flow
->sf_acts
);
1054 rcu_assign_pointer(flow
->sf_acts
, acts
);
1056 if (unlikely(reply
)) {
1057 error
= ovs_flow_cmd_fill_info(dp
, flow
,
1058 ovs_header
->dp_ifindex
,
1059 reply
, info
->snd_portid
,
1065 /* Could not alloc without acts before locking. */
1066 reply
= ovs_flow_cmd_build_info(dp
, flow
,
1067 ovs_header
->dp_ifindex
,
1068 info
, OVS_FLOW_CMD_NEW
, false);
1069 if (unlikely(IS_ERR(reply
))) {
1070 error
= PTR_ERR(reply
);
1071 goto err_unlock_ovs
;
1076 if (a
[OVS_FLOW_ATTR_CLEAR
])
1077 ovs_flow_stats_clear(flow
);
1081 ovs_notify(&dp_flow_genl_family
, &ovs_dp_flow_multicast_group
, reply
, info
);
1083 ovs_nla_free_flow_actions(old_acts
);
1095 static int ovs_flow_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1097 struct nlattr
**a
= info
->attrs
;
1098 struct ovs_header
*ovs_header
= info
->userhdr
;
1099 struct sw_flow_key key
;
1100 struct sk_buff
*reply
;
1101 struct sw_flow
*flow
;
1102 struct datapath
*dp
;
1103 struct sw_flow_match match
;
1106 if (!a
[OVS_FLOW_ATTR_KEY
]) {
1107 OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
1111 ovs_match_init(&match
, &key
, NULL
);
1112 err
= ovs_nla_get_match(&match
, a
[OVS_FLOW_ATTR_KEY
], NULL
);
1117 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1123 flow
= ovs_flow_tbl_lookup_exact(&dp
->table
, &match
);
1129 reply
= ovs_flow_cmd_build_info(dp
, flow
, ovs_header
->dp_ifindex
, info
,
1130 OVS_FLOW_CMD_NEW
, true);
1131 if (IS_ERR(reply
)) {
1132 err
= PTR_ERR(reply
);
1137 return genlmsg_reply(reply
, info
);
1143 static int ovs_flow_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1145 struct nlattr
**a
= info
->attrs
;
1146 struct ovs_header
*ovs_header
= info
->userhdr
;
1147 struct sw_flow_key key
;
1148 struct sk_buff
*reply
;
1149 struct sw_flow
*flow
;
1150 struct datapath
*dp
;
1151 struct sw_flow_match match
;
1154 if (likely(a
[OVS_FLOW_ATTR_KEY
])) {
1155 ovs_match_init(&match
, &key
, NULL
);
1156 err
= ovs_nla_get_match(&match
, a
[OVS_FLOW_ATTR_KEY
], NULL
);
1162 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1163 if (unlikely(!dp
)) {
1167 if (unlikely(!a
[OVS_FLOW_ATTR_KEY
])) {
1168 err
= ovs_flow_tbl_flush(&dp
->table
);
1171 flow
= ovs_flow_tbl_lookup_exact(&dp
->table
, &match
);
1172 if (unlikely(!flow
)) {
1177 ovs_flow_tbl_remove(&dp
->table
, flow
);
1180 reply
= ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force
*)flow
->sf_acts
,
1183 if (likely(reply
)) {
1184 if (likely(!IS_ERR(reply
))) {
1185 rcu_read_lock(); /* Keep RCU checker happy. */
1186 err
= ovs_flow_cmd_fill_info(dp
, flow
,
1187 ovs_header
->dp_ifindex
,
1188 reply
, info
->snd_portid
,
1193 ovs_notify(&dp_flow_genl_family
, &ovs_dp_flow_multicast_group
, reply
, info
);
1195 genl_set_err(&dp_flow_genl_family
, sock_net(skb
->sk
), 0,
1196 GROUP_ID(&ovs_dp_flow_multicast_group
), PTR_ERR(reply
));
1201 ovs_flow_free(flow
, true);
1208 static int ovs_flow_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1210 struct ovs_header
*ovs_header
= genlmsg_data(nlmsg_data(cb
->nlh
));
1211 struct table_instance
*ti
;
1212 struct datapath
*dp
;
1215 dp
= get_dp_rcu(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1221 ti
= rcu_dereference(dp
->table
.ti
);
1223 struct sw_flow
*flow
;
1226 bucket
= cb
->args
[0];
1228 flow
= ovs_flow_tbl_dump_next(ti
, &bucket
, &obj
);
1232 if (ovs_flow_cmd_fill_info(dp
, flow
, ovs_header
->dp_ifindex
, skb
,
1233 NETLINK_CB(cb
->skb
).portid
,
1234 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
1235 OVS_FLOW_CMD_NEW
) < 0)
1238 cb
->args
[0] = bucket
;
1245 static const struct nla_policy flow_policy
[OVS_FLOW_ATTR_MAX
+ 1] = {
1246 [OVS_FLOW_ATTR_KEY
] = { .type
= NLA_NESTED
},
1247 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NLA_NESTED
},
1248 [OVS_FLOW_ATTR_CLEAR
] = { .type
= NLA_FLAG
},
1251 static struct genl_ops dp_flow_genl_ops
[] = {
1252 { .cmd
= OVS_FLOW_CMD_NEW
,
1253 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1254 .policy
= flow_policy
,
1255 .doit
= ovs_flow_cmd_new
1257 { .cmd
= OVS_FLOW_CMD_DEL
,
1258 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1259 .policy
= flow_policy
,
1260 .doit
= ovs_flow_cmd_del
1262 { .cmd
= OVS_FLOW_CMD_GET
,
1263 .flags
= 0, /* OK for unprivileged users. */
1264 .policy
= flow_policy
,
1265 .doit
= ovs_flow_cmd_get
,
1266 .dumpit
= ovs_flow_cmd_dump
1268 { .cmd
= OVS_FLOW_CMD_SET
,
1269 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1270 .policy
= flow_policy
,
1271 .doit
= ovs_flow_cmd_set
,
1275 static struct genl_family dp_flow_genl_family
= {
1276 .id
= GENL_ID_GENERATE
,
1277 .hdrsize
= sizeof(struct ovs_header
),
1278 .name
= OVS_FLOW_FAMILY
,
1279 .version
= OVS_FLOW_VERSION
,
1280 .maxattr
= OVS_FLOW_ATTR_MAX
,
1282 .parallel_ops
= true,
1283 .ops
= dp_flow_genl_ops
,
1284 .n_ops
= ARRAY_SIZE(dp_flow_genl_ops
),
1285 .mcgrps
= &ovs_dp_flow_multicast_group
,
1289 static size_t ovs_dp_cmd_msg_size(void)
1291 size_t msgsize
= NLMSG_ALIGN(sizeof(struct ovs_header
));
1293 msgsize
+= nla_total_size(IFNAMSIZ
);
1294 msgsize
+= nla_total_size(sizeof(struct ovs_dp_stats
));
1295 msgsize
+= nla_total_size(sizeof(struct ovs_dp_megaflow_stats
));
1296 msgsize
+= nla_total_size(sizeof(u32
)); /* OVS_DP_ATTR_USER_FEATURES */
1301 /* Called with ovs_mutex or RCU read lock. */
1302 static int ovs_dp_cmd_fill_info(struct datapath
*dp
, struct sk_buff
*skb
,
1303 u32 portid
, u32 seq
, u32 flags
, u8 cmd
)
1305 struct ovs_header
*ovs_header
;
1306 struct ovs_dp_stats dp_stats
;
1307 struct ovs_dp_megaflow_stats dp_megaflow_stats
;
1310 ovs_header
= genlmsg_put(skb
, portid
, seq
, &dp_datapath_genl_family
,
1315 ovs_header
->dp_ifindex
= get_dpifindex(dp
);
1317 err
= nla_put_string(skb
, OVS_DP_ATTR_NAME
, ovs_dp_name(dp
));
1319 goto nla_put_failure
;
1321 get_dp_stats(dp
, &dp_stats
, &dp_megaflow_stats
);
1322 if (nla_put(skb
, OVS_DP_ATTR_STATS
, sizeof(struct ovs_dp_stats
),
1324 goto nla_put_failure
;
1326 if (nla_put(skb
, OVS_DP_ATTR_MEGAFLOW_STATS
,
1327 sizeof(struct ovs_dp_megaflow_stats
),
1328 &dp_megaflow_stats
))
1329 goto nla_put_failure
;
1331 if (nla_put_u32(skb
, OVS_DP_ATTR_USER_FEATURES
, dp
->user_features
))
1332 goto nla_put_failure
;
1334 return genlmsg_end(skb
, ovs_header
);
1337 genlmsg_cancel(skb
, ovs_header
);
1342 static struct sk_buff
*ovs_dp_cmd_alloc_info(struct genl_info
*info
)
1344 return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info
, GFP_KERNEL
);
1347 /* Called with rcu_read_lock or ovs_mutex. */
1348 static struct datapath
*lookup_datapath(struct net
*net
,
1349 struct ovs_header
*ovs_header
,
1350 struct nlattr
*a
[OVS_DP_ATTR_MAX
+ 1])
1352 struct datapath
*dp
;
1354 if (!a
[OVS_DP_ATTR_NAME
])
1355 dp
= get_dp(net
, ovs_header
->dp_ifindex
);
1357 struct vport
*vport
;
1359 vport
= ovs_vport_locate(net
, nla_data(a
[OVS_DP_ATTR_NAME
]));
1360 dp
= vport
&& vport
->port_no
== OVSP_LOCAL
? vport
->dp
: NULL
;
1362 return dp
? dp
: ERR_PTR(-ENODEV
);
1365 static void ovs_dp_reset_user_features(struct sk_buff
*skb
, struct genl_info
*info
)
1367 struct datapath
*dp
;
1369 dp
= lookup_datapath(sock_net(skb
->sk
), info
->userhdr
, info
->attrs
);
1373 WARN(dp
->user_features
, "Dropping previously announced user features\n");
1374 dp
->user_features
= 0;
1377 static void ovs_dp_change(struct datapath
*dp
, struct nlattr
**a
)
1379 if (a
[OVS_DP_ATTR_USER_FEATURES
])
1380 dp
->user_features
= nla_get_u32(a
[OVS_DP_ATTR_USER_FEATURES
]);
1383 static int ovs_dp_cmd_new(struct sk_buff
*skb
, struct genl_info
*info
)
1385 struct nlattr
**a
= info
->attrs
;
1386 struct vport_parms parms
;
1387 struct sk_buff
*reply
;
1388 struct datapath
*dp
;
1389 struct vport
*vport
;
1390 struct ovs_net
*ovs_net
;
1394 if (!a
[OVS_DP_ATTR_NAME
] || !a
[OVS_DP_ATTR_UPCALL_PID
])
1397 reply
= ovs_dp_cmd_alloc_info(info
);
1402 dp
= kzalloc(sizeof(*dp
), GFP_KERNEL
);
1404 goto err_free_reply
;
1406 ovs_dp_set_net(dp
, hold_net(sock_net(skb
->sk
)));
1408 /* Allocate table. */
1409 err
= ovs_flow_tbl_init(&dp
->table
);
1413 dp
->stats_percpu
= alloc_percpu(struct dp_stats_percpu
);
1414 if (!dp
->stats_percpu
) {
1416 goto err_destroy_table
;
1419 for_each_possible_cpu(i
) {
1420 struct dp_stats_percpu
*dpath_stats
;
1421 dpath_stats
= per_cpu_ptr(dp
->stats_percpu
, i
);
1422 u64_stats_init(&dpath_stats
->sync
);
1425 dp
->ports
= kmalloc(DP_VPORT_HASH_BUCKETS
* sizeof(struct hlist_head
),
1429 goto err_destroy_percpu
;
1432 for (i
= 0; i
< DP_VPORT_HASH_BUCKETS
; i
++)
1433 INIT_HLIST_HEAD(&dp
->ports
[i
]);
1435 /* Set up our datapath device. */
1436 parms
.name
= nla_data(a
[OVS_DP_ATTR_NAME
]);
1437 parms
.type
= OVS_VPORT_TYPE_INTERNAL
;
1438 parms
.options
= NULL
;
1440 parms
.port_no
= OVSP_LOCAL
;
1441 parms
.upcall_portids
= a
[OVS_DP_ATTR_UPCALL_PID
];
1443 ovs_dp_change(dp
, a
);
1445 /* So far only local changes have been made, now need the lock. */
1448 vport
= new_vport(&parms
);
1449 if (IS_ERR(vport
)) {
1450 err
= PTR_ERR(vport
);
1454 if (err
== -EEXIST
) {
1455 /* An outdated user space instance that does not understand
1456 * the concept of user_features has attempted to create a new
1457 * datapath and is likely to reuse it. Drop all user features.
1459 if (info
->genlhdr
->version
< OVS_DP_VER_FEATURES
)
1460 ovs_dp_reset_user_features(skb
, info
);
1463 goto err_destroy_ports_array
;
1466 err
= ovs_dp_cmd_fill_info(dp
, reply
, info
->snd_portid
,
1467 info
->snd_seq
, 0, OVS_DP_CMD_NEW
);
1470 ovs_net
= net_generic(ovs_dp_get_net(dp
), ovs_net_id
);
1471 list_add_tail_rcu(&dp
->list_node
, &ovs_net
->dps
);
1475 ovs_notify(&dp_datapath_genl_family
, &ovs_dp_datapath_multicast_group
, reply
, info
);
1478 err_destroy_ports_array
:
1482 free_percpu(dp
->stats_percpu
);
1484 ovs_flow_tbl_destroy(&dp
->table
);
1486 release_net(ovs_dp_get_net(dp
));
1494 /* Called with ovs_mutex. */
1495 static void __dp_destroy(struct datapath
*dp
)
1499 for (i
= 0; i
< DP_VPORT_HASH_BUCKETS
; i
++) {
1500 struct vport
*vport
;
1501 struct hlist_node
*n
;
1503 hlist_for_each_entry_safe(vport
, n
, &dp
->ports
[i
], dp_hash_node
)
1504 if (vport
->port_no
!= OVSP_LOCAL
)
1505 ovs_dp_detach_port(vport
);
1508 list_del_rcu(&dp
->list_node
);
1510 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1511 * all ports in datapath are destroyed first before freeing datapath.
1513 ovs_dp_detach_port(ovs_vport_ovsl(dp
, OVSP_LOCAL
));
1515 /* RCU destroy the flow table */
1516 call_rcu(&dp
->rcu
, destroy_dp_rcu
);
1519 static int ovs_dp_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1521 struct sk_buff
*reply
;
1522 struct datapath
*dp
;
1525 reply
= ovs_dp_cmd_alloc_info(info
);
1530 dp
= lookup_datapath(sock_net(skb
->sk
), info
->userhdr
, info
->attrs
);
1533 goto err_unlock_free
;
1535 err
= ovs_dp_cmd_fill_info(dp
, reply
, info
->snd_portid
,
1536 info
->snd_seq
, 0, OVS_DP_CMD_DEL
);
1542 ovs_notify(&dp_datapath_genl_family
, &ovs_dp_datapath_multicast_group
, reply
, info
);
1551 static int ovs_dp_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
1553 struct sk_buff
*reply
;
1554 struct datapath
*dp
;
1557 reply
= ovs_dp_cmd_alloc_info(info
);
1562 dp
= lookup_datapath(sock_net(skb
->sk
), info
->userhdr
, info
->attrs
);
1565 goto err_unlock_free
;
1567 ovs_dp_change(dp
, info
->attrs
);
1569 err
= ovs_dp_cmd_fill_info(dp
, reply
, info
->snd_portid
,
1570 info
->snd_seq
, 0, OVS_DP_CMD_NEW
);
1574 ovs_notify(&dp_datapath_genl_family
, &ovs_dp_datapath_multicast_group
, reply
, info
);
1583 static int ovs_dp_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1585 struct sk_buff
*reply
;
1586 struct datapath
*dp
;
1589 reply
= ovs_dp_cmd_alloc_info(info
);
1594 dp
= lookup_datapath(sock_net(skb
->sk
), info
->userhdr
, info
->attrs
);
1597 goto err_unlock_free
;
1599 err
= ovs_dp_cmd_fill_info(dp
, reply
, info
->snd_portid
,
1600 info
->snd_seq
, 0, OVS_DP_CMD_NEW
);
1604 return genlmsg_reply(reply
, info
);
1612 static int ovs_dp_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1614 struct ovs_net
*ovs_net
= net_generic(sock_net(skb
->sk
), ovs_net_id
);
1615 struct datapath
*dp
;
1616 int skip
= cb
->args
[0];
1620 list_for_each_entry_rcu(dp
, &ovs_net
->dps
, list_node
) {
1622 ovs_dp_cmd_fill_info(dp
, skb
, NETLINK_CB(cb
->skb
).portid
,
1623 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
1624 OVS_DP_CMD_NEW
) < 0)
1635 static const struct nla_policy datapath_policy
[OVS_DP_ATTR_MAX
+ 1] = {
1636 [OVS_DP_ATTR_NAME
] = { .type
= NLA_NUL_STRING
, .len
= IFNAMSIZ
- 1 },
1637 [OVS_DP_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
1638 [OVS_DP_ATTR_USER_FEATURES
] = { .type
= NLA_U32
},
1641 static struct genl_ops dp_datapath_genl_ops
[] = {
1642 { .cmd
= OVS_DP_CMD_NEW
,
1643 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1644 .policy
= datapath_policy
,
1645 .doit
= ovs_dp_cmd_new
1647 { .cmd
= OVS_DP_CMD_DEL
,
1648 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1649 .policy
= datapath_policy
,
1650 .doit
= ovs_dp_cmd_del
1652 { .cmd
= OVS_DP_CMD_GET
,
1653 .flags
= 0, /* OK for unprivileged users. */
1654 .policy
= datapath_policy
,
1655 .doit
= ovs_dp_cmd_get
,
1656 .dumpit
= ovs_dp_cmd_dump
1658 { .cmd
= OVS_DP_CMD_SET
,
1659 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
1660 .policy
= datapath_policy
,
1661 .doit
= ovs_dp_cmd_set
,
1665 static struct genl_family dp_datapath_genl_family
= {
1666 .id
= GENL_ID_GENERATE
,
1667 .hdrsize
= sizeof(struct ovs_header
),
1668 .name
= OVS_DATAPATH_FAMILY
,
1669 .version
= OVS_DATAPATH_VERSION
,
1670 .maxattr
= OVS_DP_ATTR_MAX
,
1672 .parallel_ops
= true,
1673 .ops
= dp_datapath_genl_ops
,
1674 .n_ops
= ARRAY_SIZE(dp_datapath_genl_ops
),
1675 .mcgrps
= &ovs_dp_datapath_multicast_group
,
1679 /* Called with ovs_mutex or RCU read lock. */
1680 static int ovs_vport_cmd_fill_info(struct vport
*vport
, struct sk_buff
*skb
,
1681 u32 portid
, u32 seq
, u32 flags
, u8 cmd
)
1683 struct ovs_header
*ovs_header
;
1684 struct ovs_vport_stats vport_stats
;
1687 ovs_header
= genlmsg_put(skb
, portid
, seq
, &dp_vport_genl_family
,
1692 ovs_header
->dp_ifindex
= get_dpifindex(vport
->dp
);
1694 if (nla_put_u32(skb
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
) ||
1695 nla_put_u32(skb
, OVS_VPORT_ATTR_TYPE
, vport
->ops
->type
) ||
1696 nla_put_string(skb
, OVS_VPORT_ATTR_NAME
, vport
->ops
->get_name(vport
)))
1697 goto nla_put_failure
;
1699 ovs_vport_get_stats(vport
, &vport_stats
);
1700 if (nla_put(skb
, OVS_VPORT_ATTR_STATS
, sizeof(struct ovs_vport_stats
),
1702 goto nla_put_failure
;
1704 if (ovs_vport_get_upcall_portids(vport
, skb
))
1705 goto nla_put_failure
;
1707 err
= ovs_vport_get_options(vport
, skb
);
1708 if (err
== -EMSGSIZE
)
1711 return genlmsg_end(skb
, ovs_header
);
1716 genlmsg_cancel(skb
, ovs_header
);
1720 static struct sk_buff
*ovs_vport_cmd_alloc_info(void)
1722 return nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1725 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1726 struct sk_buff
*ovs_vport_cmd_build_info(struct vport
*vport
, u32 portid
,
1729 struct sk_buff
*skb
;
1732 skb
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_ATOMIC
);
1734 return ERR_PTR(-ENOMEM
);
1736 retval
= ovs_vport_cmd_fill_info(vport
, skb
, portid
, seq
, 0, cmd
);
1742 /* Called with ovs_mutex or RCU read lock. */
1743 static struct vport
*lookup_vport(struct net
*net
,
1744 struct ovs_header
*ovs_header
,
1745 struct nlattr
*a
[OVS_VPORT_ATTR_MAX
+ 1])
1747 struct datapath
*dp
;
1748 struct vport
*vport
;
1750 if (a
[OVS_VPORT_ATTR_NAME
]) {
1751 vport
= ovs_vport_locate(net
, nla_data(a
[OVS_VPORT_ATTR_NAME
]));
1753 return ERR_PTR(-ENODEV
);
1754 if (ovs_header
->dp_ifindex
&&
1755 ovs_header
->dp_ifindex
!= get_dpifindex(vport
->dp
))
1756 return ERR_PTR(-ENODEV
);
1758 } else if (a
[OVS_VPORT_ATTR_PORT_NO
]) {
1759 u32 port_no
= nla_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1761 if (port_no
>= DP_MAX_PORTS
)
1762 return ERR_PTR(-EFBIG
);
1764 dp
= get_dp(net
, ovs_header
->dp_ifindex
);
1766 return ERR_PTR(-ENODEV
);
1768 vport
= ovs_vport_ovsl_rcu(dp
, port_no
);
1770 return ERR_PTR(-ENODEV
);
1773 return ERR_PTR(-EINVAL
);
1776 static int ovs_vport_cmd_new(struct sk_buff
*skb
, struct genl_info
*info
)
1778 struct nlattr
**a
= info
->attrs
;
1779 struct ovs_header
*ovs_header
= info
->userhdr
;
1780 struct vport_parms parms
;
1781 struct sk_buff
*reply
;
1782 struct vport
*vport
;
1783 struct datapath
*dp
;
1787 if (!a
[OVS_VPORT_ATTR_NAME
] || !a
[OVS_VPORT_ATTR_TYPE
] ||
1788 !a
[OVS_VPORT_ATTR_UPCALL_PID
])
1791 port_no
= a
[OVS_VPORT_ATTR_PORT_NO
]
1792 ? nla_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]) : 0;
1793 if (port_no
>= DP_MAX_PORTS
)
1796 reply
= ovs_vport_cmd_alloc_info();
1801 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1804 goto exit_unlock_free
;
1807 vport
= ovs_vport_ovsl(dp
, port_no
);
1810 goto exit_unlock_free
;
1812 for (port_no
= 1; ; port_no
++) {
1813 if (port_no
>= DP_MAX_PORTS
) {
1815 goto exit_unlock_free
;
1817 vport
= ovs_vport_ovsl(dp
, port_no
);
1823 parms
.name
= nla_data(a
[OVS_VPORT_ATTR_NAME
]);
1824 parms
.type
= nla_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
1825 parms
.options
= a
[OVS_VPORT_ATTR_OPTIONS
];
1827 parms
.port_no
= port_no
;
1828 parms
.upcall_portids
= a
[OVS_VPORT_ATTR_UPCALL_PID
];
1830 vport
= new_vport(&parms
);
1831 err
= PTR_ERR(vport
);
1833 goto exit_unlock_free
;
1836 if (a
[OVS_VPORT_ATTR_STATS
])
1837 ovs_vport_set_stats(vport
, nla_data(a
[OVS_VPORT_ATTR_STATS
]));
1839 err
= ovs_vport_cmd_fill_info(vport
, reply
, info
->snd_portid
,
1840 info
->snd_seq
, 0, OVS_VPORT_CMD_NEW
);
1844 ovs_notify(&dp_vport_genl_family
, &ovs_dp_vport_multicast_group
, reply
, info
);
1853 static int ovs_vport_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
1855 struct nlattr
**a
= info
->attrs
;
1856 struct sk_buff
*reply
;
1857 struct vport
*vport
;
1860 reply
= ovs_vport_cmd_alloc_info();
1865 vport
= lookup_vport(sock_net(skb
->sk
), info
->userhdr
, a
);
1866 err
= PTR_ERR(vport
);
1868 goto exit_unlock_free
;
1870 if (a
[OVS_VPORT_ATTR_TYPE
] &&
1871 nla_get_u32(a
[OVS_VPORT_ATTR_TYPE
]) != vport
->ops
->type
) {
1873 goto exit_unlock_free
;
1876 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
1877 err
= ovs_vport_set_options(vport
, a
[OVS_VPORT_ATTR_OPTIONS
]);
1879 goto exit_unlock_free
;
1882 if (a
[OVS_VPORT_ATTR_STATS
])
1883 ovs_vport_set_stats(vport
, nla_data(a
[OVS_VPORT_ATTR_STATS
]));
1886 if (a
[OVS_VPORT_ATTR_UPCALL_PID
]) {
1887 err
= ovs_vport_set_upcall_portids(vport
,
1888 a
[OVS_VPORT_ATTR_UPCALL_PID
]);
1890 goto exit_unlock_free
;
1893 err
= ovs_vport_cmd_fill_info(vport
, reply
, info
->snd_portid
,
1894 info
->snd_seq
, 0, OVS_VPORT_CMD_NEW
);
1898 ovs_notify(&dp_vport_genl_family
, &ovs_dp_vport_multicast_group
, reply
, info
);
1907 static int ovs_vport_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
1909 struct nlattr
**a
= info
->attrs
;
1910 struct sk_buff
*reply
;
1911 struct vport
*vport
;
1914 reply
= ovs_vport_cmd_alloc_info();
1919 vport
= lookup_vport(sock_net(skb
->sk
), info
->userhdr
, a
);
1920 err
= PTR_ERR(vport
);
1922 goto exit_unlock_free
;
1924 if (vport
->port_no
== OVSP_LOCAL
) {
1926 goto exit_unlock_free
;
1929 err
= ovs_vport_cmd_fill_info(vport
, reply
, info
->snd_portid
,
1930 info
->snd_seq
, 0, OVS_VPORT_CMD_DEL
);
1932 ovs_dp_detach_port(vport
);
1935 ovs_notify(&dp_vport_genl_family
, &ovs_dp_vport_multicast_group
, reply
, info
);
1944 static int ovs_vport_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
1946 struct nlattr
**a
= info
->attrs
;
1947 struct ovs_header
*ovs_header
= info
->userhdr
;
1948 struct sk_buff
*reply
;
1949 struct vport
*vport
;
1952 reply
= ovs_vport_cmd_alloc_info();
1957 vport
= lookup_vport(sock_net(skb
->sk
), ovs_header
, a
);
1958 err
= PTR_ERR(vport
);
1960 goto exit_unlock_free
;
1961 err
= ovs_vport_cmd_fill_info(vport
, reply
, info
->snd_portid
,
1962 info
->snd_seq
, 0, OVS_VPORT_CMD_NEW
);
1966 return genlmsg_reply(reply
, info
);
1974 static int ovs_vport_cmd_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1976 struct ovs_header
*ovs_header
= genlmsg_data(nlmsg_data(cb
->nlh
));
1977 struct datapath
*dp
;
1978 int bucket
= cb
->args
[0], skip
= cb
->args
[1];
1982 dp
= get_dp_rcu(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
1987 for (i
= bucket
; i
< DP_VPORT_HASH_BUCKETS
; i
++) {
1988 struct vport
*vport
;
1991 hlist_for_each_entry_rcu(vport
, &dp
->ports
[i
], dp_hash_node
) {
1993 ovs_vport_cmd_fill_info(vport
, skb
,
1994 NETLINK_CB(cb
->skb
).portid
,
1997 OVS_VPORT_CMD_NEW
) < 0)
2013 static const struct nla_policy vport_policy
[OVS_VPORT_ATTR_MAX
+ 1] = {
2014 [OVS_VPORT_ATTR_NAME
] = { .type
= NLA_NUL_STRING
, .len
= IFNAMSIZ
- 1 },
2015 [OVS_VPORT_ATTR_STATS
] = { .len
= sizeof(struct ovs_vport_stats
) },
2016 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NLA_U32
},
2017 [OVS_VPORT_ATTR_TYPE
] = { .type
= NLA_U32
},
2018 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NLA_U32
},
2019 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NLA_NESTED
},
2022 static struct genl_ops dp_vport_genl_ops
[] = {
2023 { .cmd
= OVS_VPORT_CMD_NEW
,
2024 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
2025 .policy
= vport_policy
,
2026 .doit
= ovs_vport_cmd_new
2028 { .cmd
= OVS_VPORT_CMD_DEL
,
2029 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
2030 .policy
= vport_policy
,
2031 .doit
= ovs_vport_cmd_del
2033 { .cmd
= OVS_VPORT_CMD_GET
,
2034 .flags
= 0, /* OK for unprivileged users. */
2035 .policy
= vport_policy
,
2036 .doit
= ovs_vport_cmd_get
,
2037 .dumpit
= ovs_vport_cmd_dump
2039 { .cmd
= OVS_VPORT_CMD_SET
,
2040 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN privilege. */
2041 .policy
= vport_policy
,
2042 .doit
= ovs_vport_cmd_set
,
2046 struct genl_family dp_vport_genl_family
= {
2047 .id
= GENL_ID_GENERATE
,
2048 .hdrsize
= sizeof(struct ovs_header
),
2049 .name
= OVS_VPORT_FAMILY
,
2050 .version
= OVS_VPORT_VERSION
,
2051 .maxattr
= OVS_VPORT_ATTR_MAX
,
2053 .parallel_ops
= true,
2054 .ops
= dp_vport_genl_ops
,
2055 .n_ops
= ARRAY_SIZE(dp_vport_genl_ops
),
2056 .mcgrps
= &ovs_dp_vport_multicast_group
,
2060 static struct genl_family
*dp_genl_families
[] = {
2061 &dp_datapath_genl_family
,
2062 &dp_vport_genl_family
,
2063 &dp_flow_genl_family
,
2064 &dp_packet_genl_family
,
2067 static void dp_unregister_genl(int n_families
)
2071 for (i
= 0; i
< n_families
; i
++)
2072 genl_unregister_family(dp_genl_families
[i
]);
2075 static int dp_register_genl(void)
2080 for (i
= 0; i
< ARRAY_SIZE(dp_genl_families
); i
++) {
2082 err
= genl_register_family(dp_genl_families
[i
]);
2090 dp_unregister_genl(i
);
2094 static int __net_init
ovs_init_net(struct net
*net
)
2096 struct ovs_net
*ovs_net
= net_generic(net
, ovs_net_id
);
2098 INIT_LIST_HEAD(&ovs_net
->dps
);
2099 INIT_WORK(&ovs_net
->dp_notify_work
, ovs_dp_notify_wq
);
2103 static void __net_exit
ovs_exit_net(struct net
*net
)
2105 struct datapath
*dp
, *dp_next
;
2106 struct ovs_net
*ovs_net
= net_generic(net
, ovs_net_id
);
2109 list_for_each_entry_safe(dp
, dp_next
, &ovs_net
->dps
, list_node
)
2113 cancel_work_sync(&ovs_net
->dp_notify_work
);
2116 static struct pernet_operations ovs_net_ops
= {
2117 .init
= ovs_init_net
,
2118 .exit
= ovs_exit_net
,
2120 .size
= sizeof(struct ovs_net
),
/* Out-of-tree compat shim: emits register/unregister_pernet_device()
 * wrappers appropriate for the running kernel version.
 */
DEFINE_COMPAT_PNET_REG_FUNC(device);
2125 static int __init
dp_init(void)
2129 BUILD_BUG_ON(sizeof(struct ovs_skb_cb
) > FIELD_SIZEOF(struct sk_buff
, cb
));
2131 pr_info("Open vSwitch switching datapath %s, built "__DATE__
" "__TIME__
"\n",
2134 err
= ovs_flow_init();
2138 err
= ovs_vport_init();
2140 goto error_flow_exit
;
2142 err
= register_pernet_device(&ovs_net_ops
);
2144 goto error_vport_exit
;
2146 err
= register_netdevice_notifier(&ovs_dp_device_notifier
);
2148 goto error_netns_exit
;
2150 err
= dp_register_genl();
2152 goto error_unreg_notifier
;
2156 error_unreg_notifier
:
2157 unregister_netdevice_notifier(&ovs_dp_device_notifier
);
2159 unregister_pernet_device(&ovs_net_ops
);
2168 static void dp_cleanup(void)
2170 dp_unregister_genl(ARRAY_SIZE(dp_genl_families
));
2171 unregister_netdevice_notifier(&ovs_dp_device_notifier
);
2172 unregister_pernet_device(&ovs_net_ops
);
2178 module_init(dp_init
);
2179 module_exit(dp_cleanup
);
2181 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2182 MODULE_LICENSE("GPL");
2183 MODULE_VERSION(VERSION
);