]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
f632c8fc | 2 | * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks. |
a14bc59f BP |
3 | * Distributed under the terms of the GNU GPL version 2. |
4 | * | |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
064af421 BP |
7 | */ |
8 | ||
9 | /* Functions for managing the dp interface/device. */ | |
10 | ||
dfffaef1 JP |
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
12 | ||
064af421 BP |
13 | #include <linux/init.h> |
14 | #include <linux/module.h> | |
15 | #include <linux/fs.h> | |
16 | #include <linux/if_arp.h> | |
064af421 BP |
17 | #include <linux/if_vlan.h> |
18 | #include <linux/in.h> | |
19 | #include <linux/ip.h> | |
20 | #include <linux/delay.h> | |
21 | #include <linux/time.h> | |
22 | #include <linux/etherdevice.h> | |
23 | #include <linux/kernel.h> | |
24 | #include <linux/kthread.h> | |
064af421 BP |
25 | #include <linux/mutex.h> |
26 | #include <linux/percpu.h> | |
27 | #include <linux/rcupdate.h> | |
28 | #include <linux/tcp.h> | |
29 | #include <linux/udp.h> | |
30 | #include <linux/version.h> | |
31 | #include <linux/ethtool.h> | |
064af421 BP |
32 | #include <linux/wait.h> |
33 | #include <asm/system.h> | |
34 | #include <asm/div64.h> | |
35 | #include <asm/bug.h> | |
656a0e37 | 36 | #include <linux/highmem.h> |
064af421 BP |
37 | #include <linux/netfilter_bridge.h> |
38 | #include <linux/netfilter_ipv4.h> | |
39 | #include <linux/inetdevice.h> | |
40 | #include <linux/list.h> | |
41 | #include <linux/rculist.h> | |
064af421 | 42 | #include <linux/dmi.h> |
3c5f6de3 | 43 | #include <net/inet_ecn.h> |
36956a7d | 44 | #include <net/genetlink.h> |
3fbd517a | 45 | #include <linux/compat.h> |
064af421 BP |
46 | |
47 | #include "openvswitch/datapath-protocol.h" | |
dd8d6b8c | 48 | #include "checksum.h" |
064af421 BP |
49 | #include "datapath.h" |
50 | #include "actions.h" | |
064af421 | 51 | #include "flow.h" |
7eaa9830 | 52 | #include "loop_counter.h" |
3fbd517a | 53 | #include "odp-compat.h" |
8d5ebd83 | 54 | #include "table.h" |
f2459fe7 | 55 | #include "vport-internal_dev.h" |
064af421 | 56 | |
064af421 BP |
/* Function-pointer hook taking a net_device, an ifreq and a command number.
 * It has static storage, so it is NULL until something assigns it.  It is
 * exported so that code outside this file can reach it; the code that sets
 * and calls it is not in this part of the file (presumably the bridge
 * compatibility ioctl path -- TODO confirm). */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);
59 | ||
/* Datapaths, indexed by datapath number.
 *
 * Protected on the read side by rcu_read_lock, on the write side by dp_mutex.
 *
 * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL
 * lock first.
 *
 * It is safe to access the datapath and vport structures with just
 * dp_mutex.
 */
static struct datapath __rcu *dps[ODP_MAX];
static DEFINE_MUTEX(dp_mutex);

/* Forward declaration: create_dp() calls new_vport() before its definition. */
static struct vport *new_vport(const struct vport_parms *);
064af421 BP |
73 | |
74 | /* Must be called with rcu_read_lock or dp_mutex. */ | |
75 | struct datapath *get_dp(int dp_idx) | |
76 | { | |
77 | if (dp_idx < 0 || dp_idx >= ODP_MAX) | |
78 | return NULL; | |
eb3ccf11 JG |
79 | return rcu_dereference_check(dps[dp_idx], rcu_read_lock_held() || |
80 | lockdep_is_held(&dp_mutex)); | |
064af421 BP |
81 | } |
82 | EXPORT_SYMBOL_GPL(get_dp); | |
83 | ||
35f7605b | 84 | static struct datapath *get_dp_locked(int dp_idx) |
064af421 BP |
85 | { |
86 | struct datapath *dp; | |
87 | ||
88 | mutex_lock(&dp_mutex); | |
89 | dp = get_dp(dp_idx); | |
90 | if (dp) | |
91 | mutex_lock(&dp->mutex); | |
92 | mutex_unlock(&dp_mutex); | |
93 | return dp; | |
94 | } | |
95 | ||
027f9007 | 96 | static struct tbl *get_table_protected(struct datapath *dp) |
9abaf6b3 | 97 | { |
1452b28c JG |
98 | return rcu_dereference_protected(dp->table, |
99 | lockdep_is_held(&dp->mutex)); | |
100 | } | |
101 | ||
027f9007 | 102 | static struct vport *get_vport_protected(struct datapath *dp, u16 port_no) |
1452b28c JG |
103 | { |
104 | return rcu_dereference_protected(dp->ports[port_no], | |
105 | lockdep_is_held(&dp->mutex)); | |
9abaf6b3 JG |
106 | } |
107 | ||
f2459fe7 JG |
108 | /* Must be called with rcu_read_lock or RTNL lock. */ |
109 | const char *dp_name(const struct datapath *dp) | |
110 | { | |
ad919711 | 111 | return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL])); |
f2459fe7 JG |
112 | } |
113 | ||
064af421 BP |
114 | static inline size_t br_nlmsg_size(void) |
115 | { | |
116 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) | |
117 | + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ | |
118 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ | |
119 | + nla_total_size(4) /* IFLA_MASTER */ | |
120 | + nla_total_size(4) /* IFLA_MTU */ | |
121 | + nla_total_size(4) /* IFLA_LINK */ | |
122 | + nla_total_size(1); /* IFLA_OPERSTATE */ | |
123 | } | |
124 | ||
125 | static int dp_fill_ifinfo(struct sk_buff *skb, | |
e779d8d9 | 126 | const struct vport *port, |
064af421 BP |
127 | int event, unsigned int flags) |
128 | { | |
027f9007 | 129 | struct datapath *dp = port->dp; |
e779d8d9 BP |
130 | int ifindex = vport_get_ifindex(port); |
131 | int iflink = vport_get_iflink(port); | |
064af421 BP |
132 | struct ifinfomsg *hdr; |
133 | struct nlmsghdr *nlh; | |
134 | ||
f2459fe7 JG |
135 | if (ifindex < 0) |
136 | return ifindex; | |
137 | ||
138 | if (iflink < 0) | |
139 | return iflink; | |
140 | ||
064af421 BP |
141 | nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags); |
142 | if (nlh == NULL) | |
143 | return -EMSGSIZE; | |
144 | ||
145 | hdr = nlmsg_data(nlh); | |
146 | hdr->ifi_family = AF_BRIDGE; | |
147 | hdr->__ifi_pad = 0; | |
f2459fe7 JG |
148 | hdr->ifi_type = ARPHRD_ETHER; |
149 | hdr->ifi_index = ifindex; | |
e779d8d9 | 150 | hdr->ifi_flags = vport_get_flags(port); |
064af421 BP |
151 | hdr->ifi_change = 0; |
152 | ||
e779d8d9 | 153 | NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port)); |
ad919711 | 154 | NLA_PUT_U32(skb, IFLA_MASTER, |
1452b28c | 155 | vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL))); |
e779d8d9 | 156 | NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port)); |
064af421 BP |
157 | #ifdef IFLA_OPERSTATE |
158 | NLA_PUT_U8(skb, IFLA_OPERSTATE, | |
e779d8d9 BP |
159 | vport_is_running(port) |
160 | ? vport_get_operstate(port) | |
f2459fe7 | 161 | : IF_OPER_DOWN); |
064af421 BP |
162 | #endif |
163 | ||
e779d8d9 | 164 | NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port)); |
064af421 | 165 | |
f2459fe7 JG |
166 | if (ifindex != iflink) |
167 | NLA_PUT_U32(skb, IFLA_LINK,iflink); | |
064af421 BP |
168 | |
169 | return nlmsg_end(skb, nlh); | |
170 | ||
171 | nla_put_failure: | |
172 | nlmsg_cancel(skb, nlh); | |
173 | return -EMSGSIZE; | |
174 | } | |
175 | ||
e779d8d9 | 176 | static void dp_ifinfo_notify(int event, struct vport *port) |
064af421 | 177 | { |
064af421 BP |
178 | struct sk_buff *skb; |
179 | int err = -ENOBUFS; | |
180 | ||
181 | skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL); | |
182 | if (skb == NULL) | |
183 | goto errout; | |
184 | ||
185 | err = dp_fill_ifinfo(skb, port, event, 0); | |
186 | if (err < 0) { | |
187 | /* -EMSGSIZE implies BUG in br_nlmsg_size() */ | |
188 | WARN_ON(err == -EMSGSIZE); | |
189 | kfree_skb(skb); | |
190 | goto errout; | |
191 | } | |
f2459fe7 | 192 | rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); |
cfe7c1f5 | 193 | return; |
064af421 BP |
194 | errout: |
195 | if (err < 0) | |
f2459fe7 | 196 | rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); |
064af421 BP |
197 | } |
198 | ||
58c342f6 BP |
199 | static void release_dp(struct kobject *kobj) |
200 | { | |
201 | struct datapath *dp = container_of(kobj, struct datapath, ifobj); | |
202 | kfree(dp); | |
203 | } | |
204 | ||
35f7605b | 205 | static struct kobj_type dp_ktype = { |
58c342f6 BP |
206 | .release = release_dp |
207 | }; | |
208 | ||
064af421 BP |
/* Creates datapath number 'dp_idx' and its local (internal) port.
 *
 * 'devnamep' is a userspace pointer to the name for the local port; if it is
 * NULL the name "of<dp_idx>" is used.
 *
 * Takes the RTNL lock and dp_mutex itself, and holds the new dp->mutex while
 * the datapath is being set up.  A module reference is taken for the
 * lifetime of the datapath.
 *
 * Returns 0 on success, otherwise a negative errno:
 *   -EFAULT        the name could not be copied from userspace
 *   -ENAMETOOLONG  the name does not fit in IFNAMSIZ including its NUL
 *   -ENODEV        the module reference could not be taken
 *   -EBUSY         datapath 'dp_idx' already exists
 *   -ENOMEM        an allocation failed
 *   -EEXIST        new_vport() reported -EBUSY for the local port
 * Any other error from new_vport() is passed through unchanged. */
static int create_dp(int dp_idx, const char __user *devnamep)
{
	struct vport_parms parms;
	char devname[IFNAMSIZ];
	struct vport *vport;
	struct datapath *dp;
	int err;
	int i;

	if (devnamep) {
		int retval = strncpy_from_user(devname, devnamep, IFNAMSIZ);
		if (retval < 0) {
			err = -EFAULT;
			goto err;
		} else if (retval >= IFNAMSIZ) {
			/* No room was left for the terminating NUL. */
			err = -ENAMETOOLONG;
			goto err;
		}
	} else {
		snprintf(devname, sizeof(devname), "of%d", dp_idx);
	}

	/* Lock order: RTNL first, then dp_mutex. */
	rtnl_lock();
	mutex_lock(&dp_mutex);
	err = -ENODEV;
	if (!try_module_get(THIS_MODULE))
		goto err_unlock;

	/* Exit early if a datapath with that number already exists.
	 * (We don't use -EEXIST because that's ambiguous with 'devname'
	 * conflicting with an existing network device name.) */
	err = -EBUSY;
	if (get_dp(dp_idx))
		goto err_put_module;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_put_module;
	INIT_LIST_HEAD(&dp->port_list);
	mutex_init(&dp->mutex);
	/* Held until setup completes or fails; new_vport() expects it. */
	mutex_lock(&dp->mutex);
	dp->dp_idx = dp_idx;
	for (i = 0; i < DP_N_QUEUES; i++)
		skb_queue_head_init(&dp->queues[i]);
	init_waitqueue_head(&dp->waitqueue);

	/* Initialize kobject for bridge.  This will be added as
	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
	dp->ifobj.kset = NULL;
	kobject_init(&dp->ifobj, &dp_ktype);

	/* Allocate table. */
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	/* Set up our datapath device. */
	parms.name = devname;
	parms.type = ODP_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = ODPP_LOCAL;
	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		/* -EBUSY is reserved above for "datapath number in use". */
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_table;
	}

	dp->drop_frags = 0;
	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_local_port;
	}

	/* Publish the fully initialized datapath to RCU readers. */
	rcu_assign_pointer(dps[dp_idx], dp);
	dp_sysfs_add_dp(dp);

	mutex_unlock(&dp->mutex);
	mutex_unlock(&dp_mutex);
	rtnl_unlock();

	return 0;

	/* Error unwinding: each label undoes one setup step and falls
	 * through to the labels for the earlier steps. */
err_destroy_local_port:
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
	tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
	mutex_unlock(&dp->mutex);
	kfree(dp);
err_put_module:
	module_put(THIS_MODULE);
err_unlock:
	mutex_unlock(&dp_mutex);
	rtnl_unlock();
err:
	return err;
}
313 | ||
46c6a11d JG |
314 | static void destroy_dp_rcu(struct rcu_head *rcu) |
315 | { | |
316 | struct datapath *dp = container_of(rcu, struct datapath, rcu); | |
317 | int i; | |
318 | ||
319 | for (i = 0; i < DP_N_QUEUES; i++) | |
320 | skb_queue_purge(&dp->queues[i]); | |
321 | ||
322 | tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl); | |
323 | free_percpu(dp->stats_percpu); | |
324 | kobject_put(&dp->ifobj); | |
325 | } | |
326 | ||
8f843b6f | 327 | static int destroy_dp(int dp_idx) |
064af421 | 328 | { |
8f843b6f JG |
329 | struct datapath *dp; |
330 | int err = 0; | |
e779d8d9 | 331 | struct vport *p, *n; |
064af421 | 332 | |
8f843b6f JG |
333 | rtnl_lock(); |
334 | mutex_lock(&dp_mutex); | |
335 | dp = get_dp(dp_idx); | |
336 | if (!dp) { | |
337 | err = -ENODEV; | |
b0fb95ac | 338 | goto out; |
8f843b6f JG |
339 | } |
340 | ||
b0fb95ac JG |
341 | mutex_lock(&dp->mutex); |
342 | ||
6fba0d0b BP |
343 | list_for_each_entry_safe (p, n, &dp->port_list, node) |
344 | if (p->port_no != ODPP_LOCAL) | |
c3827f61 | 345 | dp_detach_port(p); |
6fba0d0b | 346 | |
2ba9026e | 347 | dp_sysfs_del_dp(dp); |
064af421 | 348 | rcu_assign_pointer(dps[dp->dp_idx], NULL); |
1452b28c | 349 | dp_detach_port(get_vport_protected(dp, ODPP_LOCAL)); |
8f843b6f | 350 | |
b0fb95ac | 351 | mutex_unlock(&dp->mutex); |
46c6a11d | 352 | call_rcu(&dp->rcu, destroy_dp_rcu); |
064af421 | 353 | module_put(THIS_MODULE); |
064af421 | 354 | |
b0fb95ac | 355 | out: |
064af421 BP |
356 | mutex_unlock(&dp_mutex); |
357 | rtnl_unlock(); | |
064af421 BP |
358 | return err; |
359 | } | |
360 | ||
f072ebdd | 361 | /* Called with RTNL lock and dp->mutex. */ |
c19e6535 | 362 | static struct vport *new_vport(const struct vport_parms *parms) |
064af421 | 363 | { |
f2459fe7 | 364 | struct vport *vport; |
f2459fe7 | 365 | |
c3827f61 | 366 | vport_lock(); |
c19e6535 BP |
367 | vport = vport_add(parms); |
368 | if (!IS_ERR(vport)) { | |
369 | struct datapath *dp = parms->dp; | |
064af421 | 370 | |
c19e6535 BP |
371 | rcu_assign_pointer(dp->ports[parms->port_no], vport); |
372 | list_add_rcu(&vport->node, &dp->port_list); | |
064af421 | 373 | |
c19e6535 BP |
374 | dp_ifinfo_notify(RTM_NEWLINK, vport); |
375 | } | |
376 | vport_unlock(); | |
064af421 | 377 | |
c19e6535 | 378 | return vport; |
064af421 BP |
379 | } |
380 | ||
e779d8d9 | 381 | int dp_detach_port(struct vport *p) |
064af421 | 382 | { |
f2459fe7 JG |
383 | int err; |
384 | ||
064af421 BP |
385 | ASSERT_RTNL(); |
386 | ||
2e7dd8ec | 387 | if (p->port_no != ODPP_LOCAL) |
0515ceb3 | 388 | dp_sysfs_del_if(p); |
064af421 BP |
389 | dp_ifinfo_notify(RTM_DELLINK, p); |
390 | ||
064af421 | 391 | /* First drop references to device. */ |
064af421 BP |
392 | list_del_rcu(&p->node); |
393 | rcu_assign_pointer(p->dp->ports[p->port_no], NULL); | |
f2459fe7 | 394 | |
7237e4f4 | 395 | /* Then destroy it. */ |
c3827f61 | 396 | vport_lock(); |
7237e4f4 | 397 | err = vport_del(p); |
c3827f61 | 398 | vport_unlock(); |
f2459fe7 | 399 | |
7237e4f4 | 400 | return err; |
064af421 BP |
401 | } |
402 | ||
/* Processes packet 'skb' received on vport 'p'.
 *
 * If the packet has no flow attached yet, its key is extracted and looked
 * up in the datapath's flow table.  A miss is sent to userspace through
 * dp_upcall(); a hit has the flow's actions executed on the packet.
 * Exactly one per-CPU counter (n_frags, n_missed or n_hit) is incremented,
 * except when flow extraction fails, in which case the packet is freed and
 * nothing is counted.
 *
 * 'skb' is freed or handed off on every path visible here (execute_actions()
 * is presumably responsible for it on the hit path -- TODO confirm).
 *
 * Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct dp_stats_percpu *stats;
	int stats_counter_off;	/* Byte offset of the counter to bump. */
	struct sw_flow_actions *acts;
	struct loop_counter *loop;
	int error;

	OVS_CB(skb)->vport = p;

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		struct tbl_node *flow_node;
		bool is_frag;

		/* Extract flow from 'skb' into 'key'.
		 * NOTE(review): 'p' was already dereferenced above, so the
		 * NULL test here can never select ODPP_NONE. */
		error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key, &is_frag);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		if (is_frag && dp->drop_frags) {
			kfree_skb(skb);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
			goto out;
		}

		/* Look up flow. */
		flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
					flow_hash(&key), flow_cmp);
		if (unlikely(!flow_node)) {
			struct dp_upcall_info upcall;

			/* No matching flow: pass the packet to userspace. */
			upcall.type = _ODPL_MISS_NR;
			upcall.key = &key;
			upcall.userdata = 0;
			upcall.sample_pool = 0;
			upcall.actions = NULL;
			upcall.actions_len = 0;
			dp_upcall(dp, skb, &upcall);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
			goto out;
		}

		OVS_CB(skb)->flow = flow_cast(flow_node);
	}

	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
	flow_used(OVS_CB(skb)->flow, skb);

	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);

	/* Check whether we've looped too much. */
	loop = loop_get_counter();
	if (unlikely(++loop->count > MAX_LOOPS))
		loop->looping = true;
	if (unlikely(loop->looping)) {
		loop_suppress(dp, acts);
		kfree_skb(skb);
		goto out_loop;
	}

	/* Execute actions. */
	execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
			acts->actions_len);

	/* Check whether sub-actions looped too much. */
	if (unlikely(loop->looping))
		loop_suppress(dp, acts);

out_loop:
	/* Decrement loop counter.  The 'looping' flag is cleared only once
	 * the count has dropped back to zero. */
	if (!--loop->count)
		loop->looping = false;
	loop_put_counter();

out:
	/* Update datapath statistics. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	/* Increment the u64 counter located at the chosen byte offset. */
	(*(u64 *)((u8 *)stats + stats_counter_off))++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();
}
493 | ||
856081f6 BP |
494 | static void copy_and_csum_skb(struct sk_buff *skb, void *to) |
495 | { | |
496 | u16 csum_start, csum_offset; | |
497 | __wsum csum; | |
498 | ||
499 | get_skb_csum_pointers(skb, &csum_start, &csum_offset); | |
500 | csum_start -= skb_headroom(skb); | |
501 | BUG_ON(csum_start >= skb_headlen(skb)); | |
502 | ||
503 | skb_copy_bits(skb, 0, to, csum_start); | |
504 | ||
505 | csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start, | |
506 | skb->len - csum_start, 0); | |
507 | *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum); | |
508 | } | |
509 | ||
cb5087ca BP |
510 | /* Append each packet in 'skb' list to 'queue'. There will be only one packet |
511 | * unless we broke up a GSO packet. */ | |
856081f6 BP |
512 | static int queue_control_packets(struct datapath *dp, struct sk_buff *skb, |
513 | const struct dp_upcall_info *upcall_info) | |
cb5087ca BP |
514 | { |
515 | struct sk_buff *nskb; | |
516 | int port_no; | |
517 | int err; | |
518 | ||
e779d8d9 BP |
519 | if (OVS_CB(skb)->vport) |
520 | port_no = OVS_CB(skb)->vport->port_no; | |
f2459fe7 JG |
521 | else |
522 | port_no = ODPP_LOCAL; | |
cb5087ca BP |
523 | |
524 | do { | |
856081f6 BP |
525 | struct odp_packet *upcall; |
526 | struct sk_buff *user_skb; /* to be queued to userspace */ | |
527 | struct nlattr *nla; | |
528 | unsigned int len; | |
cb5087ca BP |
529 | |
530 | nskb = skb->next; | |
531 | skb->next = NULL; | |
532 | ||
856081f6 BP |
533 | len = sizeof(struct odp_packet); |
534 | len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */ | |
535 | len += nla_total_size(skb->len); | |
536 | len += nla_total_size(FLOW_BUFSIZE); | |
537 | if (upcall_info->userdata) | |
538 | len += nla_total_size(8); | |
539 | if (upcall_info->sample_pool) | |
540 | len += nla_total_size(4); | |
541 | if (upcall_info->actions_len) | |
542 | len += nla_total_size(upcall_info->actions_len); | |
543 | ||
544 | user_skb = alloc_skb(len, GFP_ATOMIC); | |
545 | if (!user_skb) | |
cb5087ca BP |
546 | goto err_kfree_skbs; |
547 | ||
856081f6 BP |
548 | upcall = (struct odp_packet *)__skb_put(user_skb, sizeof(*upcall)); |
549 | upcall->dp_idx = dp->dp_idx; | |
550 | ||
551 | nla_put_u32(user_skb, ODP_PACKET_ATTR_TYPE, upcall_info->type); | |
552 | ||
553 | nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY); | |
554 | flow_to_nlattrs(upcall_info->key, user_skb); | |
555 | nla_nest_end(user_skb, nla); | |
cb5087ca | 556 | |
856081f6 BP |
557 | if (upcall_info->userdata) |
558 | nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata); | |
559 | if (upcall_info->sample_pool) | |
560 | nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool); | |
561 | if (upcall_info->actions_len) { | |
562 | const struct nlattr *actions = upcall_info->actions; | |
563 | u32 actions_len = upcall_info->actions_len; | |
564 | ||
565 | nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS); | |
566 | memcpy(__skb_put(user_skb, actions_len), actions, actions_len); | |
567 | nla_nest_end(user_skb, nla); | |
568 | } | |
569 | ||
570 | nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len); | |
571 | if (skb->ip_summed == CHECKSUM_PARTIAL) | |
572 | copy_and_csum_skb(skb, nla_data(nla)); | |
573 | else | |
574 | skb_copy_bits(skb, 0, nla_data(nla), skb->len); | |
575 | ||
576 | upcall->len = user_skb->len; | |
577 | skb_queue_tail(&dp->queues[upcall_info->type], user_skb); | |
578 | ||
579 | kfree_skb(skb); | |
cb5087ca BP |
580 | skb = nskb; |
581 | } while (skb); | |
582 | return 0; | |
583 | ||
584 | err_kfree_skbs: | |
585 | kfree_skb(skb); | |
586 | while ((skb = nskb) != NULL) { | |
587 | nskb = skb->next; | |
588 | kfree_skb(skb); | |
589 | } | |
590 | return err; | |
591 | } | |
592 | ||
856081f6 | 593 | int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) |
064af421 BP |
594 | { |
595 | struct dp_stats_percpu *stats; | |
596 | struct sk_buff_head *queue; | |
064af421 BP |
597 | int err; |
598 | ||
599 | WARN_ON_ONCE(skb_shared(skb)); | |
856081f6 BP |
600 | BUG_ON(upcall_info->type >= DP_N_QUEUES); |
601 | ||
602 | queue = &dp->queues[upcall_info->type]; | |
064af421 BP |
603 | err = -ENOBUFS; |
604 | if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN) | |
605 | goto err_kfree_skb; | |
606 | ||
a6057323 JG |
607 | forward_ip_summed(skb); |
608 | ||
a2377e44 JG |
609 | err = vswitch_skb_checksum_setup(skb); |
610 | if (err) | |
611 | goto err_kfree_skb; | |
612 | ||
064af421 BP |
613 | /* Break apart GSO packets into their component pieces. Otherwise |
614 | * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */ | |
615 | if (skb_is_gso(skb)) { | |
9cc8b4e4 | 616 | struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); |
2d7ce2ee JG |
617 | |
618 | kfree_skb(skb); | |
619 | skb = nskb; | |
40796b34 | 620 | if (IS_ERR(skb)) { |
2d7ce2ee JG |
621 | err = PTR_ERR(skb); |
622 | goto err; | |
064af421 BP |
623 | } |
624 | } | |
625 | ||
856081f6 | 626 | err = queue_control_packets(dp, skb, upcall_info); |
064af421 | 627 | wake_up_interruptible(&dp->waitqueue); |
cb5087ca | 628 | return err; |
064af421 BP |
629 | |
630 | err_kfree_skb: | |
631 | kfree_skb(skb); | |
632 | err: | |
1c075d0a JG |
633 | local_bh_disable(); |
634 | stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); | |
38c6ecbc JG |
635 | |
636 | write_seqcount_begin(&stats->seqlock); | |
064af421 | 637 | stats->n_lost++; |
38c6ecbc JG |
638 | write_seqcount_end(&stats->seqlock); |
639 | ||
1c075d0a | 640 | local_bh_enable(); |
064af421 BP |
641 | |
642 | return err; | |
643 | } | |
644 | ||
645 | static int flush_flows(struct datapath *dp) | |
646 | { | |
9abaf6b3 | 647 | struct tbl *old_table = get_table_protected(dp); |
8d5ebd83 JG |
648 | struct tbl *new_table; |
649 | ||
c6fadeb1 | 650 | new_table = tbl_create(TBL_MIN_BUCKETS); |
8d5ebd83 JG |
651 | if (!new_table) |
652 | return -ENOMEM; | |
653 | ||
654 | rcu_assign_pointer(dp->table, new_table); | |
655 | ||
656 | tbl_deferred_destroy(old_table, flow_free_tbl); | |
657 | ||
658 | return 0; | |
064af421 BP |
659 | } |
660 | ||
cdee00fd | 661 | static int validate_actions(const struct nlattr *actions, u32 actions_len) |
064af421 | 662 | { |
23cad98c BP |
663 | const struct nlattr *a; |
664 | int rem; | |
665 | ||
666 | nla_for_each_attr(a, actions, actions_len, rem) { | |
667 | static const u32 action_lens[ODPAT_MAX + 1] = { | |
668 | [ODPAT_OUTPUT] = 4, | |
669 | [ODPAT_CONTROLLER] = 8, | |
670 | [ODPAT_SET_DL_TCI] = 2, | |
671 | [ODPAT_STRIP_VLAN] = 0, | |
672 | [ODPAT_SET_DL_SRC] = ETH_ALEN, | |
673 | [ODPAT_SET_DL_DST] = ETH_ALEN, | |
674 | [ODPAT_SET_NW_SRC] = 4, | |
675 | [ODPAT_SET_NW_DST] = 4, | |
676 | [ODPAT_SET_NW_TOS] = 1, | |
677 | [ODPAT_SET_TP_SRC] = 2, | |
678 | [ODPAT_SET_TP_DST] = 2, | |
679 | [ODPAT_SET_TUNNEL] = 8, | |
680 | [ODPAT_SET_PRIORITY] = 4, | |
681 | [ODPAT_POP_PRIORITY] = 0, | |
682 | [ODPAT_DROP_SPOOFED_ARP] = 0, | |
683 | }; | |
684 | int type = nla_type(a); | |
685 | ||
686 | if (type > ODPAT_MAX || nla_len(a) != action_lens[type]) | |
687 | return -EINVAL; | |
688 | ||
689 | switch (type) { | |
cdee00fd BP |
690 | case ODPAT_UNSPEC: |
691 | return -EINVAL; | |
064af421 | 692 | |
23cad98c BP |
693 | case ODPAT_CONTROLLER: |
694 | case ODPAT_STRIP_VLAN: | |
695 | case ODPAT_SET_DL_SRC: | |
696 | case ODPAT_SET_DL_DST: | |
697 | case ODPAT_SET_NW_SRC: | |
698 | case ODPAT_SET_NW_DST: | |
699 | case ODPAT_SET_TP_SRC: | |
700 | case ODPAT_SET_TP_DST: | |
701 | case ODPAT_SET_TUNNEL: | |
702 | case ODPAT_SET_PRIORITY: | |
703 | case ODPAT_POP_PRIORITY: | |
704 | case ODPAT_DROP_SPOOFED_ARP: | |
705 | /* No validation needed. */ | |
706 | break; | |
707 | ||
708 | case ODPAT_OUTPUT: | |
709 | if (nla_get_u32(a) >= DP_MAX_PORTS) | |
710 | return -EINVAL; | |
3b1fc5f3 | 711 | break; |
cdee00fd | 712 | |
23cad98c | 713 | case ODPAT_SET_DL_TCI: |
cdee00fd | 714 | if (nla_get_be16(a) & htons(VLAN_CFI_MASK)) |
064af421 | 715 | return -EINVAL; |
23cad98c | 716 | break; |
064af421 | 717 | |
23cad98c BP |
718 | case ODPAT_SET_NW_TOS: |
719 | if (nla_get_u8(a) & INET_ECN_MASK) | |
720 | return -EINVAL; | |
721 | break; | |
064af421 | 722 | |
23cad98c BP |
723 | default: |
724 | return -EOPNOTSUPP; | |
725 | } | |
726 | } | |
3c5f6de3 | 727 | |
23cad98c BP |
728 | if (rem > 0) |
729 | return -EINVAL; | |
064af421 | 730 | |
23cad98c | 731 | return 0; |
064af421 BP |
732 | } |
733 | ||
734 | static struct sw_flow_actions *get_actions(const struct odp_flow *flow) | |
735 | { | |
736 | struct sw_flow_actions *actions; | |
737 | int error; | |
738 | ||
cdee00fd | 739 | actions = flow_actions_alloc(flow->actions_len); |
064af421 BP |
740 | error = PTR_ERR(actions); |
741 | if (IS_ERR(actions)) | |
742 | goto error; | |
743 | ||
744 | error = -EFAULT; | |
1b29ebe5 | 745 | if (copy_from_user(actions->actions, |
6c229737 | 746 | (struct nlattr __user __force *)flow->actions, |
1b29ebe5 | 747 | flow->actions_len)) |
064af421 | 748 | goto error_free_actions; |
cdee00fd | 749 | error = validate_actions(actions->actions, actions->actions_len); |
064af421 BP |
750 | if (error) |
751 | goto error_free_actions; | |
752 | ||
753 | return actions; | |
754 | ||
755 | error_free_actions: | |
756 | kfree(actions); | |
757 | error: | |
758 | return ERR_PTR(error); | |
759 | } | |
760 | ||
65d042a1 | 761 | static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats) |
6bfafa55 JG |
762 | { |
763 | if (flow->used) { | |
65d042a1 | 764 | struct timespec offset_ts, used, now_mono; |
6bfafa55 | 765 | |
65d042a1 HZ |
766 | ktime_get_ts(&now_mono); |
767 | jiffies_to_timespec(jiffies - flow->used, &offset_ts); | |
768 | set_normalized_timespec(&used, now_mono.tv_sec - offset_ts.tv_sec, | |
769 | now_mono.tv_nsec - offset_ts.tv_nsec); | |
6bfafa55 JG |
770 | |
771 | stats->used_sec = used.tv_sec; | |
772 | stats->used_nsec = used.tv_nsec; | |
064af421 BP |
773 | } else { |
774 | stats->used_sec = 0; | |
775 | stats->used_nsec = 0; | |
776 | } | |
6bfafa55 | 777 | |
064af421 BP |
778 | stats->n_packets = flow->packet_count; |
779 | stats->n_bytes = flow->byte_count; | |
abfec865 | 780 | stats->reserved = 0; |
064af421 | 781 | stats->tcp_flags = flow->tcp_flags; |
f1aa2072 | 782 | stats->error = 0; |
064af421 BP |
783 | } |
784 | ||
785 | static void clear_stats(struct sw_flow *flow) | |
786 | { | |
6bfafa55 | 787 | flow->used = 0; |
064af421 | 788 | flow->tcp_flags = 0; |
064af421 BP |
789 | flow->packet_count = 0; |
790 | flow->byte_count = 0; | |
791 | } | |
792 | ||
8d5ebd83 JG |
793 | static int expand_table(struct datapath *dp) |
794 | { | |
9abaf6b3 | 795 | struct tbl *old_table = get_table_protected(dp); |
8d5ebd83 JG |
796 | struct tbl *new_table; |
797 | ||
798 | new_table = tbl_expand(old_table); | |
799 | if (IS_ERR(new_table)) | |
800 | return PTR_ERR(new_table); | |
801 | ||
802 | rcu_assign_pointer(dp->table, new_table); | |
803 | tbl_deferred_destroy(old_table, NULL); | |
804 | ||
805 | return 0; | |
806 | } | |
807 | ||
44e05eca BP |
808 | static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf, |
809 | struct odp_flow_stats *stats) | |
064af421 | 810 | { |
8d5ebd83 | 811 | struct tbl_node *flow_node; |
36956a7d | 812 | struct sw_flow_key key; |
6fa58f7a | 813 | struct sw_flow *flow; |
8d5ebd83 | 814 | struct tbl *table; |
3d82583c | 815 | struct sw_flow_actions *acts = NULL; |
064af421 | 816 | int error; |
58f8f0e7 | 817 | u32 hash; |
064af421 | 818 | |
36956a7d BP |
819 | error = flow_copy_from_user(&key, (const struct nlattr __force __user *)uf->flow.key, |
820 | uf->flow.key_len); | |
821 | if (error) | |
822 | return error; | |
823 | ||
824 | hash = flow_hash(&key); | |
9abaf6b3 | 825 | table = get_table_protected(dp); |
36956a7d | 826 | flow_node = tbl_lookup(table, &key, hash, flow_cmp); |
8d5ebd83 | 827 | if (!flow_node) { |
6fa58f7a | 828 | /* No such flow. */ |
064af421 | 829 | error = -ENOENT; |
44e05eca | 830 | if (!(uf->flags & ODPPF_CREATE)) |
064af421 BP |
831 | goto error; |
832 | ||
833 | /* Expand table, if necessary, to make room. */ | |
8d5ebd83 JG |
834 | if (tbl_count(table) >= tbl_n_buckets(table)) { |
835 | error = expand_table(dp); | |
064af421 BP |
836 | if (error) |
837 | goto error; | |
9abaf6b3 | 838 | table = get_table_protected(dp); |
064af421 BP |
839 | } |
840 | ||
841 | /* Allocate flow. */ | |
560e8022 JG |
842 | flow = flow_alloc(); |
843 | if (IS_ERR(flow)) { | |
844 | error = PTR_ERR(flow); | |
064af421 | 845 | goto error; |
560e8022 | 846 | } |
36956a7d | 847 | flow->key = key; |
064af421 BP |
848 | clear_stats(flow); |
849 | ||
850 | /* Obtain actions. */ | |
44e05eca | 851 | acts = get_actions(&uf->flow); |
064af421 BP |
852 | error = PTR_ERR(acts); |
853 | if (IS_ERR(acts)) | |
854 | goto error_free_flow; | |
855 | rcu_assign_pointer(flow->sf_acts, acts); | |
856 | ||
857 | /* Put flow in bucket. */ | |
58f8f0e7 | 858 | error = tbl_insert(table, &flow->tbl_node, hash); |
6fa58f7a BP |
859 | if (error) |
860 | goto error_free_flow_acts; | |
8d5ebd83 | 861 | |
44e05eca | 862 | memset(stats, 0, sizeof(struct odp_flow_stats)); |
064af421 BP |
863 | } else { |
864 | /* We found a matching flow. */ | |
064af421 | 865 | struct sw_flow_actions *old_acts, *new_acts; |
064af421 | 866 | |
8d5ebd83 JG |
867 | flow = flow_cast(flow_node); |
868 | ||
064af421 BP |
869 | /* Bail out if we're not allowed to modify an existing flow. */ |
870 | error = -EEXIST; | |
44e05eca | 871 | if (!(uf->flags & ODPPF_MODIFY)) |
064af421 BP |
872 | goto error; |
873 | ||
874 | /* Swap actions. */ | |
44e05eca | 875 | new_acts = get_actions(&uf->flow); |
064af421 BP |
876 | error = PTR_ERR(new_acts); |
877 | if (IS_ERR(new_acts)) | |
878 | goto error; | |
d3c54451 JG |
879 | |
880 | old_acts = rcu_dereference_protected(flow->sf_acts, | |
881 | lockdep_is_held(&dp->mutex)); | |
cdee00fd | 882 | if (old_acts->actions_len != new_acts->actions_len || |
064af421 | 883 | memcmp(old_acts->actions, new_acts->actions, |
cdee00fd | 884 | old_acts->actions_len)) { |
064af421 BP |
885 | rcu_assign_pointer(flow->sf_acts, new_acts); |
886 | flow_deferred_free_acts(old_acts); | |
887 | } else { | |
888 | kfree(new_acts); | |
889 | } | |
890 | ||
891 | /* Fetch stats, then clear them if necessary. */ | |
1d7241c7 | 892 | spin_lock_bh(&flow->lock); |
65d042a1 | 893 | get_stats(flow, stats); |
44e05eca | 894 | if (uf->flags & ODPPF_ZERO_STATS) |
064af421 | 895 | clear_stats(flow); |
1d7241c7 | 896 | spin_unlock_bh(&flow->lock); |
064af421 BP |
897 | } |
898 | ||
064af421 BP |
899 | return 0; |
900 | ||
6fa58f7a | 901 | error_free_flow_acts: |
3d82583c | 902 | kfree(acts); |
064af421 | 903 | error_free_flow: |
fb8c9347 JG |
904 | flow->sf_acts = NULL; |
905 | flow_put(flow); | |
064af421 BP |
906 | error: |
907 | return error; | |
908 | } | |
909 | ||
44e05eca BP |
910 | static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) |
911 | { | |
912 | struct odp_flow_stats stats; | |
913 | struct odp_flow_put uf; | |
914 | int error; | |
915 | ||
916 | if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put))) | |
917 | return -EFAULT; | |
918 | ||
919 | error = do_put_flow(dp, &uf, &stats); | |
920 | if (error) | |
921 | return error; | |
922 | ||
776f10ce BP |
923 | if (copy_to_user(&ufp->flow.stats, &stats, |
924 | sizeof(struct odp_flow_stats))) | |
44e05eca BP |
925 | return -EFAULT; |
926 | ||
927 | return 0; | |
928 | } | |
929 | ||
d3c54451 JG |
930 | static int do_answer_query(struct datapath *dp, struct sw_flow *flow, |
931 | u32 query_flags, | |
44e05eca | 932 | struct odp_flow_stats __user *ustats, |
cdee00fd BP |
933 | struct nlattr __user *actions, |
934 | u32 __user *actions_lenp) | |
064af421 | 935 | { |
064af421 | 936 | struct sw_flow_actions *sf_acts; |
44e05eca | 937 | struct odp_flow_stats stats; |
cdee00fd | 938 | u32 actions_len; |
064af421 | 939 | |
1d7241c7 | 940 | spin_lock_bh(&flow->lock); |
65d042a1 | 941 | get_stats(flow, &stats); |
1d7241c7 | 942 | if (query_flags & ODPFF_ZERO_TCP_FLAGS) |
44e05eca | 943 | flow->tcp_flags = 0; |
1d7241c7 JG |
944 | |
945 | spin_unlock_bh(&flow->lock); | |
44e05eca | 946 | |
776f10ce | 947 | if (copy_to_user(ustats, &stats, sizeof(struct odp_flow_stats)) || |
cdee00fd | 948 | get_user(actions_len, actions_lenp)) |
064af421 BP |
949 | return -EFAULT; |
950 | ||
cdee00fd | 951 | if (!actions_len) |
064af421 | 952 | return 0; |
064af421 | 953 | |
d3c54451 JG |
954 | sf_acts = rcu_dereference_protected(flow->sf_acts, |
955 | lockdep_is_held(&dp->mutex)); | |
cdee00fd | 956 | if (put_user(sf_acts->actions_len, actions_lenp) || |
064af421 | 957 | (actions && copy_to_user(actions, sf_acts->actions, |
cdee00fd | 958 | min(sf_acts->actions_len, actions_len)))) |
064af421 BP |
959 | return -EFAULT; |
960 | ||
961 | return 0; | |
962 | } | |
963 | ||
d3c54451 JG |
964 | static int answer_query(struct datapath *dp, struct sw_flow *flow, |
965 | u32 query_flags, struct odp_flow __user *ufp) | |
064af421 | 966 | { |
1b29ebe5 | 967 | struct nlattr __user *actions; |
064af421 | 968 | |
1b29ebe5 | 969 | if (get_user(actions, (struct nlattr __user * __user *)&ufp->actions)) |
064af421 | 970 | return -EFAULT; |
44e05eca | 971 | |
d3c54451 | 972 | return do_answer_query(dp, flow, query_flags, |
cdee00fd | 973 | &ufp->stats, actions, &ufp->actions_len); |
064af421 BP |
974 | } |
975 | ||
36956a7d | 976 | static struct sw_flow *do_del_flow(struct datapath *dp, const struct nlattr __user *key, u32 key_len) |
064af421 | 977 | { |
9abaf6b3 | 978 | struct tbl *table = get_table_protected(dp); |
8d5ebd83 | 979 | struct tbl_node *flow_node; |
36956a7d | 980 | struct sw_flow_key swkey; |
064af421 BP |
981 | int error; |
982 | ||
36956a7d BP |
983 | error = flow_copy_from_user(&swkey, key, key_len); |
984 | if (error) | |
985 | return ERR_PTR(error); | |
986 | ||
987 | flow_node = tbl_lookup(table, &swkey, flow_hash(&swkey), flow_cmp); | |
8d5ebd83 | 988 | if (!flow_node) |
44e05eca | 989 | return ERR_PTR(-ENOENT); |
064af421 | 990 | |
8d5ebd83 | 991 | error = tbl_remove(table, flow_node); |
f1aa2072 | 992 | if (error) |
44e05eca | 993 | return ERR_PTR(error); |
064af421 | 994 | |
44e05eca BP |
995 | /* XXX Returned flow_node's statistics might lose a few packets, since |
996 | * other CPUs can be using this flow. We used to synchronize_rcu() to | |
997 | * make sure that we get completely accurate stats, but that blows our | |
998 | * performance, badly. */ | |
999 | return flow_cast(flow_node); | |
1000 | } | |
1001 | ||
1002 | static int del_flow(struct datapath *dp, struct odp_flow __user *ufp) | |
1003 | { | |
1004 | struct sw_flow *flow; | |
1005 | struct odp_flow uf; | |
1006 | int error; | |
1007 | ||
84c17d98 | 1008 | if (copy_from_user(&uf, ufp, sizeof(uf))) |
44e05eca BP |
1009 | return -EFAULT; |
1010 | ||
36956a7d | 1011 | flow = do_del_flow(dp, (const struct nlattr __force __user *)uf.key, uf.key_len); |
44e05eca BP |
1012 | if (IS_ERR(flow)) |
1013 | return PTR_ERR(flow); | |
8d5ebd83 | 1014 | |
d3c54451 | 1015 | error = answer_query(dp, flow, 0, ufp); |
f1aa2072 | 1016 | flow_deferred_free(flow); |
064af421 BP |
1017 | return error; |
1018 | } | |
1019 | ||
44e05eca | 1020 | static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec) |
064af421 | 1021 | { |
9abaf6b3 | 1022 | struct tbl *table = get_table_protected(dp); |
6d7568dc BP |
1023 | u32 i; |
1024 | ||
064af421 | 1025 | for (i = 0; i < flowvec->n_flows; i++) { |
6c229737 | 1026 | struct odp_flow __user *ufp = (struct odp_flow __user __force *)&flowvec->flows[i]; |
36956a7d | 1027 | struct sw_flow_key key; |
064af421 | 1028 | struct odp_flow uf; |
8d5ebd83 | 1029 | struct tbl_node *flow_node; |
064af421 BP |
1030 | int error; |
1031 | ||
84c17d98 | 1032 | if (copy_from_user(&uf, ufp, sizeof(uf))) |
064af421 | 1033 | return -EFAULT; |
064af421 | 1034 | |
36956a7d BP |
1035 | error = flow_copy_from_user(&key, (const struct nlattr __force __user *)uf.key, uf.key_len); |
1036 | if (error) | |
1037 | return error; | |
1038 | ||
1039 | flow_node = tbl_lookup(table, &uf.key, flow_hash(&key), flow_cmp); | |
8d5ebd83 | 1040 | if (!flow_node) |
776f10ce | 1041 | error = put_user(ENOENT, &ufp->stats.error); |
064af421 | 1042 | else |
d3c54451 | 1043 | error = answer_query(dp, flow_cast(flow_node), uf.flags, ufp); |
064af421 BP |
1044 | if (error) |
1045 | return -EFAULT; | |
1046 | } | |
1047 | return flowvec->n_flows; | |
1048 | } | |
1049 | ||
064af421 BP |
1050 | static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp, |
1051 | int (*function)(struct datapath *, | |
1052 | const struct odp_flowvec *)) | |
1053 | { | |
1054 | struct odp_flowvec __user *uflowvec; | |
1055 | struct odp_flowvec flowvec; | |
1056 | int retval; | |
1057 | ||
1058 | uflowvec = (struct odp_flowvec __user *)argp; | |
84c17d98 | 1059 | if (copy_from_user(&flowvec, uflowvec, sizeof(flowvec))) |
064af421 BP |
1060 | return -EFAULT; |
1061 | ||
1062 | if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow)) | |
1063 | return -EINVAL; | |
1064 | ||
064af421 BP |
1065 | retval = function(dp, &flowvec); |
1066 | return (retval < 0 ? retval | |
1067 | : retval == flowvec.n_flows ? 0 | |
776f10ce | 1068 | : put_user(retval, &uflowvec->n_flows)); |
064af421 BP |
1069 | } |
1070 | ||
704a1e09 BP |
1071 | static struct sw_flow *do_dump_flow(struct datapath *dp, u32 __user *state) |
1072 | { | |
1073 | struct tbl *table = get_table_protected(dp); | |
1074 | struct tbl_node *tbl_node; | |
1075 | u32 bucket, obj; | |
1076 | ||
1077 | if (get_user(bucket, &state[0]) || get_user(obj, &state[1])) | |
1078 | return ERR_PTR(-EFAULT); | |
1079 | ||
1080 | tbl_node = tbl_next(table, &bucket, &obj); | |
1081 | ||
1082 | if (put_user(bucket, &state[0]) || put_user(obj, &state[1])) | |
1083 | return ERR_PTR(-EFAULT); | |
1084 | ||
1085 | return tbl_node ? flow_cast(tbl_node) : NULL; | |
1086 | } | |
1087 | ||
1088 | static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *udumpp) | |
1089 | { | |
1090 | struct odp_flow __user *uflowp; | |
36956a7d | 1091 | struct nlattr __user *ukey; |
704a1e09 | 1092 | struct sw_flow *flow; |
36956a7d | 1093 | u32 key_len; |
704a1e09 BP |
1094 | |
1095 | flow = do_dump_flow(dp, udumpp->state); | |
1096 | if (IS_ERR(flow)) | |
1097 | return PTR_ERR(flow); | |
1098 | ||
1099 | if (get_user(uflowp, (struct odp_flow __user *__user*)&udumpp->flow)) | |
1100 | return -EFAULT; | |
1101 | ||
1102 | if (!flow) | |
1103 | return put_user(ODPFF_EOF, &uflowp->flags); | |
1104 | ||
36956a7d BP |
1105 | if (put_user(0, &uflowp->flags) || |
1106 | get_user(ukey, (struct nlattr __user * __user*)&uflowp->key) || | |
1107 | get_user(key_len, &uflowp->key_len)) | |
704a1e09 | 1108 | return -EFAULT; |
36956a7d BP |
1109 | |
1110 | key_len = flow_copy_to_user(ukey, &flow->key, key_len); | |
1111 | if (key_len < 0) | |
1112 | return key_len; | |
1113 | if (put_user(key_len, &uflowp->key_len)) | |
1114 | return -EFAULT; | |
1115 | ||
704a1e09 BP |
1116 | return answer_query(dp, flow, 0, uflowp); |
1117 | } | |
1118 | ||
44e05eca | 1119 | static int do_execute(struct datapath *dp, const struct odp_execute *execute) |
064af421 | 1120 | { |
36956a7d | 1121 | struct sw_flow_key key; |
064af421 BP |
1122 | struct sk_buff *skb; |
1123 | struct sw_flow_actions *actions; | |
a393b897 | 1124 | struct ethhdr *eth; |
b7a31ec1 | 1125 | bool is_frag; |
064af421 BP |
1126 | int err; |
1127 | ||
064af421 | 1128 | err = -EINVAL; |
44e05eca | 1129 | if (execute->length < ETH_HLEN || execute->length > 65535) |
064af421 BP |
1130 | goto error; |
1131 | ||
cdee00fd | 1132 | actions = flow_actions_alloc(execute->actions_len); |
8ba1fd2f JG |
1133 | if (IS_ERR(actions)) { |
1134 | err = PTR_ERR(actions); | |
064af421 | 1135 | goto error; |
8ba1fd2f | 1136 | } |
064af421 BP |
1137 | |
1138 | err = -EFAULT; | |
1b29ebe5 | 1139 | if (copy_from_user(actions->actions, |
6c229737 | 1140 | (struct nlattr __user __force *)execute->actions, execute->actions_len)) |
064af421 BP |
1141 | goto error_free_actions; |
1142 | ||
cdee00fd | 1143 | err = validate_actions(actions->actions, execute->actions_len); |
064af421 BP |
1144 | if (err) |
1145 | goto error_free_actions; | |
1146 | ||
1147 | err = -ENOMEM; | |
44e05eca | 1148 | skb = alloc_skb(execute->length, GFP_KERNEL); |
064af421 BP |
1149 | if (!skb) |
1150 | goto error_free_actions; | |
659586ef | 1151 | |
064af421 | 1152 | err = -EFAULT; |
1b29ebe5 | 1153 | if (copy_from_user(skb_put(skb, execute->length), |
6c229737 | 1154 | (const void __user __force *)execute->data, |
44e05eca | 1155 | execute->length)) |
064af421 BP |
1156 | goto error_free_skb; |
1157 | ||
a393b897 JP |
1158 | skb_reset_mac_header(skb); |
1159 | eth = eth_hdr(skb); | |
1160 | ||
de3f65ea JP |
1161 | /* Normally, setting the skb 'protocol' field would be handled by a |
1162 | * call to eth_type_trans(), but it assumes there's a sending | |
1163 | * device, which we may not have. */ | |
a393b897 JP |
1164 | if (ntohs(eth->h_proto) >= 1536) |
1165 | skb->protocol = eth->h_proto; | |
1166 | else | |
1167 | skb->protocol = htons(ETH_P_802_2); | |
1168 | ||
f1588b1f | 1169 | err = flow_extract(skb, -1, &key, &is_frag); |
4c1ad233 BP |
1170 | if (err) |
1171 | goto error_free_skb; | |
9dca7bd5 JG |
1172 | |
1173 | rcu_read_lock(); | |
cdee00fd | 1174 | err = execute_actions(dp, skb, &key, actions->actions, actions->actions_len); |
9dca7bd5 JG |
1175 | rcu_read_unlock(); |
1176 | ||
064af421 BP |
1177 | kfree(actions); |
1178 | return err; | |
1179 | ||
1180 | error_free_skb: | |
1181 | kfree_skb(skb); | |
1182 | error_free_actions: | |
1183 | kfree(actions); | |
1184 | error: | |
1185 | return err; | |
1186 | } | |
1187 | ||
44e05eca BP |
1188 | static int execute_packet(struct datapath *dp, const struct odp_execute __user *executep) |
1189 | { | |
1190 | struct odp_execute execute; | |
1191 | ||
84c17d98 | 1192 | if (copy_from_user(&execute, executep, sizeof(execute))) |
44e05eca BP |
1193 | return -EFAULT; |
1194 | ||
1195 | return do_execute(dp, &execute); | |
1196 | } | |
1197 | ||
16190191 | 1198 | static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) |
064af421 BP |
1199 | { |
1200 | struct odp_stats stats; | |
1201 | int i; | |
1202 | ||
064af421 BP |
1203 | stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0; |
1204 | for_each_possible_cpu(i) { | |
38c6ecbc JG |
1205 | const struct dp_stats_percpu *percpu_stats; |
1206 | struct dp_stats_percpu local_stats; | |
1207 | unsigned seqcount; | |
1208 | ||
1209 | percpu_stats = per_cpu_ptr(dp->stats_percpu, i); | |
1210 | ||
1211 | do { | |
1212 | seqcount = read_seqcount_begin(&percpu_stats->seqlock); | |
1213 | local_stats = *percpu_stats; | |
1214 | } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount)); | |
1215 | ||
1216 | stats.n_frags += local_stats.n_frags; | |
1217 | stats.n_hit += local_stats.n_hit; | |
1218 | stats.n_missed += local_stats.n_missed; | |
1219 | stats.n_lost += local_stats.n_lost; | |
064af421 | 1220 | } |
84c17d98 | 1221 | return copy_to_user(statsp, &stats, sizeof(stats)) ? -EFAULT : 0; |
064af421 BP |
1222 | } |
1223 | ||
1dcf111b JP |
1224 | /* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports */ |
1225 | int dp_min_mtu(const struct datapath *dp) | |
1226 | { | |
e779d8d9 | 1227 | struct vport *p; |
1dcf111b JP |
1228 | int mtu = 0; |
1229 | ||
1230 | ASSERT_RTNL(); | |
1231 | ||
1232 | list_for_each_entry_rcu (p, &dp->port_list, node) { | |
f2459fe7 | 1233 | int dev_mtu; |
1dcf111b JP |
1234 | |
1235 | /* Skip any internal ports, since that's what we're trying to | |
1236 | * set. */ | |
e779d8d9 | 1237 | if (is_internal_vport(p)) |
1dcf111b JP |
1238 | continue; |
1239 | ||
e779d8d9 | 1240 | dev_mtu = vport_get_mtu(p); |
f2459fe7 JG |
1241 | if (!mtu || dev_mtu < mtu) |
1242 | mtu = dev_mtu; | |
1dcf111b JP |
1243 | } |
1244 | ||
1245 | return mtu ? mtu : ETH_DATA_LEN; | |
1246 | } | |
1247 | ||
f2459fe7 | 1248 | /* Sets the MTU of all datapath devices to the minimum of the ports. Must |
d8b5d43a | 1249 | * be called with RTNL lock. */ |
f2459fe7 | 1250 | void set_internal_devs_mtu(const struct datapath *dp) |
a7786963 | 1251 | { |
e779d8d9 | 1252 | struct vport *p; |
a7786963 JG |
1253 | int mtu; |
1254 | ||
1255 | ASSERT_RTNL(); | |
1256 | ||
a7786963 JG |
1257 | mtu = dp_min_mtu(dp); |
1258 | ||
1259 | list_for_each_entry_rcu (p, &dp->port_list, node) { | |
e779d8d9 BP |
1260 | if (is_internal_vport(p)) |
1261 | vport_set_mtu(p, mtu); | |
a7786963 JG |
1262 | } |
1263 | } | |
1264 | ||
c19e6535 BP |
1265 | static int get_listen_mask(const struct file *f) |
1266 | { | |
1267 | return (long)f->private_data; | |
1268 | } | |
1269 | ||
1270 | static void set_listen_mask(struct file *f, int listen_mask) | |
1271 | { | |
1272 | f->private_data = (void*)(long)listen_mask; | |
1273 | } | |
1274 | ||
1275 | static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = { | |
1276 | [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, | |
1277 | [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, | |
1278 | [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, | |
1279 | [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) }, | |
1280 | [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN }, | |
1281 | [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 }, | |
1282 | [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, | |
1283 | }; | |
1284 | ||
1285 | static int copy_vport_to_user(void __user *dst, struct vport *vport, uint32_t total_len) | |
064af421 | 1286 | { |
c19e6535 BP |
1287 | struct odp_vport *odp_vport; |
1288 | struct sk_buff *skb; | |
1289 | struct nlattr *nla; | |
1290 | int ifindex, iflink; | |
1291 | int err; | |
1292 | ||
1293 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | |
1294 | err = -ENOMEM; | |
1295 | if (!skb) | |
1296 | goto exit; | |
1297 | ||
f2459fe7 | 1298 | rcu_read_lock(); |
c19e6535 BP |
1299 | odp_vport = (struct odp_vport*)__skb_put(skb, sizeof(struct odp_vport)); |
1300 | odp_vport->dp_idx = vport->dp->dp_idx; | |
1301 | odp_vport->total_len = total_len; | |
1302 | ||
1303 | NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no); | |
1304 | NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport)); | |
1305 | NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport)); | |
1306 | ||
1307 | nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64)); | |
1308 | if (!nla) | |
1309 | goto nla_put_failure; | |
1310 | if (vport_get_stats(vport, nla_data(nla))) | |
1311 | __skb_trim(skb, skb->len - nla->nla_len); | |
1312 | ||
1313 | NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport)); | |
1314 | ||
1315 | NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport)); | |
1316 | ||
1317 | err = vport_get_options(vport, skb); | |
1318 | ||
1319 | ifindex = vport_get_ifindex(vport); | |
1320 | if (ifindex > 0) | |
1321 | NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex); | |
1322 | ||
1323 | iflink = vport_get_iflink(vport); | |
1324 | if (iflink > 0) | |
1325 | NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink); | |
1326 | ||
1327 | err = -EMSGSIZE; | |
1328 | if (skb->len > total_len) | |
1329 | goto exit_unlock; | |
1330 | ||
1331 | odp_vport->len = skb->len; | |
1332 | err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0; | |
1333 | goto exit_unlock; | |
1334 | ||
1335 | nla_put_failure: | |
1336 | err = -EMSGSIZE; | |
1337 | exit_unlock: | |
f2459fe7 | 1338 | rcu_read_unlock(); |
c19e6535 BP |
1339 | kfree_skb(skb); |
1340 | exit: | |
1341 | return err; | |
064af421 BP |
1342 | } |
1343 | ||
c19e6535 BP |
1344 | static struct sk_buff *copy_vport_from_user(struct odp_vport __user *uodp_vport, |
1345 | struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]) | |
064af421 | 1346 | { |
c19e6535 BP |
1347 | struct odp_vport *odp_vport; |
1348 | struct sk_buff *skb; | |
1349 | u32 len; | |
1350 | int err; | |
064af421 | 1351 | |
c19e6535 BP |
1352 | if (get_user(len, &uodp_vport->len)) |
1353 | return ERR_PTR(-EFAULT); | |
1354 | if (len < sizeof(struct odp_vport)) | |
1355 | return ERR_PTR(-EINVAL); | |
1356 | ||
1357 | skb = alloc_skb(len, GFP_KERNEL); | |
1358 | if (!skb) | |
1359 | return ERR_PTR(-ENOMEM); | |
1360 | ||
1361 | err = -EFAULT; | |
1362 | if (copy_from_user(__skb_put(skb, len), uodp_vport, len)) | |
1363 | goto error_free_skb; | |
f2459fe7 | 1364 | |
c19e6535 BP |
1365 | odp_vport = (struct odp_vport *)skb->data; |
1366 | err = -EINVAL; | |
1367 | if (odp_vport->len != len) | |
1368 | goto error_free_skb; | |
51d4d598 | 1369 | |
c19e6535 BP |
1370 | err = nla_parse(a, ODP_VPORT_ATTR_MAX, (struct nlattr *)(skb->data + sizeof(struct odp_vport)), |
1371 | skb->len - sizeof(struct odp_vport), vport_policy); | |
1372 | if (err) | |
1373 | goto error_free_skb; | |
064af421 | 1374 | |
c19e6535 BP |
1375 | err = VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1); |
1376 | if (err) | |
1377 | goto error_free_skb; | |
f2459fe7 | 1378 | |
c19e6535 BP |
1379 | return skb; |
1380 | ||
1381 | error_free_skb: | |
1382 | kfree_skb(skb); | |
1383 | return ERR_PTR(err); | |
1384 | } | |
51d4d598 | 1385 | |
c19e6535 BP |
1386 | |
1387 | /* Called without any locks (or with RTNL lock). | |
1388 | * Returns holding vport->dp->mutex. | |
1389 | */ | |
1390 | static struct vport *lookup_vport(struct odp_vport *odp_vport, | |
1391 | struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]) | |
1392 | { | |
1393 | struct datapath *dp; | |
1394 | struct vport *vport; | |
1395 | ||
1396 | if (a[ODP_VPORT_ATTR_NAME]) { | |
1397 | int dp_idx, port_no; | |
1398 | ||
1399 | retry: | |
1400 | vport_lock(); | |
1401 | vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME])); | |
1402 | if (!vport) { | |
1403 | vport_unlock(); | |
1404 | return ERR_PTR(-ENODEV); | |
1405 | } | |
1406 | dp_idx = vport->dp->dp_idx; | |
1407 | port_no = vport->port_no; | |
1408 | vport_unlock(); | |
7e71ab66 | 1409 | |
51d4d598 BP |
1410 | dp = get_dp_locked(dp_idx); |
1411 | if (!dp) | |
c19e6535 | 1412 | goto retry; |
51d4d598 | 1413 | |
c19e6535 BP |
1414 | vport = get_vport_protected(dp, port_no); |
1415 | if (!vport || | |
1416 | strcmp(vport_get_name(vport), nla_data(a[ODP_VPORT_ATTR_NAME]))) { | |
1417 | mutex_unlock(&dp->mutex); | |
1418 | goto retry; | |
1419 | } | |
51d4d598 | 1420 | |
c19e6535 BP |
1421 | return vport; |
1422 | } else if (a[ODP_VPORT_ATTR_PORT_NO]) { | |
1423 | u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]); | |
1424 | ||
1425 | if (port_no >= DP_MAX_PORTS) | |
1426 | return ERR_PTR(-EINVAL); | |
1427 | ||
1428 | dp = get_dp_locked(odp_vport->dp_idx); | |
1429 | if (!dp) | |
1430 | return ERR_PTR(-ENODEV); | |
f2459fe7 | 1431 | |
c19e6535 BP |
1432 | vport = get_vport_protected(dp, port_no); |
1433 | if (!vport) { | |
1434 | mutex_unlock(&dp->mutex); | |
1435 | return ERR_PTR(-ENOENT); | |
1436 | } | |
1437 | return vport; | |
1438 | } else | |
1439 | return ERR_PTR(-EINVAL); | |
064af421 BP |
1440 | } |
1441 | ||
c19e6535 | 1442 | static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]) |
064af421 | 1443 | { |
c19e6535 BP |
1444 | int err = 0; |
1445 | if (a[ODP_VPORT_ATTR_STATS]) | |
1446 | err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS])); | |
1447 | if (!err && a[ODP_VPORT_ATTR_ADDRESS]) | |
1448 | err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS])); | |
1449 | if (!err && a[ODP_VPORT_ATTR_MTU]) | |
1450 | err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU])); | |
1451 | return err; | |
1452 | } | |
1453 | ||
1454 | static int attach_vport(struct odp_vport __user *uodp_vport) | |
1455 | { | |
1456 | struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]; | |
1457 | struct odp_vport *odp_vport; | |
1458 | struct vport_parms parms; | |
1459 | struct vport *vport; | |
1460 | struct sk_buff *skb; | |
1461 | struct datapath *dp; | |
b0ec0f27 | 1462 | u32 port_no; |
c19e6535 | 1463 | int err; |
b0ec0f27 | 1464 | |
c19e6535 BP |
1465 | skb = copy_vport_from_user(uodp_vport, a); |
1466 | err = PTR_ERR(skb); | |
1467 | if (IS_ERR(skb)) | |
1468 | goto exit; | |
1469 | odp_vport = (struct odp_vport *)skb->data; | |
1470 | ||
1471 | err = -EINVAL; | |
1472 | if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE]) | |
1473 | goto exit_kfree_skb; | |
51d4d598 | 1474 | |
c19e6535 BP |
1475 | rtnl_lock(); |
1476 | ||
1477 | dp = get_dp_locked(odp_vport->dp_idx); | |
1478 | err = -ENODEV; | |
1479 | if (!dp) | |
1480 | goto exit_unlock_rtnl; | |
1481 | ||
1482 | if (a[ODP_VPORT_ATTR_PORT_NO]) { | |
1483 | port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]); | |
1484 | ||
1485 | err = -EFBIG; | |
1486 | if (port_no >= DP_MAX_PORTS) | |
1487 | goto exit_unlock_dp; | |
1488 | ||
1489 | vport = get_vport_protected(dp, port_no); | |
1490 | err = -EBUSY; | |
1491 | if (vport) | |
1492 | goto exit_unlock_dp; | |
1493 | } else { | |
1494 | for (port_no = 1; ; port_no++) { | |
1495 | if (port_no >= DP_MAX_PORTS) { | |
1496 | err = -EFBIG; | |
1497 | goto exit_unlock_dp; | |
1498 | } | |
1499 | vport = get_vport_protected(dp, port_no); | |
1500 | if (!vport) | |
1501 | break; | |
51d4d598 | 1502 | } |
064af421 | 1503 | } |
b0ec0f27 | 1504 | |
c19e6535 BP |
1505 | parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]); |
1506 | parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]); | |
1507 | parms.options = a[ODP_VPORT_ATTR_OPTIONS]; | |
1508 | parms.dp = dp; | |
1509 | parms.port_no = port_no; | |
1510 | ||
1511 | vport = new_vport(&parms); | |
1512 | err = PTR_ERR(vport); | |
1513 | if (IS_ERR(vport)) | |
1514 | goto exit_unlock_dp; | |
1515 | ||
1516 | set_internal_devs_mtu(dp); | |
1517 | dp_sysfs_add_if(vport); | |
1518 | ||
1519 | err = change_vport(vport, a); | |
1520 | if (err) { | |
1521 | dp_detach_port(vport); | |
1522 | goto exit_unlock_dp; | |
1523 | } | |
1524 | ||
1525 | err = copy_vport_to_user(uodp_vport, vport, odp_vport->total_len); | |
1526 | ||
1527 | exit_unlock_dp: | |
1528 | mutex_unlock(&dp->mutex); | |
1529 | exit_unlock_rtnl: | |
1530 | rtnl_unlock(); | |
1531 | exit_kfree_skb: | |
1532 | kfree_skb(skb); | |
1533 | exit: | |
1534 | return err; | |
44e05eca BP |
1535 | } |
1536 | ||
c19e6535 | 1537 | static int set_vport(unsigned int cmd, struct odp_vport __user *uodp_vport) |
44e05eca | 1538 | { |
c19e6535 BP |
1539 | struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]; |
1540 | struct vport *vport; | |
1541 | struct sk_buff *skb; | |
1542 | int err; | |
44e05eca | 1543 | |
c19e6535 BP |
1544 | skb = copy_vport_from_user(uodp_vport, a); |
1545 | err = PTR_ERR(skb); | |
1546 | if (IS_ERR(skb)) | |
1547 | goto exit; | |
1548 | ||
1549 | rtnl_lock(); | |
1550 | vport = lookup_vport((struct odp_vport *)skb->data, a); | |
1551 | err = PTR_ERR(vport); | |
1552 | if (IS_ERR(vport)) | |
1553 | goto exit_free; | |
44e05eca | 1554 | |
c19e6535 BP |
1555 | err = 0; |
1556 | if (a[ODP_VPORT_ATTR_OPTIONS]) | |
1557 | err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]); | |
1558 | if (!err) | |
1559 | err = change_vport(vport, a); | |
1560 | ||
1561 | mutex_unlock(&vport->dp->mutex); | |
1562 | exit_free: | |
1563 | kfree_skb(skb); | |
1564 | rtnl_unlock(); | |
1565 | exit: | |
1566 | return err; | |
064af421 BP |
1567 | } |
1568 | ||
c19e6535 | 1569 | static int del_vport(unsigned int cmd, struct odp_vport __user *uodp_vport) |
7c40efc9 | 1570 | { |
c19e6535 BP |
1571 | struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]; |
1572 | struct datapath *dp; | |
1573 | struct vport *vport; | |
1574 | struct sk_buff *skb; | |
1575 | int err; | |
1576 | ||
1577 | skb = copy_vport_from_user(uodp_vport, a); | |
1578 | err = PTR_ERR(skb); | |
1579 | if (IS_ERR(skb)) | |
1580 | goto exit; | |
1581 | ||
1582 | rtnl_lock(); | |
1583 | vport = lookup_vport((struct odp_vport *)skb->data, a); | |
1584 | err = PTR_ERR(vport); | |
1585 | if (IS_ERR(vport)) | |
1586 | goto exit_free; | |
1587 | dp = vport->dp; | |
1588 | ||
1589 | err = -EINVAL; | |
1590 | if (vport->port_no == ODPP_LOCAL) | |
1591 | goto exit_free; | |
1592 | ||
1593 | err = dp_detach_port(vport); | |
1594 | mutex_unlock(&dp->mutex); | |
1595 | exit_free: | |
1596 | kfree_skb(skb); | |
1597 | rtnl_unlock(); | |
1598 | exit: | |
1599 | return err; | |
7c40efc9 BP |
1600 | } |
1601 | ||
c19e6535 | 1602 | static int get_vport(struct odp_vport __user *uodp_vport) |
7c40efc9 | 1603 | { |
c19e6535 BP |
1604 | struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]; |
1605 | struct odp_vport *odp_vport; | |
1606 | struct vport *vport; | |
1607 | struct sk_buff *skb; | |
1608 | int err; | |
1609 | ||
1610 | skb = copy_vport_from_user(uodp_vport, a); | |
1611 | err = PTR_ERR(skb); | |
1612 | if (IS_ERR(skb)) | |
1613 | goto exit; | |
1614 | odp_vport = (struct odp_vport *)skb->data; | |
1615 | ||
1616 | vport = lookup_vport(odp_vport, a); | |
1617 | err = PTR_ERR(vport); | |
1618 | if (IS_ERR(vport)) | |
1619 | goto exit_free; | |
1620 | ||
1621 | err = copy_vport_to_user(uodp_vport, vport, odp_vport->total_len); | |
1622 | mutex_unlock(&vport->dp->mutex); | |
1623 | exit_free: | |
1624 | kfree_skb(skb); | |
1625 | exit: | |
1626 | return err; | |
1627 | } | |
1628 | ||
1629 | static int dump_vport(struct odp_vport __user *uodp_vport) | |
1630 | { | |
1631 | struct nlattr *a[ODP_VPORT_ATTR_MAX + 1]; | |
1632 | struct odp_vport *odp_vport; | |
1633 | struct sk_buff *skb; | |
1634 | struct datapath *dp; | |
1635 | u32 port_no; | |
1636 | int err; | |
1637 | ||
1638 | skb = copy_vport_from_user(uodp_vport, a); | |
1639 | err = PTR_ERR(skb); | |
1640 | if (IS_ERR(skb)) | |
1641 | goto exit; | |
1642 | odp_vport = (struct odp_vport *)skb->data; | |
1643 | ||
1644 | dp = get_dp_locked(odp_vport->dp_idx); | |
1645 | err = -ENODEV; | |
1646 | if (!dp) | |
1647 | goto exit_free; | |
1648 | ||
1649 | port_no = 0; | |
1650 | if (a[ODP_VPORT_ATTR_PORT_NO]) | |
1651 | port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]); | |
1652 | for (; port_no < DP_MAX_PORTS; port_no++) { | |
1653 | struct vport *vport = get_vport_protected(dp, port_no); | |
1654 | if (vport) { | |
1655 | err = copy_vport_to_user(uodp_vport, vport, odp_vport->total_len); | |
1656 | goto exit_unlock_dp; | |
1657 | } | |
1658 | } | |
1659 | err = -ENODEV; | |
1660 | ||
1661 | exit_unlock_dp: | |
1662 | mutex_unlock(&dp->mutex); | |
1663 | exit_free: | |
1664 | kfree_skb(skb); | |
1665 | exit: | |
1666 | return err; | |
7c40efc9 BP |
1667 | } |
1668 | ||
064af421 BP |
1669 | static long openvswitch_ioctl(struct file *f, unsigned int cmd, |
1670 | unsigned long argp) | |
1671 | { | |
1672 | int dp_idx = iminor(f->f_dentry->d_inode); | |
1673 | struct datapath *dp; | |
c19e6535 | 1674 | int drop_frags, listeners; |
72b06300 | 1675 | unsigned int sflow_probability; |
064af421 BP |
1676 | int err; |
1677 | ||
1678 | /* Handle commands with special locking requirements up front. */ | |
1679 | switch (cmd) { | |
1680 | case ODP_DP_CREATE: | |
e86c8696 BP |
1681 | err = create_dp(dp_idx, (char __user *)argp); |
1682 | goto exit; | |
064af421 BP |
1683 | |
1684 | case ODP_DP_DESTROY: | |
e86c8696 BP |
1685 | err = destroy_dp(dp_idx); |
1686 | goto exit; | |
064af421 | 1687 | |
c19e6535 BP |
1688 | case ODP_VPORT_NEW: |
1689 | err = attach_vport((struct odp_vport __user *)argp); | |
e86c8696 | 1690 | goto exit; |
064af421 | 1691 | |
c19e6535 BP |
1692 | case ODP_VPORT_GET: |
1693 | err = get_vport((struct odp_vport __user *)argp); | |
f2459fe7 JG |
1694 | goto exit; |
1695 | ||
c19e6535 BP |
1696 | case ODP_VPORT_DEL: |
1697 | err = del_vport(cmd, (struct odp_vport __user *)argp); | |
780e6207 JG |
1698 | goto exit; |
1699 | ||
c19e6535 BP |
1700 | case ODP_VPORT_SET: |
1701 | err = set_vport(cmd, (struct odp_vport __user *)argp); | |
f2459fe7 JG |
1702 | goto exit; |
1703 | ||
c19e6535 BP |
1704 | case ODP_VPORT_DUMP: |
1705 | err = dump_vport((struct odp_vport __user *)argp); | |
e86c8696 | 1706 | goto exit; |
064af421 BP |
1707 | } |
1708 | ||
1709 | dp = get_dp_locked(dp_idx); | |
e86c8696 | 1710 | err = -ENODEV; |
064af421 | 1711 | if (!dp) |
e86c8696 | 1712 | goto exit; |
064af421 BP |
1713 | |
1714 | switch (cmd) { | |
1715 | case ODP_DP_STATS: | |
1716 | err = get_dp_stats(dp, (struct odp_stats __user *)argp); | |
1717 | break; | |
1718 | ||
1719 | case ODP_GET_DROP_FRAGS: | |
1720 | err = put_user(dp->drop_frags, (int __user *)argp); | |
1721 | break; | |
1722 | ||
1723 | case ODP_SET_DROP_FRAGS: | |
1724 | err = get_user(drop_frags, (int __user *)argp); | |
1725 | if (err) | |
1726 | break; | |
1727 | err = -EINVAL; | |
1728 | if (drop_frags != 0 && drop_frags != 1) | |
1729 | break; | |
1730 | dp->drop_frags = drop_frags; | |
1731 | err = 0; | |
1732 | break; | |
1733 | ||
1734 | case ODP_GET_LISTEN_MASK: | |
7c40efc9 | 1735 | err = put_user(get_listen_mask(f), (int __user *)argp); |
064af421 BP |
1736 | break; |
1737 | ||
1738 | case ODP_SET_LISTEN_MASK: | |
1739 | err = get_user(listeners, (int __user *)argp); | |
1740 | if (err) | |
1741 | break; | |
1742 | err = -EINVAL; | |
1743 | if (listeners & ~ODPL_ALL) | |
1744 | break; | |
1745 | err = 0; | |
7c40efc9 | 1746 | set_listen_mask(f, listeners); |
064af421 BP |
1747 | break; |
1748 | ||
72b06300 BP |
1749 | case ODP_GET_SFLOW_PROBABILITY: |
1750 | err = put_user(dp->sflow_probability, (unsigned int __user *)argp); | |
1751 | break; | |
1752 | ||
1753 | case ODP_SET_SFLOW_PROBABILITY: | |
1754 | err = get_user(sflow_probability, (unsigned int __user *)argp); | |
1755 | if (!err) | |
1756 | dp->sflow_probability = sflow_probability; | |
1757 | break; | |
1758 | ||
064af421 BP |
1759 | case ODP_FLOW_FLUSH: |
1760 | err = flush_flows(dp); | |
1761 | break; | |
1762 | ||
1763 | case ODP_FLOW_PUT: | |
1764 | err = put_flow(dp, (struct odp_flow_put __user *)argp); | |
1765 | break; | |
1766 | ||
1767 | case ODP_FLOW_DEL: | |
f1aa2072 | 1768 | err = del_flow(dp, (struct odp_flow __user *)argp); |
064af421 BP |
1769 | break; |
1770 | ||
f1aa2072 | 1771 | case ODP_FLOW_GET: |
44e05eca | 1772 | err = do_flowvec_ioctl(dp, argp, do_query_flows); |
064af421 BP |
1773 | break; |
1774 | ||
704a1e09 BP |
1775 | case ODP_FLOW_DUMP: |
1776 | err = dump_flow(dp, (struct odp_flow_dump __user *)argp); | |
064af421 BP |
1777 | break; |
1778 | ||
1779 | case ODP_EXECUTE: | |
44e05eca | 1780 | err = execute_packet(dp, (struct odp_execute __user *)argp); |
064af421 BP |
1781 | break; |
1782 | ||
1783 | default: | |
1784 | err = -ENOIOCTLCMD; | |
1785 | break; | |
1786 | } | |
1787 | mutex_unlock(&dp->mutex); | |
e86c8696 | 1788 | exit: |
064af421 BP |
1789 | return err; |
1790 | } | |
1791 | ||
1792 | static int dp_has_packet_of_interest(struct datapath *dp, int listeners) | |
1793 | { | |
1794 | int i; | |
1795 | for (i = 0; i < DP_N_QUEUES; i++) { | |
1796 | if (listeners & (1 << i) && !skb_queue_empty(&dp->queues[i])) | |
1797 | return 1; | |
1798 | } | |
1799 | return 0; | |
1800 | } | |
1801 | ||
3fbd517a | 1802 | #ifdef CONFIG_COMPAT |
3fbd517a BP |
1803 | static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat) |
1804 | { | |
36956a7d | 1805 | compat_uptr_t key, actions; |
3fbd517a BP |
1806 | |
1807 | if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) || | |
1808 | __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) || | |
36956a7d BP |
1809 | __get_user(key, &compat->key) || |
1810 | __get_user(flow->key_len, &compat->key_len) || | |
3fbd517a | 1811 | __get_user(actions, &compat->actions) || |
cdee00fd | 1812 | __get_user(flow->actions_len, &compat->actions_len) || |
3fbd517a BP |
1813 | __get_user(flow->flags, &compat->flags)) |
1814 | return -EFAULT; | |
1815 | ||
36956a7d | 1816 | flow->key = (struct nlattr __force *)compat_ptr(key); |
1b29ebe5 | 1817 | flow->actions = (struct nlattr __force *)compat_ptr(actions); |
3fbd517a BP |
1818 | return 0; |
1819 | } | |
1820 | ||
1821 | static int compat_put_flow(struct datapath *dp, struct compat_odp_flow_put __user *ufp) | |
1822 | { | |
1823 | struct odp_flow_stats stats; | |
1824 | struct odp_flow_put fp; | |
1825 | int error; | |
1826 | ||
1827 | if (compat_get_flow(&fp.flow, &ufp->flow) || | |
1828 | get_user(fp.flags, &ufp->flags)) | |
1829 | return -EFAULT; | |
1830 | ||
1831 | error = do_put_flow(dp, &fp, &stats); | |
1832 | if (error) | |
1833 | return error; | |
1834 | ||
1835 | if (copy_to_user(&ufp->flow.stats, &stats, | |
1836 | sizeof(struct odp_flow_stats))) | |
1837 | return -EFAULT; | |
1838 | ||
1839 | return 0; | |
1840 | } | |
1841 | ||
d3c54451 JG |
1842 | static int compat_answer_query(struct datapath *dp, struct sw_flow *flow, |
1843 | u32 query_flags, | |
3fbd517a BP |
1844 | struct compat_odp_flow __user *ufp) |
1845 | { | |
1846 | compat_uptr_t actions; | |
1847 | ||
1848 | if (get_user(actions, &ufp->actions)) | |
1849 | return -EFAULT; | |
1850 | ||
d3c54451 | 1851 | return do_answer_query(dp, flow, query_flags, &ufp->stats, |
cdee00fd | 1852 | compat_ptr(actions), &ufp->actions_len); |
3fbd517a BP |
1853 | } |
1854 | ||
1855 | static int compat_del_flow(struct datapath *dp, struct compat_odp_flow __user *ufp) | |
1856 | { | |
1857 | struct sw_flow *flow; | |
1858 | struct odp_flow uf; | |
1859 | int error; | |
1860 | ||
1861 | if (compat_get_flow(&uf, ufp)) | |
1862 | return -EFAULT; | |
1863 | ||
36956a7d | 1864 | flow = do_del_flow(dp, (const struct nlattr __force __user *)uf.key, uf.key_len); |
3fbd517a BP |
1865 | if (IS_ERR(flow)) |
1866 | return PTR_ERR(flow); | |
1867 | ||
d3c54451 | 1868 | error = compat_answer_query(dp, flow, 0, ufp); |
3fbd517a BP |
1869 | flow_deferred_free(flow); |
1870 | return error; | |
1871 | } | |
1872 | ||
1b29ebe5 JG |
1873 | static int compat_query_flows(struct datapath *dp, |
1874 | struct compat_odp_flow __user *flows, | |
1875 | u32 n_flows) | |
3fbd517a | 1876 | { |
9abaf6b3 | 1877 | struct tbl *table = get_table_protected(dp); |
3fbd517a BP |
1878 | u32 i; |
1879 | ||
1880 | for (i = 0; i < n_flows; i++) { | |
1881 | struct compat_odp_flow __user *ufp = &flows[i]; | |
1882 | struct odp_flow uf; | |
1883 | struct tbl_node *flow_node; | |
36956a7d | 1884 | struct sw_flow_key key; |
3fbd517a BP |
1885 | int error; |
1886 | ||
1887 | if (compat_get_flow(&uf, ufp)) | |
1888 | return -EFAULT; | |
3fbd517a | 1889 | |
36956a7d BP |
1890 | error = flow_copy_from_user(&key, (const struct nlattr __force __user *) uf.key, uf.key_len); |
1891 | if (error) | |
1892 | return error; | |
1893 | ||
1894 | flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp); | |
3fbd517a BP |
1895 | if (!flow_node) |
1896 | error = put_user(ENOENT, &ufp->stats.error); | |
1897 | else | |
d3c54451 JG |
1898 | error = compat_answer_query(dp, flow_cast(flow_node), |
1899 | uf.flags, ufp); | |
3fbd517a BP |
1900 | if (error) |
1901 | return -EFAULT; | |
1902 | } | |
1903 | return n_flows; | |
1904 | } | |
1905 | ||
704a1e09 | 1906 | static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __user *udumpp) |
3fbd517a | 1907 | { |
704a1e09 BP |
1908 | struct compat_odp_flow __user *uflowp; |
1909 | compat_uptr_t compat_ufp; | |
1910 | struct sw_flow *flow; | |
36956a7d BP |
1911 | compat_uptr_t ukey; |
1912 | u32 key_len; | |
3fbd517a | 1913 | |
704a1e09 BP |
1914 | flow = do_dump_flow(dp, udumpp->state); |
1915 | if (IS_ERR(flow)) | |
1916 | return PTR_ERR(flow); | |
3fbd517a | 1917 | |
704a1e09 BP |
1918 | if (get_user(compat_ufp, &udumpp->flow)) |
1919 | return -EFAULT; | |
1920 | uflowp = compat_ptr(compat_ufp); | |
3fbd517a | 1921 | |
704a1e09 BP |
1922 | if (!flow) |
1923 | return put_user(ODPFF_EOF, &uflowp->flags); | |
6bfafa55 | 1924 | |
36956a7d BP |
1925 | if (put_user(0, &uflowp->flags) || |
1926 | get_user(ukey, &uflowp->key) || | |
1927 | get_user(key_len, &uflowp->key_len)) | |
1928 | return -EFAULT; | |
1929 | ||
1930 | key_len = flow_copy_to_user(compat_ptr(ukey), &flow->key, key_len); | |
1931 | if (key_len < 0) | |
1932 | return key_len; | |
1933 | if (put_user(key_len, &uflowp->key_len)) | |
704a1e09 | 1934 | return -EFAULT; |
36956a7d | 1935 | |
704a1e09 | 1936 | return compat_answer_query(dp, flow, 0, uflowp); |
3fbd517a BP |
1937 | } |
1938 | ||
1939 | static int compat_flowvec_ioctl(struct datapath *dp, unsigned long argp, | |
1940 | int (*function)(struct datapath *, | |
1b29ebe5 | 1941 | struct compat_odp_flow __user *, |
3fbd517a BP |
1942 | u32 n_flows)) |
1943 | { | |
1944 | struct compat_odp_flowvec __user *uflowvec; | |
1945 | struct compat_odp_flow __user *flows; | |
1946 | struct compat_odp_flowvec flowvec; | |
1947 | int retval; | |
1948 | ||
1949 | uflowvec = compat_ptr(argp); | |
84c17d98 BP |
1950 | if (!access_ok(VERIFY_WRITE, uflowvec, sizeof(*uflowvec)) || |
1951 | copy_from_user(&flowvec, uflowvec, sizeof(flowvec))) | |
3fbd517a BP |
1952 | return -EFAULT; |
1953 | ||
1954 | if (flowvec.n_flows > INT_MAX / sizeof(struct compat_odp_flow)) | |
1955 | return -EINVAL; | |
1956 | ||
1957 | flows = compat_ptr(flowvec.flows); | |
1958 | if (!access_ok(VERIFY_WRITE, flows, | |
1959 | flowvec.n_flows * sizeof(struct compat_odp_flow))) | |
1960 | return -EFAULT; | |
1961 | ||
1962 | retval = function(dp, flows, flowvec.n_flows); | |
1963 | return (retval < 0 ? retval | |
1964 | : retval == flowvec.n_flows ? 0 | |
1965 | : put_user(retval, &uflowvec->n_flows)); | |
1966 | } | |
1967 | ||
1968 | static int compat_execute(struct datapath *dp, const struct compat_odp_execute __user *uexecute) | |
1969 | { | |
1970 | struct odp_execute execute; | |
1971 | compat_uptr_t actions; | |
1972 | compat_uptr_t data; | |
1973 | ||
1974 | if (!access_ok(VERIFY_READ, uexecute, sizeof(struct compat_odp_execute)) || | |
3fbd517a | 1975 | __get_user(actions, &uexecute->actions) || |
cdee00fd | 1976 | __get_user(execute.actions_len, &uexecute->actions_len) || |
3fbd517a BP |
1977 | __get_user(data, &uexecute->data) || |
1978 | __get_user(execute.length, &uexecute->length)) | |
1979 | return -EFAULT; | |
1980 | ||
1b29ebe5 JG |
1981 | execute.actions = (struct nlattr __force *)compat_ptr(actions); |
1982 | execute.data = (const void __force *)compat_ptr(data); | |
3fbd517a BP |
1983 | |
1984 | return do_execute(dp, &execute); | |
1985 | } | |
1986 | ||
1987 | static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp) | |
1988 | { | |
1989 | int dp_idx = iminor(f->f_dentry->d_inode); | |
1990 | struct datapath *dp; | |
1991 | int err; | |
1992 | ||
1993 | switch (cmd) { | |
1994 | case ODP_DP_DESTROY: | |
1995 | case ODP_FLOW_FLUSH: | |
1996 | /* Ioctls that don't need any translation at all. */ | |
1997 | return openvswitch_ioctl(f, cmd, argp); | |
1998 | ||
1999 | case ODP_DP_CREATE: | |
c19e6535 BP |
2000 | case ODP_VPORT_NEW: |
2001 | case ODP_VPORT_DEL: | |
2002 | case ODP_VPORT_GET: | |
2003 | case ODP_VPORT_SET: | |
2004 | case ODP_VPORT_DUMP: | |
3fbd517a BP |
2005 | case ODP_DP_STATS: |
2006 | case ODP_GET_DROP_FRAGS: | |
2007 | case ODP_SET_DROP_FRAGS: | |
2008 | case ODP_SET_LISTEN_MASK: | |
2009 | case ODP_GET_LISTEN_MASK: | |
2010 | case ODP_SET_SFLOW_PROBABILITY: | |
2011 | case ODP_GET_SFLOW_PROBABILITY: | |
3fbd517a BP |
2012 | /* Ioctls that just need their pointer argument extended. */ |
2013 | return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp)); | |
3fbd517a BP |
2014 | } |
2015 | ||
2016 | dp = get_dp_locked(dp_idx); | |
2017 | err = -ENODEV; | |
2018 | if (!dp) | |
2019 | goto exit; | |
2020 | ||
2021 | switch (cmd) { | |
3fbd517a BP |
2022 | case ODP_FLOW_PUT32: |
2023 | err = compat_put_flow(dp, compat_ptr(argp)); | |
2024 | break; | |
2025 | ||
2026 | case ODP_FLOW_DEL32: | |
2027 | err = compat_del_flow(dp, compat_ptr(argp)); | |
2028 | break; | |
2029 | ||
2030 | case ODP_FLOW_GET32: | |
2031 | err = compat_flowvec_ioctl(dp, argp, compat_query_flows); | |
2032 | break; | |
2033 | ||
704a1e09 BP |
2034 | case ODP_FLOW_DUMP32: |
2035 | err = compat_dump_flow(dp, compat_ptr(argp)); | |
3fbd517a BP |
2036 | break; |
2037 | ||
2038 | case ODP_EXECUTE32: | |
2039 | err = compat_execute(dp, compat_ptr(argp)); | |
2040 | break; | |
2041 | ||
2042 | default: | |
2043 | err = -ENOIOCTLCMD; | |
2044 | break; | |
2045 | } | |
2046 | mutex_unlock(&dp->mutex); | |
2047 | exit: | |
2048 | return err; | |
2049 | } | |
2050 | #endif | |
2051 | ||
33b38b63 JG |
2052 | static ssize_t openvswitch_read(struct file *f, char __user *buf, |
2053 | size_t nbytes, loff_t *ppos) | |
064af421 | 2054 | { |
7c40efc9 | 2055 | int listeners = get_listen_mask(f); |
064af421 | 2056 | int dp_idx = iminor(f->f_dentry->d_inode); |
e22d4953 | 2057 | struct datapath *dp = get_dp_locked(dp_idx); |
064af421 | 2058 | struct sk_buff *skb; |
856081f6 | 2059 | struct iovec iov; |
064af421 BP |
2060 | int retval; |
2061 | ||
2062 | if (!dp) | |
2063 | return -ENODEV; | |
2064 | ||
2065 | if (nbytes == 0 || !listeners) | |
2066 | return 0; | |
2067 | ||
2068 | for (;;) { | |
2069 | int i; | |
2070 | ||
2071 | for (i = 0; i < DP_N_QUEUES; i++) { | |
2072 | if (listeners & (1 << i)) { | |
2073 | skb = skb_dequeue(&dp->queues[i]); | |
2074 | if (skb) | |
2075 | goto success; | |
2076 | } | |
2077 | } | |
2078 | ||
2079 | if (f->f_flags & O_NONBLOCK) { | |
2080 | retval = -EAGAIN; | |
2081 | goto error; | |
2082 | } | |
2083 | ||
2084 | wait_event_interruptible(dp->waitqueue, | |
2085 | dp_has_packet_of_interest(dp, | |
2086 | listeners)); | |
2087 | ||
2088 | if (signal_pending(current)) { | |
2089 | retval = -ERESTARTSYS; | |
2090 | goto error; | |
2091 | } | |
2092 | } | |
2093 | success: | |
e22d4953 JG |
2094 | mutex_unlock(&dp->mutex); |
2095 | ||
856081f6 BP |
2096 | iov.iov_base = buf; |
2097 | iov.iov_len = min_t(size_t, skb->len, nbytes); | |
2098 | retval = skb_copy_datagram_iovec(skb, 0, &iov, iov.iov_len); | |
064af421 | 2099 | if (!retval) |
856081f6 | 2100 | retval = skb->len; |
9cc8b4e4 | 2101 | |
064af421 | 2102 | kfree_skb(skb); |
e22d4953 | 2103 | return retval; |
064af421 BP |
2104 | |
2105 | error: | |
e22d4953 | 2106 | mutex_unlock(&dp->mutex); |
064af421 BP |
2107 | return retval; |
2108 | } | |
2109 | ||
2110 | static unsigned int openvswitch_poll(struct file *file, poll_table *wait) | |
2111 | { | |
2112 | int dp_idx = iminor(file->f_dentry->d_inode); | |
e22d4953 | 2113 | struct datapath *dp = get_dp_locked(dp_idx); |
064af421 BP |
2114 | unsigned int mask; |
2115 | ||
2116 | if (dp) { | |
2117 | mask = 0; | |
2118 | poll_wait(file, &dp->waitqueue, wait); | |
7c40efc9 | 2119 | if (dp_has_packet_of_interest(dp, get_listen_mask(file))) |
064af421 | 2120 | mask |= POLLIN | POLLRDNORM; |
e22d4953 | 2121 | mutex_unlock(&dp->mutex); |
064af421 BP |
2122 | } else { |
2123 | mask = POLLIN | POLLRDNORM | POLLHUP; | |
2124 | } | |
2125 | return mask; | |
2126 | } | |
2127 | ||
33b38b63 | 2128 | static struct file_operations openvswitch_fops = { |
609af740 | 2129 | .owner = THIS_MODULE, |
064af421 BP |
2130 | .read = openvswitch_read, |
2131 | .poll = openvswitch_poll, | |
2132 | .unlocked_ioctl = openvswitch_ioctl, | |
3fbd517a BP |
2133 | #ifdef CONFIG_COMPAT |
2134 | .compat_ioctl = openvswitch_compat_ioctl, | |
2135 | #endif | |
064af421 BP |
2136 | }; |
2137 | ||
2138 | static int major; | |
22d24ebf | 2139 | |
22d24ebf BP |
2140 | static int __init dp_init(void) |
2141 | { | |
f2459fe7 | 2142 | struct sk_buff *dummy_skb; |
22d24ebf BP |
2143 | int err; |
2144 | ||
f2459fe7 | 2145 | BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb)); |
22d24ebf | 2146 | |
f2459fe7 | 2147 | printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR); |
064af421 BP |
2148 | |
2149 | err = flow_init(); | |
2150 | if (err) | |
2151 | goto error; | |
2152 | ||
f2459fe7 | 2153 | err = vport_init(); |
064af421 BP |
2154 | if (err) |
2155 | goto error_flow_exit; | |
2156 | ||
f2459fe7 JG |
2157 | err = register_netdevice_notifier(&dp_device_notifier); |
2158 | if (err) | |
2159 | goto error_vport_exit; | |
2160 | ||
064af421 BP |
2161 | major = register_chrdev(0, "openvswitch", &openvswitch_fops); |
2162 | if (err < 0) | |
2163 | goto error_unreg_notifier; | |
2164 | ||
064af421 BP |
2165 | return 0; |
2166 | ||
2167 | error_unreg_notifier: | |
2168 | unregister_netdevice_notifier(&dp_device_notifier); | |
f2459fe7 JG |
2169 | error_vport_exit: |
2170 | vport_exit(); | |
064af421 BP |
2171 | error_flow_exit: |
2172 | flow_exit(); | |
2173 | error: | |
2174 | return err; | |
2175 | } | |
2176 | ||
2177 | static void dp_cleanup(void) | |
2178 | { | |
2179 | rcu_barrier(); | |
2180 | unregister_chrdev(major, "openvswitch"); | |
2181 | unregister_netdevice_notifier(&dp_device_notifier); | |
f2459fe7 | 2182 | vport_exit(); |
064af421 | 2183 | flow_exit(); |
064af421 BP |
2184 | } |
2185 | ||
2186 | module_init(dp_init); | |
2187 | module_exit(dp_cleanup); | |
2188 | ||
2189 | MODULE_DESCRIPTION("Open vSwitch switching datapath"); | |
2190 | MODULE_LICENSE("GPL"); |