]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
datapath: Change listing ports to use an iterator concept.
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
f632c8fc 2 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
a14bc59f
BP
3 * Distributed under the terms of the GNU GPL version 2.
4 *
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
064af421
BP
7 */
8
9/* Functions for managing the dp interface/device. */
10
dfffaef1
JP
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
064af421
BP
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/fs.h>
16#include <linux/if_arp.h>
064af421
BP
17#include <linux/if_vlan.h>
18#include <linux/in.h>
19#include <linux/ip.h>
20#include <linux/delay.h>
21#include <linux/time.h>
22#include <linux/etherdevice.h>
23#include <linux/kernel.h>
24#include <linux/kthread.h>
064af421
BP
25#include <linux/mutex.h>
26#include <linux/percpu.h>
27#include <linux/rcupdate.h>
28#include <linux/tcp.h>
29#include <linux/udp.h>
30#include <linux/version.h>
31#include <linux/ethtool.h>
064af421
BP
32#include <linux/wait.h>
33#include <asm/system.h>
34#include <asm/div64.h>
35#include <asm/bug.h>
656a0e37 36#include <linux/highmem.h>
064af421
BP
37#include <linux/netfilter_bridge.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/inetdevice.h>
40#include <linux/list.h>
41#include <linux/rculist.h>
064af421 42#include <linux/dmi.h>
3c5f6de3 43#include <net/inet_ecn.h>
36956a7d 44#include <net/genetlink.h>
3fbd517a 45#include <linux/compat.h>
064af421
BP
46
47#include "openvswitch/datapath-protocol.h"
dd8d6b8c 48#include "checksum.h"
064af421
BP
49#include "datapath.h"
50#include "actions.h"
064af421 51#include "flow.h"
7eaa9830 52#include "loop_counter.h"
3fbd517a 53#include "odp-compat.h"
8d5ebd83 54#include "table.h"
f2459fe7 55#include "vport-internal_dev.h"
064af421 56
064af421
BP
/* Hook used by the bridge-compatibility module to intercept ioctls on
 * datapath network devices.  NULL when no hook is registered. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/* Datapaths.  Protected on the read side by rcu_read_lock, on the write side
 * by dp_mutex.
 *
 * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL
 * lock first.
 *
 * It is safe to access the datapath and vport structures with just
 * dp_mutex.
 */
static struct datapath __rcu *dps[ODP_MAX];
static DEFINE_MUTEX(dp_mutex);

/* Forward declaration; defined below.  Creates a new vport and attaches it
 * to 'dp' at port number 'port_no'. */
static int new_vport(struct datapath *, struct odp_port *, int port_no);
064af421
BP
73
/* Returns the datapath with index 'dp_idx', or NULL if there is none or the
 * index is out of range.  Must be called with rcu_read_lock or dp_mutex. */
struct datapath *get_dp(int dp_idx)
{
	if (dp_idx < 0 || dp_idx >= ODP_MAX)
		return NULL;
	/* Either RCU read-side protection or dp_mutex satisfies lockdep here. */
	return rcu_dereference_check(dps[dp_idx], rcu_read_lock_held() ||
					 lockdep_is_held(&dp_mutex));
}
EXPORT_SYMBOL_GPL(get_dp);
83
35f7605b 84static struct datapath *get_dp_locked(int dp_idx)
064af421
BP
85{
86 struct datapath *dp;
87
88 mutex_lock(&dp_mutex);
89 dp = get_dp(dp_idx);
90 if (dp)
91 mutex_lock(&dp->mutex);
92 mutex_unlock(&dp_mutex);
93 return dp;
94}
95
027f9007 96static struct tbl *get_table_protected(struct datapath *dp)
9abaf6b3 97{
1452b28c
JG
98 return rcu_dereference_protected(dp->table,
99 lockdep_is_held(&dp->mutex));
100}
101
027f9007 102static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
1452b28c
JG
103{
104 return rcu_dereference_protected(dp->ports[port_no],
105 lockdep_is_held(&dp->mutex));
9abaf6b3
JG
106}
107
f2459fe7
JG
/* Returns the name of the datapath's local port, which doubles as the
 * datapath's own name.  Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
	return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}
113
064af421
BP
/* Upper bound on the netlink message size needed by dp_fill_ifinfo(); used
 * to size the skb in dp_ifinfo_notify().  Must stay in sync with the
 * attributes emitted there (a mismatch trips the WARN_ON in the caller). */
static inline size_t br_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(4) /* IFLA_LINK */
	       + nla_total_size(1); /* IFLA_OPERSTATE */
}
124
/* Fills 'skb' with an RTM_NEWLINK/RTM_DELLINK ifinfo message describing
 * 'port', in AF_BRIDGE style (the local port is reported as the master).
 * Returns the message length on success, a negative errno on failure
 * (-EMSGSIZE if 'skb' is too small — implies a bug in br_nlmsg_size()). */
static int dp_fill_ifinfo(struct sk_buff *skb,
			  const struct vport *port,
			  int event, unsigned int flags)
{
	struct datapath *dp = port->dp;
	int ifindex = vport_get_ifindex(port);
	int iflink = vport_get_iflink(port);
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;

	/* vport_get_ifindex()/iflink() return negative errno on failure;
	 * propagate those directly. */
	if (ifindex < 0)
		return ifindex;

	if (iflink < 0)
		return iflink;

	nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_BRIDGE;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = ARPHRD_ETHER;
	hdr->ifi_index = ifindex;
	hdr->ifi_flags = vport_get_flags(port);
	hdr->ifi_change = 0;

	/* NLA_PUT_* macros jump to nla_put_failure on overflow. */
	NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
	NLA_PUT_U32(skb, IFLA_MASTER,
		vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
	NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
	NLA_PUT_U8(skb, IFLA_OPERSTATE,
		   vport_is_running(port)
			? vport_get_operstate(port)
			: IF_OPER_DOWN);
#endif

	NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

	/* Only report IFLA_LINK when it differs from the port's own index,
	 * matching rtnetlink convention. */
	if (ifindex != iflink)
		NLA_PUT_U32(skb, IFLA_LINK,iflink);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
175
/* Broadcasts an RTNLGRP_LINK notification ('event' is RTM_NEWLINK or
 * RTM_DELLINK) describing 'port' to rtnetlink listeners.  Failures are
 * reported through rtnl_set_sk_err(); nothing is returned. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = dp_fill_ifinfo(skb, port, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	/* rtnl_notify() consumes the skb. */
	rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}
198
58c342f6
BP
/* kobject release callback: frees the datapath once its last sysfs
 * reference is dropped (see destroy_dp_rcu(), which does kobject_put()). */
static void release_dp(struct kobject *kobj)
{
	struct datapath *dp = container_of(kobj, struct datapath, ifobj);
	kfree(dp);
}
204
/* kobject type for the datapath's bridge sysfs directory; release_dp()
 * frees the embedding struct datapath. */
static struct kobj_type dp_ktype = {
	.release = release_dp
};
208
064af421
BP
/* Creates a new datapath numbered 'dp_idx' whose local port is named by the
 * userspace string 'devnamep' ("of<dp_idx>" if NULL).  Returns 0 on success
 * or a negative errno: -EBUSY if 'dp_idx' is taken, -EEXIST if the device
 * name is taken, -ENAMETOOLONG/-EFAULT for bad 'devnamep'.
 *
 * Takes RTNL then dp_mutex (in that order — see the locking comment at the
 * top of the file), and holds the new dp->mutex throughout setup so that
 * lockdep-checked helpers (get_vport_protected etc.) are satisfied. */
static int create_dp(int dp_idx, const char __user *devnamep)
{
	struct odp_port internal_dev_port;
	char devname[IFNAMSIZ];
	struct datapath *dp;
	int err;
	int i;

	if (devnamep) {
		int retval = strncpy_from_user(devname, devnamep, IFNAMSIZ);
		if (retval < 0) {
			err = -EFAULT;
			goto err;
		} else if (retval >= IFNAMSIZ) {
			err = -ENAMETOOLONG;
			goto err;
		}
	} else {
		snprintf(devname, sizeof(devname), "of%d", dp_idx);
	}

	rtnl_lock();
	mutex_lock(&dp_mutex);
	err = -ENODEV;
	if (!try_module_get(THIS_MODULE))
		goto err_unlock;

	/* Exit early if a datapath with that number already exists.
	 * (We don't use -EEXIST because that's ambiguous with 'devname'
	 * conflicting with an existing network device name.) */
	err = -EBUSY;
	if (get_dp(dp_idx))
		goto err_put_module;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_put_module;
	INIT_LIST_HEAD(&dp->port_list);
	mutex_init(&dp->mutex);
	mutex_lock(&dp->mutex);
	dp->dp_idx = dp_idx;
	for (i = 0; i < DP_N_QUEUES; i++)
		skb_queue_head_init(&dp->queues[i]);
	init_waitqueue_head(&dp->waitqueue);

	/* Initialize kobject for bridge.  This will be added as
	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
	dp->ifobj.kset = NULL;
	kobject_init(&dp->ifobj, &dp_ktype);

	/* Allocate table. */
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	/* Set up our datapath device (the internal port ODPP_LOCAL). */
	BUILD_BUG_ON(sizeof(internal_dev_port.devname) != sizeof(devname));
	strcpy(internal_dev_port.devname, devname);
	strcpy(internal_dev_port.type, "internal");
	err = new_vport(dp, &internal_dev_port, ODPP_LOCAL);
	if (err) {
		/* -EBUSY here means the device name clashed; translate it to
		 * the unambiguous -EEXIST (see comment above). */
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_table;
	}

	dp->drop_frags = 0;
	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_local_port;
	}

	/* Publish the datapath; readers may see it as soon as this store
	 * lands, which is why setup above completed first. */
	rcu_assign_pointer(dps[dp_idx], dp);
	dp_sysfs_add_dp(dp);

	mutex_unlock(&dp->mutex);
	mutex_unlock(&dp_mutex);
	rtnl_unlock();

	return 0;

err_destroy_local_port:
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
	tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
	mutex_unlock(&dp->mutex);
	kfree(dp);
err_put_module:
	module_put(THIS_MODULE);
err_unlock:
	mutex_unlock(&dp_mutex);
	rtnl_unlock();
err:
	return err;
}
309
46c6a11d
JG
/* RCU callback that finishes tearing down a datapath after a grace period:
 * drains the upcall queues, destroys the flow table (freeing each flow),
 * frees per-CPU stats, and drops the sysfs reference (release_dp() then
 * frees the struct itself). */
static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);
	int i;

	for (i = 0; i < DP_N_QUEUES; i++)
		skb_queue_purge(&dp->queues[i]);

	/* No readers remain; strip the __rcu annotation for tbl_destroy. */
	tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
	free_percpu(dp->stats_percpu);
	kobject_put(&dp->ifobj);
}
322
/* Destroys the datapath numbered 'dp_idx'.  Detaches every port (local port
 * last), unpublishes the datapath from 'dps', and defers the remaining
 * teardown to destroy_dp_rcu() after an RCU grace period.  Returns 0, or
 * -ENODEV if no such datapath exists. */
static int destroy_dp(int dp_idx)
{
	struct datapath *dp;
	int err = 0;
	struct vport *p, *n;

	/* RTNL before dp_mutex, per the file-wide lock ordering. */
	rtnl_lock();
	mutex_lock(&dp_mutex);
	dp = get_dp(dp_idx);
	if (!dp) {
		err = -ENODEV;
		goto out;
	}

	mutex_lock(&dp->mutex);

	/* _safe iteration: dp_detach_port() unlinks 'p' from port_list. */
	list_for_each_entry_safe (p, n, &dp->port_list, node)
		if (p->port_no != ODPP_LOCAL)
			dp_detach_port(p);

	dp_sysfs_del_dp(dp);
	rcu_assign_pointer(dps[dp->dp_idx], NULL);
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

	mutex_unlock(&dp->mutex);
	call_rcu(&dp->rcu, destroy_dp_rcu);
	module_put(THIS_MODULE);

out:
	mutex_unlock(&dp_mutex);
	rtnl_unlock();
	return err;
}
356
/* Creates a vport described by 'odp_port', attaches it to 'dp' at
 * 'port_no', and broadcasts an RTM_NEWLINK notification.  Returns 0 or a
 * negative errno from vport_add().  Called with RTNL lock and dp->mutex. */
static int new_vport(struct datapath *dp, struct odp_port *odp_port, int port_no)
{
	struct vport_parms parms;
	struct vport *vport;

	parms.name = odp_port->devname;
	parms.type = odp_port->type;
	parms.config = odp_port->config;
	parms.dp = dp;
	parms.port_no = port_no;

	vport_lock();
	vport = vport_add(&parms);
	vport_unlock();

	if (IS_ERR(vport))
		return PTR_ERR(vport);

	/* Publish the port to RCU readers before bumping the count. */
	rcu_assign_pointer(dp->ports[port_no], vport);
	list_add_rcu(&vport->node, &dp->port_list);
	dp->n_ports++;

	dp_ifinfo_notify(RTM_NEWLINK, vport);

	return 0;
}
384
/* ODP_PORT_ATTACH ioctl handler: reads an odp_port description from
 * userspace, attaches it to datapath 'dp_idx' at the first free port number
 * (skipping ODPP_LOCAL at 0), and writes the chosen port number back to
 * 'portp->port'.  Returns 0 or a negative errno (-EFBIG when no port
 * numbers are free). */
static int attach_port(int dp_idx, struct odp_port __user *portp)
{
	struct datapath *dp;
	struct odp_port port;
	int port_no;
	int err;

	err = -EFAULT;
	if (copy_from_user(&port, portp, sizeof(port)))
		goto out;
	/* Userspace strings are not trusted to be terminated. */
	port.devname[IFNAMSIZ - 1] = '\0';
	port.type[VPORT_TYPE_SIZE - 1] = '\0';

	rtnl_lock();
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto out_unlock_rtnl;

	for (port_no = 1; port_no < DP_MAX_PORTS; port_no++)
		if (!dp->ports[port_no])
			goto got_port_no;
	err = -EFBIG;
	goto out_unlock_dp;

got_port_no:
	err = new_vport(dp, &port, port_no);
	if (err)
		goto out_unlock_dp;

	set_internal_devs_mtu(dp);
	dp_sysfs_add_if(get_vport_protected(dp, port_no));

	err = put_user(port_no, &portp->port);

out_unlock_dp:
	mutex_unlock(&dp->mutex);
out_unlock_rtnl:
	rtnl_unlock();
out:
	return err;
}
427
/* Detaches vport 'p' from its datapath and destroys it: removes its sysfs
 * entry (except for the local port, which has none), sends RTM_DELLINK,
 * unlinks it from the datapath's RCU-visible structures, then deletes the
 * vport.  Returns the result of vport_del().  Caller must hold RTNL and
 * the datapath's mutex. */
int dp_detach_port(struct vport *p)
{
	int err;

	ASSERT_RTNL();

	if (p->port_no != ODPP_LOCAL)
		dp_sysfs_del_if(p);
	dp_ifinfo_notify(RTM_DELLINK, p);

	/* First drop references to device. */
	p->dp->n_ports--;
	list_del_rcu(&p->node);
	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

	/* Then destroy it. */
	vport_lock();
	err = vport_del(p);
	vport_unlock();

	return err;
}
450
/* ODP_PORT_DETACH ioctl handler: detaches port 'port_no' from datapath
 * 'dp_idx'.  The local port cannot be detached this way (-EINVAL).
 * Returns 0, -EINVAL, -ENODEV, -ENOENT, or the dp_detach_port() result. */
static int detach_port(int dp_idx, int port_no)
{
	struct vport *p;
	struct datapath *dp;
	int err;

	err = -EINVAL;
	if (port_no < 0 || port_no >= DP_MAX_PORTS || port_no == ODPP_LOCAL)
		goto out;

	rtnl_lock();
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto out_unlock_rtnl;

	p = get_vport_protected(dp, port_no);
	err = -ENOENT;
	if (!p)
		goto out_unlock_dp;

	err = dp_detach_port(p);

out_unlock_dp:
	mutex_unlock(&dp->mutex);
out_unlock_rtnl:
	rtnl_unlock();
out:
	return err;
}
481
/* Datapath receive path: handles one packet arriving on vport 'p'.
 * Extracts the flow key (unless a prior stage already attached a flow),
 * looks it up in the flow table, and either executes the flow's actions or
 * sends a miss upcall to userspace.  Consumes 'skb' in all cases.  Updates
 * exactly one per-CPU counter (n_frags, n_missed, or n_hit) per packet.
 * Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct dp_stats_percpu *stats;
	int stats_counter_off;
	struct sw_flow_actions *acts;
	struct loop_counter *loop;
	int error;

	OVS_CB(skb)->vport = p;

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		struct tbl_node *flow_node;
		bool is_frag;

		/* Extract flow from 'skb' into 'key'. */
		error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key, &is_frag);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		/* Optionally drop IP fragments, per datapath configuration. */
		if (is_frag && dp->drop_frags) {
			kfree_skb(skb);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
			goto out;
		}

		/* Look up flow. */
		flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
				       flow_hash(&key), flow_cmp);
		if (unlikely(!flow_node)) {
			/* Miss: hand the packet to userspace for a decision. */
			struct dp_upcall_info upcall;

			upcall.type = _ODPL_MISS_NR;
			upcall.key = &key;
			upcall.userdata = 0;
			upcall.sample_pool = 0;
			upcall.actions = NULL;
			upcall.actions_len = 0;
			dp_upcall(dp, skb, &upcall);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
			goto out;
		}

		OVS_CB(skb)->flow = flow_cast(flow_node);
	}

	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
	flow_used(OVS_CB(skb)->flow, skb);

	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);

	/* Check whether we've looped too much. */
	loop = loop_get_counter();
	if (unlikely(++loop->count > MAX_LOOPS))
		loop->looping = true;
	if (unlikely(loop->looping)) {
		loop_suppress(dp, acts);
		kfree_skb(skb);
		goto out_loop;
	}

	/* Execute actions. */
	execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
			acts->actions_len);

	/* Check whether sub-actions looped too much. */
	if (unlikely(loop->looping))
		loop_suppress(dp, acts);

out_loop:
	/* Decrement loop counter. */
	if (!--loop->count)
		loop->looping = false;
	loop_put_counter();

out:
	/* Update datapath statistics.  The seqcount lets readers detect a
	 * torn update; BHs are disabled so we can't race ourselves. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	(*(u64 *)((u8 *)stats + stats_counter_off))++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();
}
572
856081f6
BP
/* Copies the packet data of 'skb' into the flat buffer 'to', computing and
 * filling in the pending checksum of a CHECKSUM_PARTIAL skb on the way, so
 * that userspace receives a fully checksummed packet.  'to' must have room
 * for skb->len bytes. */
static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
	u16 csum_start, csum_offset;
	__wsum csum;

	get_skb_csum_pointers(skb, &csum_start, &csum_offset);
	/* Convert csum_start from a headroom-relative to a data-relative
	 * offset. */
	csum_start -= skb_headroom(skb);
	BUG_ON(csum_start >= skb_headlen(skb));

	/* Bytes before the checksummed region are copied verbatim... */
	skb_copy_bits(skb, 0, to, csum_start);

	/* ...the rest is copied while accumulating the checksum, which is
	 * then folded into place in the copy. */
	csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
				      skb->len - csum_start, 0);
	*(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}
588
cb5087ca
BP
589/* Append each packet in 'skb' list to 'queue'. There will be only one packet
590 * unless we broke up a GSO packet. */
856081f6
BP
591static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
592 const struct dp_upcall_info *upcall_info)
cb5087ca
BP
593{
594 struct sk_buff *nskb;
595 int port_no;
596 int err;
597
e779d8d9
BP
598 if (OVS_CB(skb)->vport)
599 port_no = OVS_CB(skb)->vport->port_no;
f2459fe7
JG
600 else
601 port_no = ODPP_LOCAL;
cb5087ca
BP
602
603 do {
856081f6
BP
604 struct odp_packet *upcall;
605 struct sk_buff *user_skb; /* to be queued to userspace */
606 struct nlattr *nla;
607 unsigned int len;
cb5087ca
BP
608
609 nskb = skb->next;
610 skb->next = NULL;
611
856081f6
BP
612 len = sizeof(struct odp_packet);
613 len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
614 len += nla_total_size(skb->len);
615 len += nla_total_size(FLOW_BUFSIZE);
616 if (upcall_info->userdata)
617 len += nla_total_size(8);
618 if (upcall_info->sample_pool)
619 len += nla_total_size(4);
620 if (upcall_info->actions_len)
621 len += nla_total_size(upcall_info->actions_len);
622
623 user_skb = alloc_skb(len, GFP_ATOMIC);
624 if (!user_skb)
cb5087ca
BP
625 goto err_kfree_skbs;
626
856081f6
BP
627 upcall = (struct odp_packet *)__skb_put(user_skb, sizeof(*upcall));
628 upcall->dp_idx = dp->dp_idx;
629
630 nla_put_u32(user_skb, ODP_PACKET_ATTR_TYPE, upcall_info->type);
631
632 nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
633 flow_to_nlattrs(upcall_info->key, user_skb);
634 nla_nest_end(user_skb, nla);
cb5087ca 635
856081f6
BP
636 if (upcall_info->userdata)
637 nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
638 if (upcall_info->sample_pool)
639 nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
640 if (upcall_info->actions_len) {
641 const struct nlattr *actions = upcall_info->actions;
642 u32 actions_len = upcall_info->actions_len;
643
644 nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
645 memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
646 nla_nest_end(user_skb, nla);
647 }
648
649 nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
650 if (skb->ip_summed == CHECKSUM_PARTIAL)
651 copy_and_csum_skb(skb, nla_data(nla));
652 else
653 skb_copy_bits(skb, 0, nla_data(nla), skb->len);
654
655 upcall->len = user_skb->len;
656 skb_queue_tail(&dp->queues[upcall_info->type], user_skb);
657
658 kfree_skb(skb);
cb5087ca
BP
659 skb = nskb;
660 } while (skb);
661 return 0;
662
663err_kfree_skbs:
664 kfree_skb(skb);
665 while ((skb = nskb) != NULL) {
666 nskb = skb->next;
667 kfree_skb(skb);
668 }
669 return err;
670}
671
/* Sends 'skb' to userspace on the upcall queue selected by
 * 'upcall_info->type'.  GSO packets are segmented first so userspace never
 * sees a packet larger than one MTU.  Consumes 'skb'.  Returns 0 on success
 * or a negative errno (-ENOBUFS when the queue is full); on failure the
 * per-CPU n_lost counter is bumped. */
int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	struct sk_buff_head *queue;
	int err;

	WARN_ON_ONCE(skb_shared(skb));
	BUG_ON(upcall_info->type >= DP_N_QUEUES);

	queue = &dp->queues[upcall_info->type];
	err = -ENOBUFS;
	if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN)
		goto err_kfree_skb;

	forward_ip_summed(skb);

	err = vswitch_skb_checksum_setup(skb);
	if (err)
		goto err_kfree_skb;

	/* Break apart GSO packets into their component pieces.  Otherwise
	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

		/* The original skb is no longer needed; the segment list (or
		 * an ERR_PTR) replaces it. */
		kfree_skb(skb);
		skb = nskb;
		if (IS_ERR(skb)) {
			err = PTR_ERR(skb);
			goto err;
		}
	}

	err = queue_control_packets(dp, skb, upcall_info);
	wake_up_interruptible(&dp->waitqueue);
	return err;

err_kfree_skb:
	kfree_skb(skb);
err:
	/* Account the lost packet under the seqcount so readers never see a
	 * torn counter. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	stats->n_lost++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();

	return err;
}
723
/* ODP_FLOW_FLUSH: deletes every flow in 'dp' by swapping in a fresh,
 * minimum-size table and destroying the old one (and its flows) after an
 * RCU grace period.  Returns 0 or -ENOMEM.  Caller holds dp->mutex. */
static int flush_flows(struct datapath *dp)
{
	struct tbl *old_table = get_table_protected(dp);
	struct tbl *new_table;

	new_table = tbl_create(TBL_MIN_BUCKETS);
	if (!new_table)
		return -ENOMEM;

	rcu_assign_pointer(dp->table, new_table);

	/* flow_free_tbl frees each flow as the old table is torn down. */
	tbl_deferred_destroy(old_table, flow_free_tbl);

	return 0;
}
739
/* Validates a userspace-supplied list of datapath actions: every attribute
 * must be a known ODPAT_* type with exactly the expected payload length,
 * and type-specific value constraints must hold.  Returns 0 if the list is
 * valid, -EINVAL or -EOPNOTSUPP otherwise.  This runs on untrusted input,
 * so nothing here may assume well-formedness. */
static int validate_actions(const struct nlattr *actions, u32 actions_len)
{
	const struct nlattr *a;
	int rem;

	nla_for_each_attr(a, actions, actions_len, rem) {
		/* Required payload length for each action type; types absent
		 * from the table implicitly require length 0 and are caught
		 * below unless 0 happens to match (then the switch rejects
		 * them via -EOPNOTSUPP). */
		static const u32 action_lens[ODPAT_MAX + 1] = {
			[ODPAT_OUTPUT] = 4,
			[ODPAT_CONTROLLER] = 8,
			[ODPAT_SET_DL_TCI] = 2,
			[ODPAT_STRIP_VLAN] = 0,
			[ODPAT_SET_DL_SRC] = ETH_ALEN,
			[ODPAT_SET_DL_DST] = ETH_ALEN,
			[ODPAT_SET_NW_SRC] = 4,
			[ODPAT_SET_NW_DST] = 4,
			[ODPAT_SET_NW_TOS] = 1,
			[ODPAT_SET_TP_SRC] = 2,
			[ODPAT_SET_TP_DST] = 2,
			[ODPAT_SET_TUNNEL] = 8,
			[ODPAT_SET_PRIORITY] = 4,
			[ODPAT_POP_PRIORITY] = 0,
			[ODPAT_DROP_SPOOFED_ARP] = 0,
		};
		int type = nla_type(a);

		if (type > ODPAT_MAX || nla_len(a) != action_lens[type])
			return -EINVAL;

		switch (type) {
		case ODPAT_UNSPEC:
			return -EINVAL;

		case ODPAT_CONTROLLER:
		case ODPAT_STRIP_VLAN:
		case ODPAT_SET_DL_SRC:
		case ODPAT_SET_DL_DST:
		case ODPAT_SET_NW_SRC:
		case ODPAT_SET_NW_DST:
		case ODPAT_SET_TP_SRC:
		case ODPAT_SET_TP_DST:
		case ODPAT_SET_TUNNEL:
		case ODPAT_SET_PRIORITY:
		case ODPAT_POP_PRIORITY:
		case ODPAT_DROP_SPOOFED_ARP:
			/* No validation needed. */
			break;

		case ODPAT_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case ODPAT_SET_DL_TCI:
			/* The CFI bit may not be set in a VLAN TCI. */
			if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
				return -EINVAL;
			break;

		case ODPAT_SET_NW_TOS:
			/* The ECN bits of the TOS byte may not be touched. */
			if (nla_get_u8(a) & INET_ECN_MASK)
				return -EINVAL;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	/* Trailing partial attribute means a malformed list. */
	if (rem > 0)
		return -EINVAL;

	return 0;
}
812
/* Allocates a kernel copy of the action list referenced by userspace
 * 'flow', validates it, and returns it.  Returns an ERR_PTR on allocation
 * failure, -EFAULT on a bad userspace pointer, or a validation errno.
 * The caller owns the returned actions. */
static struct sw_flow_actions *get_actions(const struct odp_flow *flow)
{
	struct sw_flow_actions *actions;
	int error;

	actions = flow_actions_alloc(flow->actions_len);
	error = PTR_ERR(actions);
	if (IS_ERR(actions))
		goto error;

	error = -EFAULT;
	if (copy_from_user(actions->actions,
			   (struct nlattr __user __force *)flow->actions,
			   flow->actions_len))
		goto error_free_actions;
	error = validate_actions(actions->actions, actions->actions_len);
	if (error)
		goto error_free_actions;

	return actions;

error_free_actions:
	kfree(actions);
error:
	return ERR_PTR(error);
}
839
/* Fills '*stats' from 'flow'.  flow->used is a jiffies timestamp; it is
 * converted to a monotonic-clock timespec by subtracting the elapsed
 * jiffies from the current CLOCK_MONOTONIC time.  A zero flow->used means
 * "never used" and yields zero used_sec/used_nsec.  Caller is expected to
 * hold flow->lock for a consistent snapshot (callers in this file do). */
static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats)
{
	if (flow->used) {
		struct timespec offset_ts, used, now_mono;

		ktime_get_ts(&now_mono);
		jiffies_to_timespec(jiffies - flow->used, &offset_ts);
		set_normalized_timespec(&used, now_mono.tv_sec - offset_ts.tv_sec,
					now_mono.tv_nsec - offset_ts.tv_nsec);

		stats->used_sec = used.tv_sec;
		stats->used_nsec = used.tv_nsec;
	} else {
		stats->used_sec = 0;
		stats->used_nsec = 0;
	}

	stats->n_packets = flow->packet_count;
	stats->n_bytes = flow->byte_count;
	stats->reserved = 0;
	stats->tcp_flags = flow->tcp_flags;
	stats->error = 0;
}
863
864static void clear_stats(struct sw_flow *flow)
865{
6bfafa55 866 flow->used = 0;
064af421 867 flow->tcp_flags = 0;
064af421
BP
868 flow->packet_count = 0;
869 flow->byte_count = 0;
870}
871
8d5ebd83
JG
/* Replaces dp's flow table with a larger copy and schedules the old table
 * for destruction after an RCU grace period (flows are shared with the new
 * table, so the destructor callback is NULL).  Returns 0 or the tbl_expand()
 * errno.  Caller holds dp->mutex. */
static int expand_table(struct datapath *dp)
{
	struct tbl *old_table = get_table_protected(dp);
	struct tbl *new_table;

	new_table = tbl_expand(old_table);
	if (IS_ERR(new_table))
		return PTR_ERR(new_table);

	rcu_assign_pointer(dp->table, new_table);
	tbl_deferred_destroy(old_table, NULL);

	return 0;
}
886
44e05eca
BP
/* Core of ODP_FLOW_PUT: creates a new flow or modifies an existing one in
 * 'dp' according to 'uf', and reports the flow's (possibly pre-clear) stats
 * through '*stats'.  Honors ODPPF_CREATE / ODPPF_MODIFY / ODPPF_ZERO_STATS.
 * Returns 0, -ENOENT (no flow and !CREATE), -EEXIST (flow present and
 * !MODIFY), or another negative errno.  Caller holds dp->mutex. */
static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
		       struct odp_flow_stats *stats)
{
	struct tbl_node *flow_node;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct tbl *table;
	struct sw_flow_actions *acts = NULL;
	int error;
	u32 hash;

	error = flow_copy_from_user(&key, (const struct nlattr __force __user *)uf->flow.key,
				    uf->flow.key_len);
	if (error)
		return error;

	hash = flow_hash(&key);
	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
	if (!flow_node) {
		/* No such flow. */
		error = -ENOENT;
		if (!(uf->flags & ODPPF_CREATE))
			goto error;

		/* Expand table, if necessary, to make room. */
		if (tbl_count(table) >= tbl_n_buckets(table)) {
			error = expand_table(dp);
			if (error)
				goto error;
			/* expand_table() swapped in a new table. */
			table = get_table_protected(dp);
		}

		/* Allocate flow. */
		flow = flow_alloc();
		if (IS_ERR(flow)) {
			error = PTR_ERR(flow);
			goto error;
		}
		flow->key = key;
		clear_stats(flow);

		/* Obtain actions. */
		acts = get_actions(&uf->flow);
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error_free_flow;
		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = tbl_insert(table, &flow->tbl_node, hash);
		if (error)
			goto error_free_flow_acts;

		/* A freshly created flow has all-zero statistics. */
		memset(stats, 0, sizeof(struct odp_flow_stats));
	} else {
		/* We found a matching flow. */
		struct sw_flow_actions *old_acts, *new_acts;

		flow = flow_cast(flow_node);

		/* Bail out if we're not allowed to modify an existing flow. */
		error = -EEXIST;
		if (!(uf->flags & ODPPF_MODIFY))
			goto error;

		/* Swap actions.  The RCU pointer swap lets concurrent readers
		 * keep using the old actions until a grace period passes. */
		new_acts = get_actions(&uf->flow);
		error = PTR_ERR(new_acts);
		if (IS_ERR(new_acts))
			goto error;

		old_acts = rcu_dereference_protected(flow->sf_acts,
						     lockdep_is_held(&dp->mutex));
		if (old_acts->actions_len != new_acts->actions_len ||
		    memcmp(old_acts->actions, new_acts->actions,
			   old_acts->actions_len)) {
			rcu_assign_pointer(flow->sf_acts, new_acts);
			flow_deferred_free_acts(old_acts);
		} else {
			/* Identical actions: keep the old ones in place. */
			kfree(new_acts);
		}

		/* Fetch stats, then clear them if necessary. */
		spin_lock_bh(&flow->lock);
		get_stats(flow, stats);
		if (uf->flags & ODPPF_ZERO_STATS)
			clear_stats(flow);
		spin_unlock_bh(&flow->lock);
	}

	return 0;

error_free_flow_acts:
	kfree(acts);
error_free_flow:
	/* Prevent flow_put() from freeing actions we already handled. */
	flow->sf_acts = NULL;
	flow_put(flow);
error:
	return error;
}
988
44e05eca
BP
989static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
990{
991 struct odp_flow_stats stats;
992 struct odp_flow_put uf;
993 int error;
994
995 if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put)))
996 return -EFAULT;
997
998 error = do_put_flow(dp, &uf, &stats);
999 if (error)
1000 return error;
1001
776f10ce
BP
1002 if (copy_to_user(&ufp->flow.stats, &stats,
1003 sizeof(struct odp_flow_stats)))
44e05eca
BP
1004 return -EFAULT;
1005
1006 return 0;
1007}
1008
d3c54451
JG
/* Copies 'flow's statistics (and, if requested, its action list) out to the
 * userspace buffers 'ustats', 'actions', and 'actions_lenp'.  With
 * ODPFF_ZERO_TCP_FLAGS the flow's accumulated TCP flags are reset after the
 * snapshot.  '*actions_lenp' is updated to the true actions length; at most
 * the smaller of the two lengths is copied.  Returns 0 or -EFAULT.
 * Caller holds dp->mutex (required by the rcu_dereference_protected). */
static int do_answer_query(struct datapath *dp, struct sw_flow *flow,
			   u32 query_flags,
			   struct odp_flow_stats __user *ustats,
			   struct nlattr __user *actions,
			   u32 __user *actions_lenp)
{
	struct sw_flow_actions *sf_acts;
	struct odp_flow_stats stats;
	u32 actions_len;

	/* Snapshot the stats under the flow lock for consistency. */
	spin_lock_bh(&flow->lock);
	get_stats(flow, &stats);
	if (query_flags & ODPFF_ZERO_TCP_FLAGS)
		flow->tcp_flags = 0;

	spin_unlock_bh(&flow->lock);

	if (copy_to_user(ustats, &stats, sizeof(struct odp_flow_stats)) ||
	    get_user(actions_len, actions_lenp))
		return -EFAULT;

	/* Userspace supplied no buffer space for actions; stats only. */
	if (!actions_len)
		return 0;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_is_held(&dp->mutex));
	if (put_user(sf_acts->actions_len, actions_lenp) ||
	    (actions && copy_to_user(actions, sf_acts->actions,
				     min(sf_acts->actions_len, actions_len))))
		return -EFAULT;

	return 0;
}
1042
d3c54451
JG
1043static int answer_query(struct datapath *dp, struct sw_flow *flow,
1044 u32 query_flags, struct odp_flow __user *ufp)
064af421 1045{
1b29ebe5 1046 struct nlattr __user *actions;
064af421 1047
1b29ebe5 1048 if (get_user(actions, (struct nlattr __user * __user *)&ufp->actions))
064af421 1049 return -EFAULT;
44e05eca 1050
d3c54451 1051 return do_answer_query(dp, flow, query_flags,
cdee00fd 1052 &ufp->stats, actions, &ufp->actions_len);
064af421
BP
1053}
1054
/* Removes from 'dp' the flow matching the userspace-supplied key ('key',
 * 'key_len') and returns it; the caller is responsible for freeing it
 * (after an RCU grace period).  Returns ERR_PTR(-ENOENT) if no flow
 * matches, or another ERR_PTR errno.  Caller holds dp->mutex. */
static struct sw_flow *do_del_flow(struct datapath *dp, const struct nlattr __user *key, u32 key_len)
{
	struct tbl *table = get_table_protected(dp);
	struct tbl_node *flow_node;
	struct sw_flow_key swkey;
	int error;

	error = flow_copy_from_user(&swkey, key, key_len);
	if (error)
		return ERR_PTR(error);

	flow_node = tbl_lookup(table, &swkey, flow_hash(&swkey), flow_cmp);
	if (!flow_node)
		return ERR_PTR(-ENOENT);

	error = tbl_remove(table, flow_node);
	if (error)
		return ERR_PTR(error);

	/* XXX Returned flow_node's statistics might lose a few packets, since
	 * other CPUs can be using this flow.  We used to synchronize_rcu() to
	 * make sure that we get completely accurate stats, but that blows our
	 * performance, badly. */
	return flow_cast(flow_node);
}
1080
1081static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
1082{
1083 struct sw_flow *flow;
1084 struct odp_flow uf;
1085 int error;
1086
84c17d98 1087 if (copy_from_user(&uf, ufp, sizeof(uf)))
44e05eca
BP
1088 return -EFAULT;
1089
36956a7d 1090 flow = do_del_flow(dp, (const struct nlattr __force __user *)uf.key, uf.key_len);
44e05eca
BP
1091 if (IS_ERR(flow))
1092 return PTR_ERR(flow);
8d5ebd83 1093
d3c54451 1094 error = answer_query(dp, flow, 0, ufp);
f1aa2072 1095 flow_deferred_free(flow);
064af421
BP
1096 return error;
1097}
1098
44e05eca 1099static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
064af421 1100{
9abaf6b3 1101 struct tbl *table = get_table_protected(dp);
6d7568dc
BP
1102 u32 i;
1103
064af421 1104 for (i = 0; i < flowvec->n_flows; i++) {
6c229737 1105 struct odp_flow __user *ufp = (struct odp_flow __user __force *)&flowvec->flows[i];
36956a7d 1106 struct sw_flow_key key;
064af421 1107 struct odp_flow uf;
8d5ebd83 1108 struct tbl_node *flow_node;
064af421
BP
1109 int error;
1110
84c17d98 1111 if (copy_from_user(&uf, ufp, sizeof(uf)))
064af421 1112 return -EFAULT;
064af421 1113
36956a7d
BP
1114 error = flow_copy_from_user(&key, (const struct nlattr __force __user *)uf.key, uf.key_len);
1115 if (error)
1116 return error;
1117
1118 flow_node = tbl_lookup(table, &uf.key, flow_hash(&key), flow_cmp);
8d5ebd83 1119 if (!flow_node)
776f10ce 1120 error = put_user(ENOENT, &ufp->stats.error);
064af421 1121 else
d3c54451 1122 error = answer_query(dp, flow_cast(flow_node), uf.flags, ufp);
064af421
BP
1123 if (error)
1124 return -EFAULT;
1125 }
1126 return flowvec->n_flows;
1127}
1128
064af421
BP
1129static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
1130 int (*function)(struct datapath *,
1131 const struct odp_flowvec *))
1132{
1133 struct odp_flowvec __user *uflowvec;
1134 struct odp_flowvec flowvec;
1135 int retval;
1136
1137 uflowvec = (struct odp_flowvec __user *)argp;
84c17d98 1138 if (copy_from_user(&flowvec, uflowvec, sizeof(flowvec)))
064af421
BP
1139 return -EFAULT;
1140
1141 if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow))
1142 return -EINVAL;
1143
064af421
BP
1144 retval = function(dp, &flowvec);
1145 return (retval < 0 ? retval
1146 : retval == flowvec.n_flows ? 0
776f10ce 1147 : put_user(retval, &uflowvec->n_flows));
064af421
BP
1148}
1149
704a1e09
BP
1150static struct sw_flow *do_dump_flow(struct datapath *dp, u32 __user *state)
1151{
1152 struct tbl *table = get_table_protected(dp);
1153 struct tbl_node *tbl_node;
1154 u32 bucket, obj;
1155
1156 if (get_user(bucket, &state[0]) || get_user(obj, &state[1]))
1157 return ERR_PTR(-EFAULT);
1158
1159 tbl_node = tbl_next(table, &bucket, &obj);
1160
1161 if (put_user(bucket, &state[0]) || put_user(obj, &state[1]))
1162 return ERR_PTR(-EFAULT);
1163
1164 return tbl_node ? flow_cast(tbl_node) : NULL;
1165}
1166
1167static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *udumpp)
1168{
1169 struct odp_flow __user *uflowp;
36956a7d 1170 struct nlattr __user *ukey;
704a1e09 1171 struct sw_flow *flow;
36956a7d 1172 u32 key_len;
704a1e09
BP
1173
1174 flow = do_dump_flow(dp, udumpp->state);
1175 if (IS_ERR(flow))
1176 return PTR_ERR(flow);
1177
1178 if (get_user(uflowp, (struct odp_flow __user *__user*)&udumpp->flow))
1179 return -EFAULT;
1180
1181 if (!flow)
1182 return put_user(ODPFF_EOF, &uflowp->flags);
1183
36956a7d
BP
1184 if (put_user(0, &uflowp->flags) ||
1185 get_user(ukey, (struct nlattr __user * __user*)&uflowp->key) ||
1186 get_user(key_len, &uflowp->key_len))
704a1e09 1187 return -EFAULT;
36956a7d
BP
1188
1189 key_len = flow_copy_to_user(ukey, &flow->key, key_len);
1190 if (key_len < 0)
1191 return key_len;
1192 if (put_user(key_len, &uflowp->key_len))
1193 return -EFAULT;
1194
704a1e09
BP
1195 return answer_query(dp, flow, 0, uflowp);
1196}
1197
44e05eca 1198static int do_execute(struct datapath *dp, const struct odp_execute *execute)
064af421 1199{
36956a7d 1200 struct sw_flow_key key;
064af421
BP
1201 struct sk_buff *skb;
1202 struct sw_flow_actions *actions;
a393b897 1203 struct ethhdr *eth;
b7a31ec1 1204 bool is_frag;
064af421
BP
1205 int err;
1206
064af421 1207 err = -EINVAL;
44e05eca 1208 if (execute->length < ETH_HLEN || execute->length > 65535)
064af421
BP
1209 goto error;
1210
cdee00fd 1211 actions = flow_actions_alloc(execute->actions_len);
8ba1fd2f
JG
1212 if (IS_ERR(actions)) {
1213 err = PTR_ERR(actions);
064af421 1214 goto error;
8ba1fd2f 1215 }
064af421
BP
1216
1217 err = -EFAULT;
1b29ebe5 1218 if (copy_from_user(actions->actions,
6c229737 1219 (struct nlattr __user __force *)execute->actions, execute->actions_len))
064af421
BP
1220 goto error_free_actions;
1221
cdee00fd 1222 err = validate_actions(actions->actions, execute->actions_len);
064af421
BP
1223 if (err)
1224 goto error_free_actions;
1225
1226 err = -ENOMEM;
44e05eca 1227 skb = alloc_skb(execute->length, GFP_KERNEL);
064af421
BP
1228 if (!skb)
1229 goto error_free_actions;
659586ef 1230
064af421 1231 err = -EFAULT;
1b29ebe5 1232 if (copy_from_user(skb_put(skb, execute->length),
6c229737 1233 (const void __user __force *)execute->data,
44e05eca 1234 execute->length))
064af421
BP
1235 goto error_free_skb;
1236
a393b897
JP
1237 skb_reset_mac_header(skb);
1238 eth = eth_hdr(skb);
1239
de3f65ea
JP
1240 /* Normally, setting the skb 'protocol' field would be handled by a
1241 * call to eth_type_trans(), but it assumes there's a sending
1242 * device, which we may not have. */
a393b897
JP
1243 if (ntohs(eth->h_proto) >= 1536)
1244 skb->protocol = eth->h_proto;
1245 else
1246 skb->protocol = htons(ETH_P_802_2);
1247
f1588b1f 1248 err = flow_extract(skb, -1, &key, &is_frag);
4c1ad233
BP
1249 if (err)
1250 goto error_free_skb;
9dca7bd5
JG
1251
1252 rcu_read_lock();
cdee00fd 1253 err = execute_actions(dp, skb, &key, actions->actions, actions->actions_len);
9dca7bd5
JG
1254 rcu_read_unlock();
1255
064af421
BP
1256 kfree(actions);
1257 return err;
1258
1259error_free_skb:
1260 kfree_skb(skb);
1261error_free_actions:
1262 kfree(actions);
1263error:
1264 return err;
1265}
1266
44e05eca
BP
1267static int execute_packet(struct datapath *dp, const struct odp_execute __user *executep)
1268{
1269 struct odp_execute execute;
1270
84c17d98 1271 if (copy_from_user(&execute, executep, sizeof(execute)))
44e05eca
BP
1272 return -EFAULT;
1273
1274 return do_execute(dp, &execute);
1275}
1276
16190191 1277static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
064af421 1278{
9abaf6b3 1279 struct tbl *table = get_table_protected(dp);
064af421
BP
1280 struct odp_stats stats;
1281 int i;
1282
8d5ebd83
JG
1283 stats.n_flows = tbl_count(table);
1284 stats.cur_capacity = tbl_n_buckets(table);
1285 stats.max_capacity = TBL_MAX_BUCKETS;
064af421
BP
1286 stats.n_ports = dp->n_ports;
1287 stats.max_ports = DP_MAX_PORTS;
064af421
BP
1288 stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
1289 for_each_possible_cpu(i) {
38c6ecbc
JG
1290 const struct dp_stats_percpu *percpu_stats;
1291 struct dp_stats_percpu local_stats;
1292 unsigned seqcount;
1293
1294 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
1295
1296 do {
1297 seqcount = read_seqcount_begin(&percpu_stats->seqlock);
1298 local_stats = *percpu_stats;
1299 } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
1300
1301 stats.n_frags += local_stats.n_frags;
1302 stats.n_hit += local_stats.n_hit;
1303 stats.n_missed += local_stats.n_missed;
1304 stats.n_lost += local_stats.n_lost;
064af421
BP
1305 }
1306 stats.max_miss_queue = DP_MAX_QUEUE_LEN;
1307 stats.max_action_queue = DP_MAX_QUEUE_LEN;
84c17d98 1308 return copy_to_user(statsp, &stats, sizeof(stats)) ? -EFAULT : 0;
064af421
BP
1309}
1310
1dcf111b
JP
1311/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports */
1312int dp_min_mtu(const struct datapath *dp)
1313{
e779d8d9 1314 struct vport *p;
1dcf111b
JP
1315 int mtu = 0;
1316
1317 ASSERT_RTNL();
1318
1319 list_for_each_entry_rcu (p, &dp->port_list, node) {
f2459fe7 1320 int dev_mtu;
1dcf111b
JP
1321
1322 /* Skip any internal ports, since that's what we're trying to
1323 * set. */
e779d8d9 1324 if (is_internal_vport(p))
1dcf111b
JP
1325 continue;
1326
e779d8d9 1327 dev_mtu = vport_get_mtu(p);
f2459fe7
JG
1328 if (!mtu || dev_mtu < mtu)
1329 mtu = dev_mtu;
1dcf111b
JP
1330 }
1331
1332 return mtu ? mtu : ETH_DATA_LEN;
1333}
1334
f2459fe7 1335/* Sets the MTU of all datapath devices to the minimum of the ports. Must
d8b5d43a 1336 * be called with RTNL lock. */
f2459fe7 1337void set_internal_devs_mtu(const struct datapath *dp)
a7786963 1338{
e779d8d9 1339 struct vport *p;
a7786963
JG
1340 int mtu;
1341
1342 ASSERT_RTNL();
1343
a7786963
JG
1344 mtu = dp_min_mtu(dp);
1345
1346 list_for_each_entry_rcu (p, &dp->port_list, node) {
e779d8d9
BP
1347 if (is_internal_vport(p))
1348 vport_set_mtu(p, mtu);
a7786963
JG
1349 }
1350}
1351
e779d8d9 1352static int put_port(const struct vport *p, struct odp_port __user *uop)
064af421
BP
1353{
1354 struct odp_port op;
f2459fe7 1355
84c17d98 1356 memset(&op, 0, sizeof(op));
f2459fe7
JG
1357
1358 rcu_read_lock();
84c17d98
BP
1359 strncpy(op.devname, vport_get_name(p), sizeof(op.devname));
1360 strncpy(op.type, vport_get_type(p), sizeof(op.type));
dd851cbb 1361 vport_get_config(p, op.config);
f2459fe7
JG
1362 rcu_read_unlock();
1363
064af421 1364 op.port = p->port_no;
f2459fe7 1365
84c17d98 1366 return copy_to_user(uop, &op, sizeof(op)) ? -EFAULT : 0;
064af421
BP
1367}
1368
fceb2a5b 1369static int query_port(struct datapath *dp, struct odp_port __user *uport)
064af421
BP
1370{
1371 struct odp_port port;
7e71ab66 1372 struct vport *vport;
064af421 1373
84c17d98 1374 if (copy_from_user(&port, uport, sizeof(port)))
064af421 1375 return -EFAULT;
f2459fe7 1376
064af421 1377 if (port.devname[0]) {
064af421
BP
1378 port.devname[IFNAMSIZ - 1] = '\0';
1379
f2459fe7 1380 vport_lock();
f2459fe7 1381 vport = vport_locate(port.devname);
f2459fe7
JG
1382 vport_unlock();
1383
7e71ab66
JG
1384 if (!vport)
1385 return -ENODEV;
1386 if (vport->dp != dp)
1387 return -ENOENT;
064af421
BP
1388 } else {
1389 if (port.port >= DP_MAX_PORTS)
1390 return -EINVAL;
7e71ab66
JG
1391
1392 vport = get_vport_protected(dp, port.port);
1393 if (!vport)
064af421 1394 return -ENOENT;
064af421 1395 }
f2459fe7 1396
7e71ab66 1397 return put_port(vport, uport);
064af421
BP
1398}
1399
b0ec0f27 1400static int do_dump_port(struct datapath *dp, struct odp_vport_dump *dump)
064af421 1401{
b0ec0f27
BP
1402 u32 port_no;
1403
1404 for (port_no = dump->port_no; port_no < DP_MAX_PORTS; port_no++) {
1405 struct vport *vport = get_vport_protected(dp, port_no);
1406 if (vport)
1407 return put_port(vport, (struct odp_port __force __user*)dump->port);
064af421 1408 }
b0ec0f27
BP
1409
1410 return put_user('\0', (char __force __user*)&dump->port->devname[0]);
44e05eca
BP
1411}
1412
b0ec0f27 1413static int dump_port(struct datapath *dp, struct odp_vport_dump __user *udump)
44e05eca 1414{
b0ec0f27 1415 struct odp_vport_dump dump;
44e05eca 1416
b0ec0f27 1417 if (copy_from_user(&dump, udump, sizeof(dump)))
44e05eca
BP
1418 return -EFAULT;
1419
b0ec0f27 1420 return do_dump_port(dp, &dump);
064af421
BP
1421}
1422
7c40efc9
BP
1423static int get_listen_mask(const struct file *f)
1424{
1425 return (long)f->private_data;
1426}
1427
1428static void set_listen_mask(struct file *f, int listen_mask)
1429{
1430 f->private_data = (void*)(long)listen_mask;
1431}
1432
064af421
BP
1433static long openvswitch_ioctl(struct file *f, unsigned int cmd,
1434 unsigned long argp)
1435{
1436 int dp_idx = iminor(f->f_dentry->d_inode);
1437 struct datapath *dp;
1438 int drop_frags, listeners, port_no;
72b06300 1439 unsigned int sflow_probability;
064af421
BP
1440 int err;
1441
1442 /* Handle commands with special locking requirements up front. */
1443 switch (cmd) {
1444 case ODP_DP_CREATE:
e86c8696
BP
1445 err = create_dp(dp_idx, (char __user *)argp);
1446 goto exit;
064af421
BP
1447
1448 case ODP_DP_DESTROY:
e86c8696
BP
1449 err = destroy_dp(dp_idx);
1450 goto exit;
064af421 1451
c3827f61 1452 case ODP_VPORT_ATTACH:
f2459fe7 1453 err = attach_port(dp_idx, (struct odp_port __user *)argp);
e86c8696 1454 goto exit;
064af421 1455
c3827f61 1456 case ODP_VPORT_DETACH:
064af421 1457 err = get_user(port_no, (int __user *)argp);
e86c8696 1458 if (!err)
f2459fe7
JG
1459 err = detach_port(dp_idx, port_no);
1460 goto exit;
1461
f2459fe7 1462 case ODP_VPORT_MOD:
c3827f61 1463 err = vport_user_mod((struct odp_port __user *)argp);
f2459fe7
JG
1464 goto exit;
1465
1466 case ODP_VPORT_STATS_GET:
61e89cd6 1467 err = vport_user_stats_get((struct odp_vport_stats_req __user *)argp);
f2459fe7
JG
1468 goto exit;
1469
780e6207
JG
1470 case ODP_VPORT_STATS_SET:
1471 err = vport_user_stats_set((struct odp_vport_stats_req __user *)argp);
1472 goto exit;
1473
f2459fe7 1474 case ODP_VPORT_ETHER_GET:
61e89cd6 1475 err = vport_user_ether_get((struct odp_vport_ether __user *)argp);
f2459fe7
JG
1476 goto exit;
1477
1478 case ODP_VPORT_ETHER_SET:
61e89cd6 1479 err = vport_user_ether_set((struct odp_vport_ether __user *)argp);
f2459fe7
JG
1480 goto exit;
1481
1482 case ODP_VPORT_MTU_GET:
61e89cd6 1483 err = vport_user_mtu_get((struct odp_vport_mtu __user *)argp);
f2459fe7
JG
1484 goto exit;
1485
1486 case ODP_VPORT_MTU_SET:
61e89cd6 1487 err = vport_user_mtu_set((struct odp_vport_mtu __user *)argp);
e86c8696 1488 goto exit;
064af421
BP
1489 }
1490
1491 dp = get_dp_locked(dp_idx);
e86c8696 1492 err = -ENODEV;
064af421 1493 if (!dp)
e86c8696 1494 goto exit;
064af421
BP
1495
1496 switch (cmd) {
1497 case ODP_DP_STATS:
1498 err = get_dp_stats(dp, (struct odp_stats __user *)argp);
1499 break;
1500
1501 case ODP_GET_DROP_FRAGS:
1502 err = put_user(dp->drop_frags, (int __user *)argp);
1503 break;
1504
1505 case ODP_SET_DROP_FRAGS:
1506 err = get_user(drop_frags, (int __user *)argp);
1507 if (err)
1508 break;
1509 err = -EINVAL;
1510 if (drop_frags != 0 && drop_frags != 1)
1511 break;
1512 dp->drop_frags = drop_frags;
1513 err = 0;
1514 break;
1515
1516 case ODP_GET_LISTEN_MASK:
7c40efc9 1517 err = put_user(get_listen_mask(f), (int __user *)argp);
064af421
BP
1518 break;
1519
1520 case ODP_SET_LISTEN_MASK:
1521 err = get_user(listeners, (int __user *)argp);
1522 if (err)
1523 break;
1524 err = -EINVAL;
1525 if (listeners & ~ODPL_ALL)
1526 break;
1527 err = 0;
7c40efc9 1528 set_listen_mask(f, listeners);
064af421
BP
1529 break;
1530
72b06300
BP
1531 case ODP_GET_SFLOW_PROBABILITY:
1532 err = put_user(dp->sflow_probability, (unsigned int __user *)argp);
1533 break;
1534
1535 case ODP_SET_SFLOW_PROBABILITY:
1536 err = get_user(sflow_probability, (unsigned int __user *)argp);
1537 if (!err)
1538 dp->sflow_probability = sflow_probability;
1539 break;
1540
c3827f61 1541 case ODP_VPORT_QUERY:
064af421
BP
1542 err = query_port(dp, (struct odp_port __user *)argp);
1543 break;
1544
b0ec0f27
BP
1545 case ODP_VPORT_DUMP:
1546 err = dump_port(dp, (struct odp_vport_dump __user *)argp);
064af421
BP
1547 break;
1548
064af421
BP
1549 case ODP_FLOW_FLUSH:
1550 err = flush_flows(dp);
1551 break;
1552
1553 case ODP_FLOW_PUT:
1554 err = put_flow(dp, (struct odp_flow_put __user *)argp);
1555 break;
1556
1557 case ODP_FLOW_DEL:
f1aa2072 1558 err = del_flow(dp, (struct odp_flow __user *)argp);
064af421
BP
1559 break;
1560
f1aa2072 1561 case ODP_FLOW_GET:
44e05eca 1562 err = do_flowvec_ioctl(dp, argp, do_query_flows);
064af421
BP
1563 break;
1564
704a1e09
BP
1565 case ODP_FLOW_DUMP:
1566 err = dump_flow(dp, (struct odp_flow_dump __user *)argp);
064af421
BP
1567 break;
1568
1569 case ODP_EXECUTE:
44e05eca 1570 err = execute_packet(dp, (struct odp_execute __user *)argp);
064af421
BP
1571 break;
1572
1573 default:
1574 err = -ENOIOCTLCMD;
1575 break;
1576 }
1577 mutex_unlock(&dp->mutex);
e86c8696 1578exit:
064af421
BP
1579 return err;
1580}
1581
1582static int dp_has_packet_of_interest(struct datapath *dp, int listeners)
1583{
1584 int i;
1585 for (i = 0; i < DP_N_QUEUES; i++) {
1586 if (listeners & (1 << i) && !skb_queue_empty(&dp->queues[i]))
1587 return 1;
1588 }
1589 return 0;
1590}
1591
3fbd517a 1592#ifdef CONFIG_COMPAT
b0ec0f27 1593static int compat_dump_port(struct datapath *dp, struct compat_odp_vport_dump __user *compat)
3fbd517a 1594{
b0ec0f27
BP
1595 struct odp_vport_dump dump;
1596 compat_uptr_t port;
3fbd517a 1597
b0ec0f27
BP
1598 if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_vport_dump)) ||
1599 __get_user(port, &compat->port) ||
1600 __get_user(dump.port_no, &compat->port_no))
3fbd517a
BP
1601 return -EFAULT;
1602
b0ec0f27
BP
1603 dump.port = (struct odp_port __force *)compat_ptr(port);
1604 return do_dump_port(dp, &dump);
3fbd517a
BP
1605}
1606
3fbd517a
BP
1607static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat)
1608{
36956a7d 1609 compat_uptr_t key, actions;
3fbd517a
BP
1610
1611 if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) ||
1612 __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) ||
36956a7d
BP
1613 __get_user(key, &compat->key) ||
1614 __get_user(flow->key_len, &compat->key_len) ||
3fbd517a 1615 __get_user(actions, &compat->actions) ||
cdee00fd 1616 __get_user(flow->actions_len, &compat->actions_len) ||
3fbd517a
BP
1617 __get_user(flow->flags, &compat->flags))
1618 return -EFAULT;
1619
36956a7d 1620 flow->key = (struct nlattr __force *)compat_ptr(key);
1b29ebe5 1621 flow->actions = (struct nlattr __force *)compat_ptr(actions);
3fbd517a
BP
1622 return 0;
1623}
1624
1625static int compat_put_flow(struct datapath *dp, struct compat_odp_flow_put __user *ufp)
1626{
1627 struct odp_flow_stats stats;
1628 struct odp_flow_put fp;
1629 int error;
1630
1631 if (compat_get_flow(&fp.flow, &ufp->flow) ||
1632 get_user(fp.flags, &ufp->flags))
1633 return -EFAULT;
1634
1635 error = do_put_flow(dp, &fp, &stats);
1636 if (error)
1637 return error;
1638
1639 if (copy_to_user(&ufp->flow.stats, &stats,
1640 sizeof(struct odp_flow_stats)))
1641 return -EFAULT;
1642
1643 return 0;
1644}
1645
d3c54451
JG
1646static int compat_answer_query(struct datapath *dp, struct sw_flow *flow,
1647 u32 query_flags,
3fbd517a
BP
1648 struct compat_odp_flow __user *ufp)
1649{
1650 compat_uptr_t actions;
1651
1652 if (get_user(actions, &ufp->actions))
1653 return -EFAULT;
1654
d3c54451 1655 return do_answer_query(dp, flow, query_flags, &ufp->stats,
cdee00fd 1656 compat_ptr(actions), &ufp->actions_len);
3fbd517a
BP
1657}
1658
1659static int compat_del_flow(struct datapath *dp, struct compat_odp_flow __user *ufp)
1660{
1661 struct sw_flow *flow;
1662 struct odp_flow uf;
1663 int error;
1664
1665 if (compat_get_flow(&uf, ufp))
1666 return -EFAULT;
1667
36956a7d 1668 flow = do_del_flow(dp, (const struct nlattr __force __user *)uf.key, uf.key_len);
3fbd517a
BP
1669 if (IS_ERR(flow))
1670 return PTR_ERR(flow);
1671
d3c54451 1672 error = compat_answer_query(dp, flow, 0, ufp);
3fbd517a
BP
1673 flow_deferred_free(flow);
1674 return error;
1675}
1676
1b29ebe5
JG
1677static int compat_query_flows(struct datapath *dp,
1678 struct compat_odp_flow __user *flows,
1679 u32 n_flows)
3fbd517a 1680{
9abaf6b3 1681 struct tbl *table = get_table_protected(dp);
3fbd517a
BP
1682 u32 i;
1683
1684 for (i = 0; i < n_flows; i++) {
1685 struct compat_odp_flow __user *ufp = &flows[i];
1686 struct odp_flow uf;
1687 struct tbl_node *flow_node;
36956a7d 1688 struct sw_flow_key key;
3fbd517a
BP
1689 int error;
1690
1691 if (compat_get_flow(&uf, ufp))
1692 return -EFAULT;
3fbd517a 1693
36956a7d
BP
1694 error = flow_copy_from_user(&key, (const struct nlattr __force __user *) uf.key, uf.key_len);
1695 if (error)
1696 return error;
1697
1698 flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
3fbd517a
BP
1699 if (!flow_node)
1700 error = put_user(ENOENT, &ufp->stats.error);
1701 else
d3c54451
JG
1702 error = compat_answer_query(dp, flow_cast(flow_node),
1703 uf.flags, ufp);
3fbd517a
BP
1704 if (error)
1705 return -EFAULT;
1706 }
1707 return n_flows;
1708}
1709
704a1e09 1710static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __user *udumpp)
3fbd517a 1711{
704a1e09
BP
1712 struct compat_odp_flow __user *uflowp;
1713 compat_uptr_t compat_ufp;
1714 struct sw_flow *flow;
36956a7d
BP
1715 compat_uptr_t ukey;
1716 u32 key_len;
3fbd517a 1717
704a1e09
BP
1718 flow = do_dump_flow(dp, udumpp->state);
1719 if (IS_ERR(flow))
1720 return PTR_ERR(flow);
3fbd517a 1721
704a1e09
BP
1722 if (get_user(compat_ufp, &udumpp->flow))
1723 return -EFAULT;
1724 uflowp = compat_ptr(compat_ufp);
3fbd517a 1725
704a1e09
BP
1726 if (!flow)
1727 return put_user(ODPFF_EOF, &uflowp->flags);
6bfafa55 1728
36956a7d
BP
1729 if (put_user(0, &uflowp->flags) ||
1730 get_user(ukey, &uflowp->key) ||
1731 get_user(key_len, &uflowp->key_len))
1732 return -EFAULT;
1733
1734 key_len = flow_copy_to_user(compat_ptr(ukey), &flow->key, key_len);
1735 if (key_len < 0)
1736 return key_len;
1737 if (put_user(key_len, &uflowp->key_len))
704a1e09 1738 return -EFAULT;
36956a7d 1739
704a1e09 1740 return compat_answer_query(dp, flow, 0, uflowp);
3fbd517a
BP
1741}
1742
1743static int compat_flowvec_ioctl(struct datapath *dp, unsigned long argp,
1744 int (*function)(struct datapath *,
1b29ebe5 1745 struct compat_odp_flow __user *,
3fbd517a
BP
1746 u32 n_flows))
1747{
1748 struct compat_odp_flowvec __user *uflowvec;
1749 struct compat_odp_flow __user *flows;
1750 struct compat_odp_flowvec flowvec;
1751 int retval;
1752
1753 uflowvec = compat_ptr(argp);
84c17d98
BP
1754 if (!access_ok(VERIFY_WRITE, uflowvec, sizeof(*uflowvec)) ||
1755 copy_from_user(&flowvec, uflowvec, sizeof(flowvec)))
3fbd517a
BP
1756 return -EFAULT;
1757
1758 if (flowvec.n_flows > INT_MAX / sizeof(struct compat_odp_flow))
1759 return -EINVAL;
1760
1761 flows = compat_ptr(flowvec.flows);
1762 if (!access_ok(VERIFY_WRITE, flows,
1763 flowvec.n_flows * sizeof(struct compat_odp_flow)))
1764 return -EFAULT;
1765
1766 retval = function(dp, flows, flowvec.n_flows);
1767 return (retval < 0 ? retval
1768 : retval == flowvec.n_flows ? 0
1769 : put_user(retval, &uflowvec->n_flows));
1770}
1771
1772static int compat_execute(struct datapath *dp, const struct compat_odp_execute __user *uexecute)
1773{
1774 struct odp_execute execute;
1775 compat_uptr_t actions;
1776 compat_uptr_t data;
1777
1778 if (!access_ok(VERIFY_READ, uexecute, sizeof(struct compat_odp_execute)) ||
3fbd517a 1779 __get_user(actions, &uexecute->actions) ||
cdee00fd 1780 __get_user(execute.actions_len, &uexecute->actions_len) ||
3fbd517a
BP
1781 __get_user(data, &uexecute->data) ||
1782 __get_user(execute.length, &uexecute->length))
1783 return -EFAULT;
1784
1b29ebe5
JG
1785 execute.actions = (struct nlattr __force *)compat_ptr(actions);
1786 execute.data = (const void __force *)compat_ptr(data);
3fbd517a
BP
1787
1788 return do_execute(dp, &execute);
1789}
1790
1791static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp)
1792{
1793 int dp_idx = iminor(f->f_dentry->d_inode);
1794 struct datapath *dp;
1795 int err;
1796
1797 switch (cmd) {
1798 case ODP_DP_DESTROY:
1799 case ODP_FLOW_FLUSH:
1800 /* Ioctls that don't need any translation at all. */
1801 return openvswitch_ioctl(f, cmd, argp);
1802
1803 case ODP_DP_CREATE:
c3827f61
BP
1804 case ODP_VPORT_ATTACH:
1805 case ODP_VPORT_DETACH:
1806 case ODP_VPORT_MOD:
3fbd517a
BP
1807 case ODP_VPORT_MTU_SET:
1808 case ODP_VPORT_MTU_GET:
1809 case ODP_VPORT_ETHER_SET:
1810 case ODP_VPORT_ETHER_GET:
780e6207 1811 case ODP_VPORT_STATS_SET:
3fbd517a
BP
1812 case ODP_VPORT_STATS_GET:
1813 case ODP_DP_STATS:
1814 case ODP_GET_DROP_FRAGS:
1815 case ODP_SET_DROP_FRAGS:
1816 case ODP_SET_LISTEN_MASK:
1817 case ODP_GET_LISTEN_MASK:
1818 case ODP_SET_SFLOW_PROBABILITY:
1819 case ODP_GET_SFLOW_PROBABILITY:
c3827f61 1820 case ODP_VPORT_QUERY:
3fbd517a
BP
1821 /* Ioctls that just need their pointer argument extended. */
1822 return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp));
3fbd517a
BP
1823 }
1824
1825 dp = get_dp_locked(dp_idx);
1826 err = -ENODEV;
1827 if (!dp)
1828 goto exit;
1829
1830 switch (cmd) {
b0ec0f27
BP
1831 case ODP_VPORT_DUMP32:
1832 err = compat_dump_port(dp, compat_ptr(argp));
3fbd517a
BP
1833 break;
1834
3fbd517a
BP
1835 case ODP_FLOW_PUT32:
1836 err = compat_put_flow(dp, compat_ptr(argp));
1837 break;
1838
1839 case ODP_FLOW_DEL32:
1840 err = compat_del_flow(dp, compat_ptr(argp));
1841 break;
1842
1843 case ODP_FLOW_GET32:
1844 err = compat_flowvec_ioctl(dp, argp, compat_query_flows);
1845 break;
1846
704a1e09
BP
1847 case ODP_FLOW_DUMP32:
1848 err = compat_dump_flow(dp, compat_ptr(argp));
3fbd517a
BP
1849 break;
1850
1851 case ODP_EXECUTE32:
1852 err = compat_execute(dp, compat_ptr(argp));
1853 break;
1854
1855 default:
1856 err = -ENOIOCTLCMD;
1857 break;
1858 }
1859 mutex_unlock(&dp->mutex);
1860exit:
1861 return err;
1862}
1863#endif
1864
33b38b63
JG
1865static ssize_t openvswitch_read(struct file *f, char __user *buf,
1866 size_t nbytes, loff_t *ppos)
064af421 1867{
7c40efc9 1868 int listeners = get_listen_mask(f);
064af421 1869 int dp_idx = iminor(f->f_dentry->d_inode);
e22d4953 1870 struct datapath *dp = get_dp_locked(dp_idx);
064af421 1871 struct sk_buff *skb;
856081f6 1872 struct iovec iov;
064af421
BP
1873 int retval;
1874
1875 if (!dp)
1876 return -ENODEV;
1877
1878 if (nbytes == 0 || !listeners)
1879 return 0;
1880
1881 for (;;) {
1882 int i;
1883
1884 for (i = 0; i < DP_N_QUEUES; i++) {
1885 if (listeners & (1 << i)) {
1886 skb = skb_dequeue(&dp->queues[i]);
1887 if (skb)
1888 goto success;
1889 }
1890 }
1891
1892 if (f->f_flags & O_NONBLOCK) {
1893 retval = -EAGAIN;
1894 goto error;
1895 }
1896
1897 wait_event_interruptible(dp->waitqueue,
1898 dp_has_packet_of_interest(dp,
1899 listeners));
1900
1901 if (signal_pending(current)) {
1902 retval = -ERESTARTSYS;
1903 goto error;
1904 }
1905 }
1906success:
e22d4953
JG
1907 mutex_unlock(&dp->mutex);
1908
856081f6
BP
1909 iov.iov_base = buf;
1910 iov.iov_len = min_t(size_t, skb->len, nbytes);
1911 retval = skb_copy_datagram_iovec(skb, 0, &iov, iov.iov_len);
064af421 1912 if (!retval)
856081f6 1913 retval = skb->len;
9cc8b4e4 1914
064af421 1915 kfree_skb(skb);
e22d4953 1916 return retval;
064af421
BP
1917
1918error:
e22d4953 1919 mutex_unlock(&dp->mutex);
064af421
BP
1920 return retval;
1921}
1922
1923static unsigned int openvswitch_poll(struct file *file, poll_table *wait)
1924{
1925 int dp_idx = iminor(file->f_dentry->d_inode);
e22d4953 1926 struct datapath *dp = get_dp_locked(dp_idx);
064af421
BP
1927 unsigned int mask;
1928
1929 if (dp) {
1930 mask = 0;
1931 poll_wait(file, &dp->waitqueue, wait);
7c40efc9 1932 if (dp_has_packet_of_interest(dp, get_listen_mask(file)))
064af421 1933 mask |= POLLIN | POLLRDNORM;
e22d4953 1934 mutex_unlock(&dp->mutex);
064af421
BP
1935 } else {
1936 mask = POLLIN | POLLRDNORM | POLLHUP;
1937 }
1938 return mask;
1939}
1940
33b38b63 1941static struct file_operations openvswitch_fops = {
609af740 1942 .owner = THIS_MODULE,
064af421
BP
1943 .read = openvswitch_read,
1944 .poll = openvswitch_poll,
1945 .unlocked_ioctl = openvswitch_ioctl,
3fbd517a
BP
1946#ifdef CONFIG_COMPAT
1947 .compat_ioctl = openvswitch_compat_ioctl,
1948#endif
064af421
BP
1949};
1950
1951static int major;
22d24ebf 1952
22d24ebf
BP
1953static int __init dp_init(void)
1954{
f2459fe7 1955 struct sk_buff *dummy_skb;
22d24ebf
BP
1956 int err;
1957
f2459fe7 1958 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
22d24ebf 1959
f2459fe7 1960 printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
064af421
BP
1961
1962 err = flow_init();
1963 if (err)
1964 goto error;
1965
f2459fe7 1966 err = vport_init();
064af421
BP
1967 if (err)
1968 goto error_flow_exit;
1969
f2459fe7
JG
1970 err = register_netdevice_notifier(&dp_device_notifier);
1971 if (err)
1972 goto error_vport_exit;
1973
064af421
BP
1974 major = register_chrdev(0, "openvswitch", &openvswitch_fops);
1975 if (err < 0)
1976 goto error_unreg_notifier;
1977
064af421
BP
1978 return 0;
1979
1980error_unreg_notifier:
1981 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7
JG
1982error_vport_exit:
1983 vport_exit();
064af421
BP
1984error_flow_exit:
1985 flow_exit();
1986error:
1987 return err;
1988}
1989
1990static void dp_cleanup(void)
1991{
1992 rcu_barrier();
1993 unregister_chrdev(major, "openvswitch");
1994 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7 1995 vport_exit();
064af421 1996 flow_exit();
064af421
BP
1997}
1998
1999module_init(dp_init);
2000module_exit(dp_cleanup);
2001
2002MODULE_DESCRIPTION("Open vSwitch switching datapath");
2003MODULE_LICENSE("GPL");