]> git.proxmox.com Git - ovs.git/blame - datapath/datapath.h
datapath: Add generic virtual port layer.
[ovs.git] / datapath / datapath.h
CommitLineData
a14bc59f 1/*
67a78abe 2 * Copyright (c) 2009, 2010 Nicira Networks.
a14bc59f
BP
3 * Distributed under the terms of the GNU GPL version 2.
4 *
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
7 */
8
064af421
BP
9/* Interface exported by openvswitch_mod. */
10
11#ifndef DATAPATH_H
12#define DATAPATH_H 1
13
14#include <asm/page.h>
15#include <linux/kernel.h>
16#include <linux/mutex.h>
064af421
BP
17#include <linux/netdevice.h>
18#include <linux/workqueue.h>
19#include <linux/skbuff.h>
806e39cf 20#include <linux/version.h>
064af421 21#include "flow.h"
2ba9026e 22#include "dp_sysfs.h"
064af421 23
f2459fe7
JG
24struct vport;
25struct dp_port;
26
064af421
BP
27/* Mask for the priority bits in a vlan header. If we ever merge upstream
28 * then this should go into include/linux/if_vlan.h. */
29#define VLAN_PCP_MASK 0xe000
a4fbb689 30#define VLAN_PCP_SHIFT 13
064af421 31
5eab9abc 32#define DP_MAX_PORTS 1024
064af421
BP
33#define DP_MAX_GROUPS 16
34
/* Sizing of the two-level bucket-pointer array used by struct dp_table.
 *
 * Each *_BITS value is PAGE_SHIFT minus log2 of a pointer size, i.e. the
 * log2 of how many pointers fit in one page, so that one array of *_SIZE
 * pointers occupies a single page.
 *
 * NOTE(review): the *_SHIFT values look like the bit offset of each level's
 * index within a bucket number (low DP_L2_BITS bits for level 2, the bits
 * above them for level 1); confirm against the table implementation. */
6fa58f7a 35#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*)))
064af421
BP
36#define DP_L2_SIZE (1 << DP_L2_BITS)
37#define DP_L2_SHIFT 0
38
6fa58f7a 39#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket**)))
064af421
BP
40#define DP_L1_SIZE (1 << DP_L1_BITS)
41#define DP_L1_SHIFT DP_L2_BITS
42
6fa58f7a 43/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */
064af421
BP
/* Upper bound on the number of buckets: every level-1 slot in use, each
 * pointing at a full level-2 array. */
44#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE)
45
6fa58f7a
BP
46/**
47 * struct dp_table - flow table
48 * @n_buckets: number of buckets (a power of 2 between %DP_L1_SIZE and
49 * %DP_MAX_BUCKETS)
50 * @buckets: pointer to @n_buckets/%DP_L1_SIZE pointers to %DP_L1_SIZE pointers
51 * to buckets
52 * @hash_seed: random number used for flow hashing, to make the hash
53 * distribution harder to predict
54 * @rcu: RCU callback structure
55 *
56 * The @buckets array is logically an array of pointers to buckets. It is
57 * broken into two levels to avoid the need to kmalloc() any object larger than
58 * a single page or to use vmalloc(). @buckets is always nonnull, as is each
59 * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash
60 * bucket is nonempty (for 0 <= i < @n_buckets/%DP_L1_SIZE, 0 <= j <
61 * %DP_L1_SIZE).
62 */
064af421
BP
63struct dp_table {
64 unsigned int n_buckets;
6fa58f7a
BP
/* Two-level array addressed as buckets[i][j]; a NULL buckets[i][j] means
 * that hash bucket is empty. */
65 struct dp_bucket ***buckets;
66 unsigned int hash_seed;
67 struct rcu_head rcu;
68};
69
70/**
71 * struct dp_bucket - single bucket within datapath flow table
72 * @rcu: RCU callback structure
73 * @n_flows: number of flows in @flows[] array
74 * @flows: array of @n_flows pointers to flows
75 *
76 * The expected number of flows per bucket is 1, but this allows for an
77 * arbitrary number of collisions.
78 */
79struct dp_bucket {
064af421 80 struct rcu_head rcu;
6fa58f7a
BP
81 unsigned int n_flows;
/* Flexible array member: the flow pointers are stored inline, directly
 * after the fixed-size fields, so it must remain the last member. */
82 struct sw_flow *flows[];
064af421
BP
83};
84
72b06300 85#define DP_N_QUEUES 3
064af421
BP
86#define DP_MAX_QUEUE_LEN 100
87
67a78abe
BP
88/**
89 * struct dp_stats_percpu - per-cpu packet processing statistics for a given
90 * datapath.
91 * @n_frags: Number of IP fragments processed by datapath.
92 * @n_hit: Number of received packets for which a matching flow was found in
93 * the flow table.
94 * @n_missed: Number of received packets that had no matching flow in the flow
95 * table. The sum of @n_hit and @n_missed is the number of packets that have
96 * been received by the datapath.
97 * @n_lost: Number of received packets that had no matching flow in the flow
98 * table that could not be sent to userspace (normally due to an overflow in
99 * one of the datapath's queues).
67a78abe 100 */
064af421
BP
101struct dp_stats_percpu {
102 u64 n_frags;
103 u64 n_hit;
104 u64 n_missed;
105 u64 n_lost;
106};
107
/**
 * struct dp_port_group - one port group of a datapath
 * @rcu: RCU callback structure.
 * @n_ports: Presumably the number of entries in @ports (TODO confirm against
 * the code that allocates and fills port groups).
 * @ports: Flexible array of 16-bit values stored inline after the fixed
 * fields; presumably datapath port numbers (TODO confirm).
 *
 * struct datapath holds an array of pointers to these in its @groups member,
 * which its documentation describes as RCU protected and used by the
 * ODPAT_OUTPUT_GROUP action.
 */
108struct dp_port_group {
109 struct rcu_head rcu;
110 int n_ports;
111 u16 ports[];
112};
113
72b06300
BP
114/**
115 * struct datapath - datapath for flow-based packet switching
116 * @mutex: Mutual exclusion for ioctls.
117 * @dp_idx: Datapath number (index into the dps[] array in datapath.c).
56fd8edf 118 * @ifobj: Represents /sys/class/net/<devname>/brif.
72b06300
BP
119 * @drop_frags: Drop all IP fragments if nonzero.
120 * @queues: %DP_N_QUEUES sets of queued packets for userspace to handle.
121 * @waitqueue: Waitqueue, for waiting for new packets in @queues.
122 * @n_flows: Number of flows currently in flow table.
123 * @table: Current flow table (RCU protected).
124 * @groups: Port groups (%DP_MAX_GROUPS slots), used by ODPAT_OUTPUT_GROUP
 * action (RCU protected).
125 * @n_ports: Number of ports currently in @ports.
f2459fe7 126 * @ports: Map from port number (the @port_no member of &struct dp_port) to
 * &struct dp_port. %ODPP_LOCAL port
72b06300
BP
127 * always exists, other ports may be %NULL.
128 * @port_list: List of all ports in @ports in arbitrary order, linked through
 * the @node member of &struct dp_port.
129 * @stats_percpu: Per-CPU datapath statistics.
b4a7a3f3
BP
130 * @sflow_probability: Number of packets out of UINT_MAX to sample to the
131 * %ODPL_SFLOW queue, e.g. (@sflow_probability/UINT_MAX) is the probability of
132 * sampling a given packet.
72b06300 133 */
064af421
BP
134struct datapath {
135 struct mutex mutex;
136 int dp_idx;
064af421 137 struct kobject ifobj;
064af421
BP
138
139 int drop_frags;
140
141 /* Queued data. */
142 struct sk_buff_head queues[DP_N_QUEUES];
143 wait_queue_head_t waitqueue;
144
145 /* Flow table. */
146 unsigned int n_flows;
147 struct dp_table *table;
148
149 /* Port groups. */
150 struct dp_port_group *groups[DP_MAX_GROUPS];
151
152 /* Switch ports. */
153 unsigned int n_ports;
f2459fe7 154 struct dp_port *ports[DP_MAX_PORTS];
72b06300 155 struct list_head port_list;
064af421
BP
156
157 /* Stats. */
158 struct dp_stats_percpu *stats_percpu;
72b06300
BP
159
160 /* sFlow Sampling */
161 unsigned int sflow_probability;
064af421
BP
162};
163
56fd8edf 164/**
f2459fe7 165 * struct dp_port - one port within a datapath
56fd8edf
BP
166 * @port_no: Index into @dp's @ports array.
167 * @dp: Datapath to which this port belongs.
f2459fe7
JG
168 * @vport: The network device attached to this port. The contents depend on
169 * the device and should be accessed only through the vport_* functions.
56fd8edf
BP
170 * @kobj: Represents /sys/class/net/<devname>/brport.
171 * @linkname: The name of the link from /sys/class/net/<datapath>/brif to this
f2459fe7
JG
172 * &struct dp_port. (We keep this around so that we can delete it if the
173 * device gets renamed.) Set to the null string when no link exists.
56fd8edf
BP
174 * @node: Element in @dp's @port_list.
175 * @sflow_pool: Number of packets that were candidates for sFlow sampling,
176 * regardless of whether they were actually chosen and sent down to userspace.
177 */
f2459fe7 178struct dp_port {
064af421
BP
179 u16 port_no;
180 struct datapath *dp;
f2459fe7 181 struct vport *vport;
064af421 182 struct kobject kobj;
0515ceb3 183 char linkname[IFNAMSIZ];
56fd8edf
BP
184 struct list_head node;
185 atomic_t sflow_pool;
064af421
BP
186};
187
/* L4 checksum status of a packet, as stored in the ip_summed member of
 * struct ovs_skb_cb.
 *
 * NOTE(review): the names parallel the kernel's CHECKSUM_* skb states, but
 * the actual mapping is performed outside this header (presumably by
 * compute_ip_summed()); confirm there before relying on the numeric values. */
a063b0df 188enum csum_type {
635c9298
JG
189 OVS_CSUM_NONE = 0,
190 OVS_CSUM_UNNECESSARY = 1,
191 OVS_CSUM_COMPLETE = 2,
192 OVS_CSUM_PARTIAL = 3,
a063b0df
JG
193};
194
195/**
196 * struct ovs_skb_cb - OVS data in skb CB
f2459fe7 197 * @dp_port: The bridge port on which the skb entered the switch.
a063b0df
JG
198 * @ip_summed: Consistently stores L4 checksumming status across different
199 * kernel versions.
 * @tun_id: Big-endian 32-bit tunnel ID. NOTE(review): who sets and consumes
 * it is not visible in this header; presumably the tunnel ID associated with
 * the packet - confirm.
200 */
201struct ovs_skb_cb {
f2459fe7 202 struct dp_port *dp_port;
659586ef
JG
203 enum csum_type ip_summed;
204 __be32 tun_id;
a063b0df
JG
205};
/* Reinterprets an skb's control buffer (skb->cb) as a struct ovs_skb_cb. */
206#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
207
064af421
BP
208extern struct notifier_block dp_device_notifier;
209extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
064af421
BP
210
211/* Flow table. */
212struct dp_table *dp_table_create(unsigned int n_buckets);
213void dp_table_destroy(struct dp_table *, int free_flows);
214struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *);
6fa58f7a 215int dp_table_insert(struct dp_table *, struct sw_flow *);
064af421
BP
216int dp_table_delete(struct dp_table *, struct sw_flow *);
217int dp_table_expand(struct datapath *);
218int dp_table_flush(struct datapath *);
219int dp_table_foreach(struct dp_table *table,
220 int (*callback)(struct sw_flow *flow, void *aux),
221 void *aux);
222
f2459fe7
JG
223void dp_process_received_packet(struct dp_port *, struct sk_buff *);
224int dp_detach_port(struct dp_port *, int may_delete);
064af421 225int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
1dcf111b 226int dp_min_mtu(const struct datapath *dp);
f2459fe7 227void set_internal_devs_mtu(const struct datapath *dp);
064af421
BP
228
229struct datapath *get_dp(int dp_idx);
f2459fe7 230const char *dp_name(const struct datapath *dp);
064af421 231
53d3bbbc
BP
/* vswitch_skb_checksum_setup() has an out-of-line implementation only when
 * both CONFIG_XEN and HAVE_PROTO_DATA_VALID are defined. In every other
 * configuration it is the inline stub below, which does nothing and returns
 * 0, so callers can invoke it unconditionally. */
232#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
233int vswitch_skb_checksum_setup(struct sk_buff *skb);
064af421 234#else
53d3bbbc 235static inline int vswitch_skb_checksum_setup(struct sk_buff *skb)
064af421
BP
236{
/* No-op stub: always returns 0. */
237 return 0;
238}
239#endif
240
635c9298 241void compute_ip_summed(struct sk_buff *skb, bool xmit);
a6057323 242void forward_ip_summed(struct sk_buff *skb);
b2f460c7 243
064af421 244#endif /* datapath.h */