]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Distributed under the terms of the GNU GPL version 2. | |
834377ea | 3 | * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. |
a14bc59f BP |
4 | * |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
064af421 BP |
7 | */ |
8 | ||
9 | #include "flow.h" | |
f5e86186 | 10 | #include "datapath.h" |
064af421 BP |
11 | #include <linux/netdevice.h> |
12 | #include <linux/etherdevice.h> | |
13 | #include <linux/if_ether.h> | |
14 | #include <linux/if_vlan.h> | |
15 | #include <net/llc_pdu.h> | |
16 | #include <linux/kernel.h> | |
8d5ebd83 | 17 | #include <linux/jhash.h> |
064af421 BP |
18 | #include <linux/jiffies.h> |
19 | #include <linux/llc.h> | |
20 | #include <linux/module.h> | |
21 | #include <linux/in.h> | |
22 | #include <linux/rcupdate.h> | |
a26ef517 | 23 | #include <linux/if_arp.h> |
064af421 BP |
24 | #include <linux/if_ether.h> |
25 | #include <linux/ip.h> | |
26 | #include <linux/tcp.h> | |
27 | #include <linux/udp.h> | |
28 | #include <linux/icmp.h> | |
3c5f6de3 | 29 | #include <net/inet_ecn.h> |
064af421 BP |
30 | #include <net/ip.h> |
31 | ||
32 | #include "compat.h" | |
33 | ||
34 | struct kmem_cache *flow_cache; | |
8d5ebd83 | 35 | static unsigned int hash_seed; |
064af421 | 36 | |
e819fb47 | 37 | static inline bool arphdr_ok(struct sk_buff *skb) |
a26ef517 | 38 | { |
7d0ab001 | 39 | return skb->len >= skb_network_offset(skb) + sizeof(struct arp_eth_header); |
a26ef517 JP |
40 | } |
41 | ||
4c1ad233 | 42 | static inline int check_iphdr(struct sk_buff *skb) |
064af421 | 43 | { |
4c1ad233 BP |
44 | unsigned int nh_ofs = skb_network_offset(skb); |
45 | unsigned int ip_len; | |
46 | ||
47 | if (skb->len < nh_ofs + sizeof(struct iphdr)) | |
48 | return -EINVAL; | |
49 | ||
50 | ip_len = ip_hdrlen(skb); | |
51 | if (ip_len < sizeof(struct iphdr) || skb->len < nh_ofs + ip_len) | |
52 | return -EINVAL; | |
53 | ||
54 | /* | |
55 | * Pull enough header bytes to account for the IP header plus the | |
56 | * longest transport header that we parse, currently 20 bytes for TCP. | |
57 | */ | |
58 | if (!pskb_may_pull(skb, min(nh_ofs + ip_len + 20, skb->len))) | |
59 | return -ENOMEM; | |
60 | ||
61 | skb_set_transport_header(skb, nh_ofs + ip_len); | |
62 | return 0; | |
064af421 BP |
63 | } |
64 | ||
e819fb47 | 65 | static inline bool tcphdr_ok(struct sk_buff *skb) |
064af421 BP |
66 | { |
67 | int th_ofs = skb_transport_offset(skb); | |
7d0ab001 | 68 | if (skb->len >= th_ofs + sizeof(struct tcphdr)) { |
064af421 BP |
69 | int tcp_len = tcp_hdrlen(skb); |
70 | return (tcp_len >= sizeof(struct tcphdr) | |
71 | && skb->len >= th_ofs + tcp_len); | |
72 | } | |
e819fb47 | 73 | return false; |
064af421 BP |
74 | } |
75 | ||
e819fb47 | 76 | static inline bool udphdr_ok(struct sk_buff *skb) |
064af421 | 77 | { |
7d0ab001 | 78 | return skb->len >= skb_transport_offset(skb) + sizeof(struct udphdr); |
064af421 BP |
79 | } |
80 | ||
e819fb47 | 81 | static inline bool icmphdr_ok(struct sk_buff *skb) |
064af421 | 82 | { |
7d0ab001 | 83 | return skb->len >= skb_transport_offset(skb) + sizeof(struct icmphdr); |
064af421 BP |
84 | } |
85 | ||
86 | #define TCP_FLAGS_OFFSET 13 | |
87 | #define TCP_FLAG_MASK 0x3f | |
88 | ||
064af421 BP |
89 | void flow_used(struct sw_flow *flow, struct sk_buff *skb) |
90 | { | |
064af421 BP |
91 | u8 tcp_flags = 0; |
92 | ||
abfec865 BP |
93 | if (flow->key.dl_type == htons(ETH_P_IP) && |
94 | flow->key.nw_proto == IPPROTO_TCP) { | |
95 | u8 *tcp = (u8 *)tcp_hdr(skb); | |
96 | tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; | |
064af421 BP |
97 | } |
98 | ||
f2459fe7 | 99 | spin_lock_bh(&flow->lock); |
6bfafa55 | 100 | flow->used = jiffies; |
064af421 BP |
101 | flow->packet_count++; |
102 | flow->byte_count += skb->len; | |
103 | flow->tcp_flags |= tcp_flags; | |
f2459fe7 | 104 | spin_unlock_bh(&flow->lock); |
064af421 BP |
105 | } |
106 | ||
107 | struct sw_flow_actions *flow_actions_alloc(size_t n_actions) | |
108 | { | |
109 | struct sw_flow_actions *sfa; | |
110 | ||
111 | if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action)) | |
112 | return ERR_PTR(-EINVAL); | |
113 | ||
114 | sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action), | |
115 | GFP_KERNEL); | |
116 | if (!sfa) | |
117 | return ERR_PTR(-ENOMEM); | |
118 | ||
119 | sfa->n_actions = n_actions; | |
120 | return sfa; | |
121 | } | |
122 | ||
123 | ||
124 | /* Frees 'flow' immediately. */ | |
8d5ebd83 | 125 | static void flow_free(struct sw_flow *flow) |
064af421 BP |
126 | { |
127 | if (unlikely(!flow)) | |
128 | return; | |
129 | kfree(flow->sf_acts); | |
130 | kmem_cache_free(flow_cache, flow); | |
131 | } | |
132 | ||
8d5ebd83 JG |
/* Frees the flow that embeds table node 'node'.  The flow is freed
 * immediately, see flow_free(). */
void flow_free_tbl(struct tbl_node *node)
{
	flow_free(flow_cast(node));
}
138 | ||
064af421 BP |
139 | /* RCU callback used by flow_deferred_free. */ |
140 | static void rcu_free_flow_callback(struct rcu_head *rcu) | |
141 | { | |
142 | struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); | |
143 | flow_free(flow); | |
144 | } | |
145 | ||
146 | /* Schedules 'flow' to be freed after the next RCU grace period. | |
147 | * The caller must hold rcu_read_lock for this to be sensible. */ | |
148 | void flow_deferred_free(struct sw_flow *flow) | |
149 | { | |
150 | call_rcu(&flow->rcu, rcu_free_flow_callback); | |
151 | } | |
152 | ||
153 | /* RCU callback used by flow_deferred_free_acts. */ | |
154 | static void rcu_free_acts_callback(struct rcu_head *rcu) | |
155 | { | |
156 | struct sw_flow_actions *sf_acts = container_of(rcu, | |
157 | struct sw_flow_actions, rcu); | |
158 | kfree(sf_acts); | |
159 | } | |
160 | ||
161 | /* Schedules 'sf_acts' to be freed after the next RCU grace period. | |
162 | * The caller must hold rcu_read_lock for this to be sensible. */ | |
163 | void flow_deferred_free_acts(struct sw_flow_actions *sf_acts) | |
164 | { | |
165 | call_rcu(&sf_acts->rcu, rcu_free_acts_callback); | |
166 | } | |
167 | ||
50f06e16 | 168 | static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key) |
064af421 | 169 | { |
50f06e16 BP |
170 | struct qtag_prefix { |
171 | __be16 eth_type; /* ETH_P_8021Q */ | |
172 | __be16 tci; | |
173 | }; | |
174 | struct qtag_prefix *qp; | |
175 | ||
176 | if (skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)) | |
177 | return; | |
178 | ||
179 | qp = (struct qtag_prefix *) skb->data; | |
180 | key->dl_vlan = qp->tci & htons(VLAN_VID_MASK); | |
181 | key->dl_vlan_pcp = (ntohs(qp->tci) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT; | |
182 | __skb_pull(skb, sizeof(struct qtag_prefix)); | |
183 | } | |
184 | ||
185 | static __be16 parse_ethertype(struct sk_buff *skb) | |
064af421 | 186 | { |
50f06e16 BP |
187 | struct llc_snap_hdr { |
188 | u8 dsap; /* Always 0xAA */ | |
189 | u8 ssap; /* Always 0xAA */ | |
190 | u8 ctrl; | |
191 | u8 oui[3]; | |
192 | u16 ethertype; | |
193 | }; | |
194 | struct llc_snap_hdr *llc; | |
195 | __be16 proto; | |
196 | ||
197 | proto = *(__be16 *) skb->data; | |
198 | __skb_pull(skb, sizeof(__be16)); | |
199 | ||
200 | if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF) | |
201 | return proto; | |
202 | ||
203 | if (unlikely(skb->len < sizeof(struct llc_snap_hdr))) | |
204 | return htons(ODP_DL_TYPE_NOT_ETH_TYPE); | |
205 | ||
206 | llc = (struct llc_snap_hdr *) skb->data; | |
207 | if (llc->dsap != LLC_SAP_SNAP || | |
208 | llc->ssap != LLC_SAP_SNAP || | |
209 | (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0) | |
210 | return htons(ODP_DL_TYPE_NOT_ETH_TYPE); | |
211 | ||
212 | __skb_pull(skb, sizeof(struct llc_snap_hdr)); | |
213 | return llc->ethertype; | |
064af421 BP |
214 | } |
215 | ||
a31e0e31 BP |
216 | /** |
217 | * flow_extract - extracts a flow key from an Ethernet frame. | |
218 | * @skb: sk_buff that contains the frame, with skb->data pointing to the | |
219 | * Ethernet header | |
220 | * @in_port: port number on which @skb was received. | |
221 | * @key: output flow key | |
222 | * | |
223 | * The caller must ensure that skb->len >= ETH_HLEN. | |
224 | * | |
4c1ad233 BP |
225 | * Returns 0 if successful, otherwise a negative errno value. |
226 | * | |
59a18f80 BP |
227 | * Initializes @skb header pointers as follows: |
228 | * | |
229 | * - skb->mac_header: the Ethernet header. | |
230 | * | |
231 | * - skb->network_header: just past the Ethernet header, or just past the | |
232 | * VLAN header, to the first byte of the Ethernet payload. | |
233 | * | |
234 | * - skb->transport_header: If key->dl_type is ETH_P_IP on output, then just | |
235 | * past the IPv4 header, if one is present and of a correct length, | |
236 | * otherwise the same as skb->network_header. For other key->dl_type | |
237 | * values it is left untouched. | |
238 | * | |
769f8ccd BP |
239 | * Sets OVS_CB(skb)->is_frag to %true if @skb is an IPv4 fragment, otherwise to |
240 | * %false. | |
a31e0e31 | 241 | */ |
064af421 BP |
242 | int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) |
243 | { | |
244 | struct ethhdr *eth; | |
064af421 BP |
245 | |
246 | memset(key, 0, sizeof *key); | |
659586ef | 247 | key->tun_id = OVS_CB(skb)->tun_id; |
064af421 | 248 | key->in_port = in_port; |
659586ef | 249 | key->dl_vlan = htons(ODP_VLAN_NONE); |
769f8ccd | 250 | OVS_CB(skb)->is_frag = false; |
064af421 | 251 | |
4c1ad233 BP |
252 | /* |
253 | * We would really like to pull as many bytes as we could possibly | |
254 | * want to parse into the linear data area. Currently that is: | |
255 | * | |
256 | * 14 Ethernet header | |
257 | * 4 VLAN header | |
258 | * 60 max IP header with options | |
259 | * 20 max TCP/UDP/ICMP header (don't care about options) | |
260 | * -- | |
261 | * 98 | |
262 | * | |
263 | * But Xen only allocates 64 or 72 bytes for the linear data area in | |
264 | * netback, which means that we would reallocate and copy the skb's | |
265 | * linear data on every packet if we did that. So instead just pull 64 | |
266 | * bytes, which is always sufficient without IP options, and then check | |
267 | * whether we need to pull more later when we look at the IP header. | |
268 | */ | |
d9fce1ca | 269 | if (!pskb_may_pull(skb, min(skb->len, 64u))) |
4c1ad233 | 270 | return -ENOMEM; |
064af421 BP |
271 | |
272 | skb_reset_mac_header(skb); | |
064af421 | 273 | |
50f06e16 BP |
274 | /* Link layer. */ |
275 | eth = eth_hdr(skb); | |
064af421 BP |
276 | memcpy(key->dl_src, eth->h_source, ETH_ALEN); |
277 | memcpy(key->dl_dst, eth->h_dest, ETH_ALEN); | |
50f06e16 BP |
278 | |
279 | /* dl_type, dl_vlan, dl_vlan_pcp. */ | |
280 | __skb_pull(skb, 2 * ETH_ALEN); | |
281 | if (eth->h_proto == htons(ETH_P_8021Q)) | |
282 | parse_vlan(skb, key); | |
283 | key->dl_type = parse_ethertype(skb); | |
284 | skb_reset_network_header(skb); | |
285 | __skb_push(skb, skb->data - (unsigned char *)eth); | |
064af421 BP |
286 | |
287 | /* Network layer. */ | |
4c1ad233 BP |
288 | if (key->dl_type == htons(ETH_P_IP)) { |
289 | struct iphdr *nh; | |
290 | int error; | |
291 | ||
292 | error = check_iphdr(skb); | |
293 | if (unlikely(error)) { | |
294 | if (error == -EINVAL) { | |
295 | skb->transport_header = skb->network_header; | |
296 | return 0; | |
297 | } | |
298 | return error; | |
299 | } | |
300 | ||
301 | nh = ip_hdr(skb); | |
064af421 BP |
302 | key->nw_src = nh->saddr; |
303 | key->nw_dst = nh->daddr; | |
f5e86186 | 304 | key->nw_tos = nh->tos & ~INET_ECN_MASK; |
064af421 | 305 | key->nw_proto = nh->protocol; |
064af421 BP |
306 | |
307 | /* Transport layer. */ | |
308 | if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) { | |
309 | if (key->nw_proto == IPPROTO_TCP) { | |
310 | if (tcphdr_ok(skb)) { | |
311 | struct tcphdr *tcp = tcp_hdr(skb); | |
312 | key->tp_src = tcp->source; | |
313 | key->tp_dst = tcp->dest; | |
064af421 BP |
314 | } |
315 | } else if (key->nw_proto == IPPROTO_UDP) { | |
316 | if (udphdr_ok(skb)) { | |
317 | struct udphdr *udp = udp_hdr(skb); | |
318 | key->tp_src = udp->source; | |
319 | key->tp_dst = udp->dest; | |
064af421 BP |
320 | } |
321 | } else if (key->nw_proto == IPPROTO_ICMP) { | |
322 | if (icmphdr_ok(skb)) { | |
323 | struct icmphdr *icmp = icmp_hdr(skb); | |
324 | /* The ICMP type and code fields use the 16-bit | |
325 | * transport port fields, so we need to store them | |
326 | * in 16-bit network byte order. */ | |
327 | key->tp_src = htons(icmp->type); | |
328 | key->tp_dst = htons(icmp->code); | |
064af421 BP |
329 | } |
330 | } | |
331 | } else { | |
769f8ccd | 332 | OVS_CB(skb)->is_frag = true; |
064af421 | 333 | } |
a26ef517 JP |
334 | } else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) { |
335 | struct arp_eth_header *arp; | |
336 | ||
337 | arp = (struct arp_eth_header *)skb_network_header(skb); | |
338 | ||
f5e86186 | 339 | if (arp->ar_hrd == htons(ARPHRD_ETHER) |
de3f65ea JP |
340 | && arp->ar_pro == htons(ETH_P_IP) |
341 | && arp->ar_hln == ETH_ALEN | |
342 | && arp->ar_pln == 4) { | |
343 | ||
344 | /* We only match on the lower 8 bits of the opcode. */ | |
345 | if (ntohs(arp->ar_op) <= 0xff) { | |
346 | key->nw_proto = ntohs(arp->ar_op); | |
347 | } | |
348 | ||
349 | if (key->nw_proto == ARPOP_REQUEST | |
350 | || key->nw_proto == ARPOP_REPLY) { | |
351 | memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src)); | |
352 | memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst)); | |
353 | } | |
354 | } | |
064af421 | 355 | } |
769f8ccd | 356 | return 0; |
064af421 BP |
357 | } |
358 | ||
8d5ebd83 JG |
359 | u32 flow_hash(const struct odp_flow_key *key) |
360 | { | |
361 | return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed); | |
362 | } | |
363 | ||
364 | int flow_cmp(const struct tbl_node *node, void *key2_) | |
365 | { | |
366 | const struct odp_flow_key *key1 = &flow_cast(node)->key; | |
367 | const struct odp_flow_key *key2 = key2_; | |
368 | ||
369 | return !memcmp(key1, key2, sizeof(struct odp_flow_key)); | |
370 | } | |
371 | ||
064af421 BP |
372 | /* Initializes the flow module. |
373 | * Returns zero if successful or a negative error code. */ | |
374 | int flow_init(void) | |
375 | { | |
376 | flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, | |
377 | 0, NULL); | |
378 | if (flow_cache == NULL) | |
379 | return -ENOMEM; | |
380 | ||
8d5ebd83 JG |
381 | get_random_bytes(&hash_seed, sizeof hash_seed); |
382 | ||
064af421 BP |
383 | return 0; |
384 | } | |
385 | ||
386 | /* Uninitializes the flow module. */ | |
387 | void flow_exit(void) | |
388 | { | |
389 | kmem_cache_destroy(flow_cache); | |
390 | } |