]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Distributed under the terms of the GNU GPL version 2. | |
834377ea | 3 | * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. |
a14bc59f BP |
4 | * |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
064af421 BP |
7 | */ |
8 | ||
9 | #include "flow.h" | |
f5e86186 | 10 | #include "datapath.h" |
064af421 BP |
11 | #include <linux/netdevice.h> |
12 | #include <linux/etherdevice.h> | |
13 | #include <linux/if_ether.h> | |
14 | #include <linux/if_vlan.h> | |
15 | #include <net/llc_pdu.h> | |
16 | #include <linux/kernel.h> | |
8d5ebd83 | 17 | #include <linux/jhash.h> |
064af421 BP |
18 | #include <linux/jiffies.h> |
19 | #include <linux/llc.h> | |
20 | #include <linux/module.h> | |
21 | #include <linux/in.h> | |
22 | #include <linux/rcupdate.h> | |
a26ef517 | 23 | #include <linux/if_arp.h> |
064af421 BP |
24 | #include <linux/if_ether.h> |
25 | #include <linux/ip.h> | |
26 | #include <linux/tcp.h> | |
27 | #include <linux/udp.h> | |
28 | #include <linux/icmp.h> | |
3c5f6de3 | 29 | #include <net/inet_ecn.h> |
064af421 BP |
30 | #include <net/ip.h> |
31 | ||
32 | #include "compat.h" | |
33 | ||
/* Slab cache for 'struct sw_flow' objects.  Created by flow_init() and
 * destroyed by flow_exit(). */
struct kmem_cache *flow_cache;

/* Seed passed to jhash2() by flow_hash().  Filled with random bytes by
 * flow_init(). */
static unsigned int hash_seed;
064af421 | 36 | |
a26ef517 JP |
37 | struct arp_eth_header |
38 | { | |
39 | __be16 ar_hrd; /* format of hardware address */ | |
40 | __be16 ar_pro; /* format of protocol address */ | |
41 | unsigned char ar_hln; /* length of hardware address */ | |
42 | unsigned char ar_pln; /* length of protocol address */ | |
43 | __be16 ar_op; /* ARP opcode (command) */ | |
44 | ||
45 | /* Ethernet+IPv4 specific members. */ | |
46 | unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ | |
47 | unsigned char ar_sip[4]; /* sender IP address */ | |
48 | unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ | |
49 | unsigned char ar_tip[4]; /* target IP address */ | |
50 | } __attribute__((packed)); | |
51 | ||
52 | static inline int arphdr_ok(struct sk_buff *skb) | |
53 | { | |
54 | int nh_ofs = skb_network_offset(skb); | |
55 | return pskb_may_pull(skb, nh_ofs + sizeof(struct arp_eth_header)); | |
56 | } | |
57 | ||
064af421 BP |
58 | static inline int iphdr_ok(struct sk_buff *skb) |
59 | { | |
60 | int nh_ofs = skb_network_offset(skb); | |
61 | if (skb->len >= nh_ofs + sizeof(struct iphdr)) { | |
62 | int ip_len = ip_hdrlen(skb); | |
63 | return (ip_len >= sizeof(struct iphdr) | |
64 | && pskb_may_pull(skb, nh_ofs + ip_len)); | |
65 | } | |
66 | return 0; | |
67 | } | |
68 | ||
69 | static inline int tcphdr_ok(struct sk_buff *skb) | |
70 | { | |
71 | int th_ofs = skb_transport_offset(skb); | |
72 | if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) { | |
73 | int tcp_len = tcp_hdrlen(skb); | |
74 | return (tcp_len >= sizeof(struct tcphdr) | |
75 | && skb->len >= th_ofs + tcp_len); | |
76 | } | |
77 | return 0; | |
78 | } | |
79 | ||
80 | static inline int udphdr_ok(struct sk_buff *skb) | |
81 | { | |
82 | int th_ofs = skb_transport_offset(skb); | |
83 | return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr)); | |
84 | } | |
85 | ||
86 | static inline int icmphdr_ok(struct sk_buff *skb) | |
87 | { | |
88 | int th_ofs = skb_transport_offset(skb); | |
89 | return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr)); | |
90 | } | |
91 | ||
92 | #define TCP_FLAGS_OFFSET 13 | |
93 | #define TCP_FLAG_MASK 0x3f | |
94 | ||
064af421 BP |
95 | void flow_used(struct sw_flow *flow, struct sk_buff *skb) |
96 | { | |
064af421 BP |
97 | u8 tcp_flags = 0; |
98 | ||
abfec865 BP |
99 | if (flow->key.dl_type == htons(ETH_P_IP) && |
100 | flow->key.nw_proto == IPPROTO_TCP) { | |
101 | u8 *tcp = (u8 *)tcp_hdr(skb); | |
102 | tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; | |
064af421 BP |
103 | } |
104 | ||
f2459fe7 | 105 | spin_lock_bh(&flow->lock); |
6bfafa55 | 106 | flow->used = jiffies; |
064af421 BP |
107 | flow->packet_count++; |
108 | flow->byte_count += skb->len; | |
109 | flow->tcp_flags |= tcp_flags; | |
f2459fe7 | 110 | spin_unlock_bh(&flow->lock); |
064af421 BP |
111 | } |
112 | ||
113 | struct sw_flow_actions *flow_actions_alloc(size_t n_actions) | |
114 | { | |
115 | struct sw_flow_actions *sfa; | |
116 | ||
117 | if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action)) | |
118 | return ERR_PTR(-EINVAL); | |
119 | ||
120 | sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action), | |
121 | GFP_KERNEL); | |
122 | if (!sfa) | |
123 | return ERR_PTR(-ENOMEM); | |
124 | ||
125 | sfa->n_actions = n_actions; | |
126 | return sfa; | |
127 | } | |
128 | ||
129 | ||
130 | /* Frees 'flow' immediately. */ | |
8d5ebd83 | 131 | static void flow_free(struct sw_flow *flow) |
064af421 BP |
132 | { |
133 | if (unlikely(!flow)) | |
134 | return; | |
135 | kfree(flow->sf_acts); | |
136 | kmem_cache_free(flow_cache, flow); | |
137 | } | |
138 | ||
/* Frees, immediately, the flow that embeds table node 'node'. */
void flow_free_tbl(struct tbl_node *node)
{
	flow_free(flow_cast(node));
}
144 | ||
064af421 BP |
145 | /* RCU callback used by flow_deferred_free. */ |
146 | static void rcu_free_flow_callback(struct rcu_head *rcu) | |
147 | { | |
148 | struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); | |
149 | flow_free(flow); | |
150 | } | |
151 | ||
152 | /* Schedules 'flow' to be freed after the next RCU grace period. | |
153 | * The caller must hold rcu_read_lock for this to be sensible. */ | |
154 | void flow_deferred_free(struct sw_flow *flow) | |
155 | { | |
156 | call_rcu(&flow->rcu, rcu_free_flow_callback); | |
157 | } | |
158 | ||
159 | /* RCU callback used by flow_deferred_free_acts. */ | |
160 | static void rcu_free_acts_callback(struct rcu_head *rcu) | |
161 | { | |
162 | struct sw_flow_actions *sf_acts = container_of(rcu, | |
163 | struct sw_flow_actions, rcu); | |
164 | kfree(sf_acts); | |
165 | } | |
166 | ||
167 | /* Schedules 'sf_acts' to be freed after the next RCU grace period. | |
168 | * The caller must hold rcu_read_lock for this to be sensible. */ | |
169 | void flow_deferred_free_acts(struct sw_flow_actions *sf_acts) | |
170 | { | |
171 | call_rcu(&sf_acts->rcu, rcu_free_acts_callback); | |
172 | } | |
173 | ||
50f06e16 | 174 | static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key) |
064af421 | 175 | { |
50f06e16 BP |
176 | struct qtag_prefix { |
177 | __be16 eth_type; /* ETH_P_8021Q */ | |
178 | __be16 tci; | |
179 | }; | |
180 | struct qtag_prefix *qp; | |
181 | ||
182 | if (skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)) | |
183 | return; | |
184 | ||
185 | qp = (struct qtag_prefix *) skb->data; | |
186 | key->dl_vlan = qp->tci & htons(VLAN_VID_MASK); | |
187 | key->dl_vlan_pcp = (ntohs(qp->tci) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT; | |
188 | __skb_pull(skb, sizeof(struct qtag_prefix)); | |
189 | } | |
190 | ||
191 | static __be16 parse_ethertype(struct sk_buff *skb) | |
064af421 | 192 | { |
50f06e16 BP |
193 | struct llc_snap_hdr { |
194 | u8 dsap; /* Always 0xAA */ | |
195 | u8 ssap; /* Always 0xAA */ | |
196 | u8 ctrl; | |
197 | u8 oui[3]; | |
198 | u16 ethertype; | |
199 | }; | |
200 | struct llc_snap_hdr *llc; | |
201 | __be16 proto; | |
202 | ||
203 | proto = *(__be16 *) skb->data; | |
204 | __skb_pull(skb, sizeof(__be16)); | |
205 | ||
206 | if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF) | |
207 | return proto; | |
208 | ||
209 | if (unlikely(skb->len < sizeof(struct llc_snap_hdr))) | |
210 | return htons(ODP_DL_TYPE_NOT_ETH_TYPE); | |
211 | ||
212 | llc = (struct llc_snap_hdr *) skb->data; | |
213 | if (llc->dsap != LLC_SAP_SNAP || | |
214 | llc->ssap != LLC_SAP_SNAP || | |
215 | (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0) | |
216 | return htons(ODP_DL_TYPE_NOT_ETH_TYPE); | |
217 | ||
218 | __skb_pull(skb, sizeof(struct llc_snap_hdr)); | |
219 | return llc->ethertype; | |
064af421 BP |
220 | } |
221 | ||
222 | /* Parses the Ethernet frame in 'skb', which was received on 'in_port', | |
223 | * and initializes 'key' to match. Returns 1 if 'skb' contains an IP | |
224 | * fragment, 0 otherwise. */ | |
225 | int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) | |
226 | { | |
227 | struct ethhdr *eth; | |
064af421 | 228 | int retval = 0; |
064af421 BP |
229 | |
230 | memset(key, 0, sizeof *key); | |
659586ef | 231 | key->tun_id = OVS_CB(skb)->tun_id; |
064af421 | 232 | key->in_port = in_port; |
659586ef | 233 | key->dl_vlan = htons(ODP_VLAN_NONE); |
064af421 BP |
234 | |
235 | if (skb->len < sizeof *eth) | |
236 | return 0; | |
d9fce1ca | 237 | if (!pskb_may_pull(skb, min(skb->len, 64u))) |
064af421 | 238 | return 0; |
064af421 BP |
239 | |
240 | skb_reset_mac_header(skb); | |
064af421 | 241 | |
50f06e16 BP |
242 | /* Link layer. */ |
243 | eth = eth_hdr(skb); | |
064af421 BP |
244 | memcpy(key->dl_src, eth->h_source, ETH_ALEN); |
245 | memcpy(key->dl_dst, eth->h_dest, ETH_ALEN); | |
50f06e16 BP |
246 | |
247 | /* dl_type, dl_vlan, dl_vlan_pcp. */ | |
248 | __skb_pull(skb, 2 * ETH_ALEN); | |
249 | if (eth->h_proto == htons(ETH_P_8021Q)) | |
250 | parse_vlan(skb, key); | |
251 | key->dl_type = parse_ethertype(skb); | |
252 | skb_reset_network_header(skb); | |
253 | __skb_push(skb, skb->data - (unsigned char *)eth); | |
064af421 BP |
254 | |
255 | /* Network layer. */ | |
256 | if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) { | |
257 | struct iphdr *nh = ip_hdr(skb); | |
50f06e16 | 258 | int th_ofs = skb_network_offset(skb) + nh->ihl * 4; |
064af421 BP |
259 | key->nw_src = nh->saddr; |
260 | key->nw_dst = nh->daddr; | |
f5e86186 | 261 | key->nw_tos = nh->tos & ~INET_ECN_MASK; |
064af421 BP |
262 | key->nw_proto = nh->protocol; |
263 | skb_set_transport_header(skb, th_ofs); | |
264 | ||
265 | /* Transport layer. */ | |
266 | if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) { | |
267 | if (key->nw_proto == IPPROTO_TCP) { | |
268 | if (tcphdr_ok(skb)) { | |
269 | struct tcphdr *tcp = tcp_hdr(skb); | |
270 | key->tp_src = tcp->source; | |
271 | key->tp_dst = tcp->dest; | |
272 | } else { | |
273 | /* Avoid tricking other code into | |
274 | * thinking that this packet has an L4 | |
275 | * header. */ | |
276 | key->nw_proto = 0; | |
277 | } | |
278 | } else if (key->nw_proto == IPPROTO_UDP) { | |
279 | if (udphdr_ok(skb)) { | |
280 | struct udphdr *udp = udp_hdr(skb); | |
281 | key->tp_src = udp->source; | |
282 | key->tp_dst = udp->dest; | |
283 | } else { | |
284 | /* Avoid tricking other code into | |
285 | * thinking that this packet has an L4 | |
286 | * header. */ | |
287 | key->nw_proto = 0; | |
288 | } | |
289 | } else if (key->nw_proto == IPPROTO_ICMP) { | |
290 | if (icmphdr_ok(skb)) { | |
291 | struct icmphdr *icmp = icmp_hdr(skb); | |
292 | /* The ICMP type and code fields use the 16-bit | |
293 | * transport port fields, so we need to store them | |
294 | * in 16-bit network byte order. */ | |
295 | key->tp_src = htons(icmp->type); | |
296 | key->tp_dst = htons(icmp->code); | |
297 | } else { | |
298 | /* Avoid tricking other code into | |
299 | * thinking that this packet has an L4 | |
300 | * header. */ | |
301 | key->nw_proto = 0; | |
302 | } | |
303 | } | |
304 | } else { | |
305 | retval = 1; | |
306 | } | |
a26ef517 JP |
307 | } else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) { |
308 | struct arp_eth_header *arp; | |
309 | ||
310 | arp = (struct arp_eth_header *)skb_network_header(skb); | |
311 | ||
f5e86186 | 312 | if (arp->ar_hrd == htons(ARPHRD_ETHER) |
de3f65ea JP |
313 | && arp->ar_pro == htons(ETH_P_IP) |
314 | && arp->ar_hln == ETH_ALEN | |
315 | && arp->ar_pln == 4) { | |
316 | ||
317 | /* We only match on the lower 8 bits of the opcode. */ | |
318 | if (ntohs(arp->ar_op) <= 0xff) { | |
319 | key->nw_proto = ntohs(arp->ar_op); | |
320 | } | |
321 | ||
322 | if (key->nw_proto == ARPOP_REQUEST | |
323 | || key->nw_proto == ARPOP_REPLY) { | |
324 | memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src)); | |
325 | memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst)); | |
326 | } | |
327 | } | |
064af421 BP |
328 | } else { |
329 | skb_reset_transport_header(skb); | |
330 | } | |
331 | return retval; | |
332 | } | |
333 | ||
8d5ebd83 JG |
334 | u32 flow_hash(const struct odp_flow_key *key) |
335 | { | |
336 | return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed); | |
337 | } | |
338 | ||
339 | int flow_cmp(const struct tbl_node *node, void *key2_) | |
340 | { | |
341 | const struct odp_flow_key *key1 = &flow_cast(node)->key; | |
342 | const struct odp_flow_key *key2 = key2_; | |
343 | ||
344 | return !memcmp(key1, key2, sizeof(struct odp_flow_key)); | |
345 | } | |
346 | ||
064af421 BP |
347 | /* Initializes the flow module. |
348 | * Returns zero if successful or a negative error code. */ | |
349 | int flow_init(void) | |
350 | { | |
351 | flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, | |
352 | 0, NULL); | |
353 | if (flow_cache == NULL) | |
354 | return -ENOMEM; | |
355 | ||
8d5ebd83 JG |
356 | get_random_bytes(&hash_seed, sizeof hash_seed); |
357 | ||
064af421 BP |
358 | return 0; |
359 | } | |
360 | ||
361 | /* Uninitializes the flow module. */ | |
362 | void flow_exit(void) | |
363 | { | |
364 | kmem_cache_destroy(flow_cache); | |
365 | } |