]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Distributed under the terms of the GNU GPL version 2. | |
834377ea | 3 | * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. |
a14bc59f BP |
4 | * |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
064af421 BP |
7 | */ |
8 | ||
9 | #include "flow.h" | |
f5e86186 | 10 | #include "datapath.h" |
064af421 BP |
11 | #include <linux/netdevice.h> |
12 | #include <linux/etherdevice.h> | |
13 | #include <linux/if_ether.h> | |
14 | #include <linux/if_vlan.h> | |
15 | #include <net/llc_pdu.h> | |
16 | #include <linux/kernel.h> | |
17 | #include <linux/jiffies.h> | |
18 | #include <linux/llc.h> | |
19 | #include <linux/module.h> | |
20 | #include <linux/in.h> | |
21 | #include <linux/rcupdate.h> | |
a26ef517 | 22 | #include <linux/if_arp.h> |
064af421 BP |
23 | #include <linux/if_ether.h> |
24 | #include <linux/ip.h> | |
25 | #include <linux/tcp.h> | |
26 | #include <linux/udp.h> | |
27 | #include <linux/icmp.h> | |
3c5f6de3 | 28 | #include <net/inet_ecn.h> |
064af421 BP |
29 | #include <net/ip.h> |
30 | ||
31 | #include "compat.h" | |
32 | ||
/* Slab cache for struct sw_flow objects; created in flow_init() and
 * destroyed in flow_exit(). */
struct kmem_cache *flow_cache;
/* ARP header as it appears in an Ethernet+IPv4 ARP packet, including the
 * address members that the generic kernel struct arphdr omits.  Parsed
 * directly from packet data in flow_extract(), hence packed. */
struct arp_eth_header
{
	__be16 ar_hrd;			/* format of hardware address */
	__be16 ar_pro;			/* format of protocol address */
	unsigned char ar_hln;		/* length of hardware address */
	unsigned char ar_pln;		/* length of protocol address */
	__be16 ar_op;			/* ARP opcode (command) */

	/* Ethernet+IPv4 specific members. */
	unsigned char ar_sha[ETH_ALEN];	/* sender hardware address */
	unsigned char ar_sip[4];	/* sender IP address */
	unsigned char ar_tha[ETH_ALEN];	/* target hardware address */
	unsigned char ar_tip[4];	/* target IP address */
} __attribute__((packed));
49 | ||
50 | static inline int arphdr_ok(struct sk_buff *skb) | |
51 | { | |
52 | int nh_ofs = skb_network_offset(skb); | |
53 | return pskb_may_pull(skb, nh_ofs + sizeof(struct arp_eth_header)); | |
54 | } | |
55 | ||
064af421 BP |
56 | static inline int iphdr_ok(struct sk_buff *skb) |
57 | { | |
58 | int nh_ofs = skb_network_offset(skb); | |
59 | if (skb->len >= nh_ofs + sizeof(struct iphdr)) { | |
60 | int ip_len = ip_hdrlen(skb); | |
61 | return (ip_len >= sizeof(struct iphdr) | |
62 | && pskb_may_pull(skb, nh_ofs + ip_len)); | |
63 | } | |
64 | return 0; | |
65 | } | |
66 | ||
67 | static inline int tcphdr_ok(struct sk_buff *skb) | |
68 | { | |
69 | int th_ofs = skb_transport_offset(skb); | |
70 | if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) { | |
71 | int tcp_len = tcp_hdrlen(skb); | |
72 | return (tcp_len >= sizeof(struct tcphdr) | |
73 | && skb->len >= th_ofs + tcp_len); | |
74 | } | |
75 | return 0; | |
76 | } | |
77 | ||
78 | static inline int udphdr_ok(struct sk_buff *skb) | |
79 | { | |
80 | int th_ofs = skb_transport_offset(skb); | |
81 | return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr)); | |
82 | } | |
83 | ||
84 | static inline int icmphdr_ok(struct sk_buff *skb) | |
85 | { | |
86 | int th_ofs = skb_transport_offset(skb); | |
87 | return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr)); | |
88 | } | |
89 | ||
/* Byte offset of the flags byte within a TCP header, and the mask that
 * keeps its six low flag bits (FIN/SYN/RST/PSH/ACK/URG). */
#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f

/* Returns 'skb''s TCP header cast to the project-local ovs_tcphdr type. */
static inline struct ovs_tcphdr *ovs_tcp_hdr(const struct sk_buff *skb)
{
	return (struct ovs_tcphdr *)skb_transport_header(skb);
}
97 | ||
/* Updates 'flow''s statistics from packet 'skb': stamps the current time,
 * increments the packet and byte counters, and ORs in the packet's TCP
 * flag bits (for TCP-over-IPv4 packets only).  Also caches the packet's
 * IP TOS byte in the flow.  The statistics fields are updated under
 * flow->lock with interrupts disabled. */
void flow_used(struct sw_flow *flow, struct sk_buff *skb)
{
	unsigned long flags;
	u8 tcp_flags = 0;

	if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
		struct iphdr *nh = ip_hdr(skb);
		/* NOTE(review): ip_tos is written outside flow->lock, unlike
		 * the counters below — confirm single-byte update is safe
		 * against concurrent readers. */
		flow->ip_tos = nh->tos;
		if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) {
			u8 *tcp = (u8 *)tcp_hdr(skb);
			/* Flags live in byte 13 of the TCP header; keep only
			 * the six well-defined flag bits. */
			tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
		}
	}

	spin_lock_irqsave(&flow->lock, flags);
	getnstimeofday(&flow->used);
	flow->packet_count++;
	flow->byte_count += skb->len;
	flow->tcp_flags |= tcp_flags;
	spin_unlock_irqrestore(&flow->lock, flags);
}
119 | ||
120 | struct sw_flow_actions *flow_actions_alloc(size_t n_actions) | |
121 | { | |
122 | struct sw_flow_actions *sfa; | |
123 | ||
124 | if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action)) | |
125 | return ERR_PTR(-EINVAL); | |
126 | ||
127 | sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action), | |
128 | GFP_KERNEL); | |
129 | if (!sfa) | |
130 | return ERR_PTR(-ENOMEM); | |
131 | ||
132 | sfa->n_actions = n_actions; | |
133 | return sfa; | |
134 | } | |
135 | ||
136 | ||
137 | /* Frees 'flow' immediately. */ | |
138 | void flow_free(struct sw_flow *flow) | |
139 | { | |
140 | if (unlikely(!flow)) | |
141 | return; | |
142 | kfree(flow->sf_acts); | |
143 | kmem_cache_free(flow_cache, flow); | |
144 | } | |
145 | ||
146 | /* RCU callback used by flow_deferred_free. */ | |
147 | static void rcu_free_flow_callback(struct rcu_head *rcu) | |
148 | { | |
149 | struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); | |
150 | flow_free(flow); | |
151 | } | |
152 | ||
/* Schedules 'flow' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible.
 * The actual free happens in rcu_free_flow_callback(). */
void flow_deferred_free(struct sw_flow *flow)
{
	call_rcu(&flow->rcu, rcu_free_flow_callback);
}
159 | ||
160 | /* RCU callback used by flow_deferred_free_acts. */ | |
161 | static void rcu_free_acts_callback(struct rcu_head *rcu) | |
162 | { | |
163 | struct sw_flow_actions *sf_acts = container_of(rcu, | |
164 | struct sw_flow_actions, rcu); | |
165 | kfree(sf_acts); | |
166 | } | |
167 | ||
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible.
 * The actual free happens in rcu_free_acts_callback(). */
void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
}
174 | ||
175 | #define SNAP_OUI_LEN 3 | |
176 | ||
177 | struct eth_snap_hdr | |
178 | { | |
179 | struct ethhdr eth; | |
180 | u8 dsap; /* Always 0xAA */ | |
181 | u8 ssap; /* Always 0xAA */ | |
182 | u8 ctrl; | |
183 | u8 oui[SNAP_OUI_LEN]; | |
184 | u16 ethertype; | |
185 | } __attribute__ ((packed)); | |
186 | ||
187 | static int is_snap(const struct eth_snap_hdr *esh) | |
188 | { | |
189 | return (esh->dsap == LLC_SAP_SNAP | |
190 | && esh->ssap == LLC_SAP_SNAP | |
191 | && !memcmp(esh->oui, "\0\0\0", 3)); | |
192 | } | |
193 | ||
/* Parses the Ethernet frame in 'skb', which was received on 'in_port',
 * and initializes 'key' to match.  Returns 1 if 'skb' contains an IP
 * fragment, 0 otherwise.
 *
 * Side effects: resets 'skb''s MAC header and sets its network header
 * (and, for non-fragmented IPv4, its transport header); may pull packet
 * data into the linear area. */
int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
{
	struct ethhdr *eth;
	struct eth_snap_hdr *esh;
	int retval = 0;
	int nh_ofs;

	/* Start from an all-zero key so any field not parsed below matches
	 * as zero. */
	memset(key, 0, sizeof *key);
	key->dl_vlan = htons(ODP_VLAN_NONE);
	key->in_port = in_port;

	/* Runt frame without even a full Ethernet header: leave the key
	 * mostly zeroed. */
	if (skb->len < sizeof *eth)
		return 0;
	/* Pull up to the first 64 bytes into the linear area up front,
	 * presumably so the L2 parsing below can use direct pointer access;
	 * L3/L4 helpers (iphdr_ok etc.) pull further as needed. */
	if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) {
		return 0;
	}

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);
	esh = (struct eth_snap_hdr *) eth;
	nh_ofs = sizeof *eth;
	/* Distinguish Ethernet II (h_proto is a type) from 802.3
	 * (h_proto is a length below the cutoff). */
	if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF))
		key->dl_type = eth->h_proto;
	else if (skb->len >= sizeof *esh && is_snap(esh)) {
		/* 802.3 + LLC/SNAP with zero OUI: the SNAP ethertype is the
		 * real Ethernet type. */
		key->dl_type = esh->ethertype;
		nh_ofs = sizeof *esh;
	} else {
		/* 802.3 with some other LLC: no Ethernet type available. */
		key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE);
		if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) {
			nh_ofs += sizeof(struct llc_pdu_un);
		}
	}

	/* Check for a VLAN tag (single tag only; the inner type replaces
	 * dl_type). */
	if (key->dl_type == htons(ETH_P_8021Q) &&
	    skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
		struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs);
		key->dl_type = vh->h_vlan_encapsulated_proto;
		key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
		key->dl_vlan_pcp = (ntohs(vh->h_vlan_TCI) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT;
		nh_ofs += sizeof(struct vlan_hdr);
	}
	memcpy(key->dl_src, eth->h_source, ETH_ALEN);
	memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
	skb_set_network_header(skb, nh_ofs);

	/* Network layer. */
	if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
		struct iphdr *nh = ip_hdr(skb);
		int th_ofs = nh_ofs + nh->ihl * 4;
		key->nw_src = nh->saddr;
		key->nw_dst = nh->daddr;
		/* Mask out the ECN bits so they don't affect matching. */
		key->nw_tos = nh->tos & ~INET_ECN_MASK;
		key->nw_proto = nh->protocol;
		skb_set_transport_header(skb, th_ofs);

		/* Transport layer.  Only parsed for unfragmented packets;
		 * any fragment (first or later) is reported via the return
		 * value instead. */
		if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) {
			if (key->nw_proto == IPPROTO_TCP) {
				if (tcphdr_ok(skb)) {
					struct tcphdr *tcp = tcp_hdr(skb);
					key->tp_src = tcp->source;
					key->tp_dst = tcp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_UDP) {
				if (udphdr_ok(skb)) {
					struct udphdr *udp = udp_hdr(skb);
					key->tp_src = udp->source;
					key->tp_dst = udp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_ICMP) {
				if (icmphdr_ok(skb)) {
					struct icmphdr *icmp = icmp_hdr(skb);
					/* The ICMP type and code fields use the 16-bit
					 * transport port fields, so we need to store them
					 * in 16-bit network byte order. */
					key->tp_src = htons(icmp->type);
					key->tp_dst = htons(icmp->code);
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			}
		} else {
			retval = 1;
		}
	} else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
		struct arp_eth_header *arp;

		arp = (struct arp_eth_header *)skb_network_header(skb);

		/* Only Ethernet/IPv4 ARP is parsed into the key. */
		if (arp->ar_hrd == htons(ARPHRD_ETHER)
				&& arp->ar_pro == htons(ETH_P_IP)
				&& arp->ar_hln == ETH_ALEN
				&& arp->ar_pln == 4) {

			/* We only match on the lower 8 bits of the opcode. */
			if (ntohs(arp->ar_op) <= 0xff) {
				key->nw_proto = ntohs(arp->ar_op);
			}

			/* For request/reply, reuse the IP address key fields
			 * for the ARP sender/target IPs. */
			if (key->nw_proto == ARPOP_REQUEST
					|| key->nw_proto == ARPOP_REPLY) {
				memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src));
				memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst));
			}
		}
	} else {
		skb_reset_transport_header(skb);
	}
	return retval;
}
321 | ||
322 | /* Initializes the flow module. | |
323 | * Returns zero if successful or a negative error code. */ | |
324 | int flow_init(void) | |
325 | { | |
326 | flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, | |
327 | 0, NULL); | |
328 | if (flow_cache == NULL) | |
329 | return -ENOMEM; | |
330 | ||
331 | return 0; | |
332 | } | |
333 | ||
/* Uninitializes the flow module, destroying the sw_flow slab cache.
 * NOTE(review): all flows must already have been freed before this runs
 * (kmem_cache_destroy expects an empty cache) — confirm callers ensure
 * this. */
void flow_exit(void)
{
	kmem_cache_destroy(flow_cache);
}