/*
 * Distributed under the terms of the GNU GPL version 2.
 * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/inet_ecn.h>

struct kmem_cache *flow_cache;
static unsigned int hash_seed;
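
/* ARP header for an Ethernet+IPv4 ARP packet: the fixed fields followed by
 * the sender and target hardware and protocol addresses. */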
struct arp_eth_header {
	__be16        ar_hrd;	/* format of hardware address */
	__be16        ar_pro;	/* format of protocol address */
	unsigned char ar_hln;	/* length of hardware address */
	unsigned char ar_pln;	/* length of protocol address */
	__be16        ar_op;	/* ARP opcode (command) */

	/* Ethernet+IPv4 specific members. */
	unsigned char ar_sha[ETH_ALEN];	/* sender hardware address */
	unsigned char ar_sip[4];	/* sender IP address */
	unsigned char ar_tha[ETH_ALEN];	/* target hardware address */
	unsigned char ar_tip[4];	/* target IP address */
} __attribute__((packed));
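
/* Returns nonzero if 'skb' has a complete ARP header in its linear data
 * area, pulling it in if necessary. */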
static inline int arphdr_ok(struct sk_buff *skb)
{
	int nh_ofs = skb_network_offset(skb);
	return pskb_may_pull(skb, nh_ofs + sizeof(struct arp_eth_header));
}
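
/* Returns nonzero if 'skb' has a complete IPv4 header, including any
 * options, in its linear data area. */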
static inline int iphdr_ok(struct sk_buff *skb)
{
	int nh_ofs = skb_network_offset(skb);
	if (skb->len >= nh_ofs + sizeof(struct iphdr)) {
		int ip_len = ip_hdrlen(skb);
		return (ip_len >= sizeof(struct iphdr)
			&& pskb_may_pull(skb, nh_ofs + ip_len));
	}
	return 0;
}
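
/* Returns nonzero if 'skb' has a complete TCP header, including any
 * options, and the whole header fits within the packet. */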
static inline int tcphdr_ok(struct sk_buff *skb)
{
	int th_ofs = skb_transport_offset(skb);
	if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) {
		int tcp_len = tcp_hdrlen(skb);
		return (tcp_len >= sizeof(struct tcphdr)
			&& skb->len >= th_ofs + tcp_len);
	}
	return 0;
}
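
/* Returns nonzero if 'skb' has a complete UDP header in its linear data
 * area. */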
static inline int udphdr_ok(struct sk_buff *skb)
{
	int th_ofs = skb_transport_offset(skb);
	return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr));
}
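
/* Returns nonzero if 'skb' has a complete ICMP header in its linear data
 * area. */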
static inline int icmphdr_ok(struct sk_buff *skb)
{
	int th_ofs = skb_transport_offset(skb);
	return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr));
}
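
/* The TCP flags occupy the low 6 bits (FIN, SYN, RST, PSH, ACK, URG) of the
 * byte at offset 13 in the TCP header. */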
#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f
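
/* Returns the transport header of 'skb' cast to an ovs_tcphdr. */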
static inline struct ovs_tcphdr *ovs_tcp_hdr(const struct sk_buff *skb)
{
	return (struct ovs_tcphdr *)skb_transport_header(skb);
}
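
/* Updates 'flow''s statistics to account for 'skb': the last-used time, the
 * packet and byte counts, the IP TOS value, and any TCP flags observed. */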
void flow_used(struct sw_flow *flow, struct sk_buff *skb)
{
	u8 tcp_flags = 0;

	if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
		struct iphdr *nh = ip_hdr(skb);
		flow->ip_tos = nh->tos;
		if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) {
			u8 *tcp = (u8 *)tcp_hdr(skb);
			tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
		}
	}

	spin_lock_bh(&flow->lock);
	getnstimeofday(&flow->used);
	flow->packet_count++;
	flow->byte_count += skb->len;
	flow->tcp_flags |= tcp_flags;
	spin_unlock_bh(&flow->lock);
}
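
/* Allocates an action buffer with room for 'n_actions' actions.  Returns
 * the buffer on success or an ERR_PTR on failure. */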
struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
{
	struct sw_flow_actions *sfa;

	if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action))
		return ERR_PTR(-EINVAL);

	sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action),
		      GFP_KERNEL);
	if (sfa == NULL)
		return ERR_PTR(-ENOMEM);

	sfa->n_actions = n_actions;
	return sfa;
}

/* Frees 'flow' immediately. */
static void flow_free(struct sw_flow *flow)
{
	kfree(flow->sf_acts);
	kmem_cache_free(flow_cache, flow);
}
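
/* Frees the flow contained in flow table node 'node'. */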
void flow_free_tbl(struct tbl_node *node)
{
	struct sw_flow *flow = flow_cast(node);
	flow_free(flow);
}

/* RCU callback used by flow_deferred_free. */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
	flow_free(flow);
}

/* Schedules 'flow' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible. */
void flow_deferred_free(struct sw_flow *flow)
{
	call_rcu(&flow->rcu, rcu_free_flow_callback);
}

/* RCU callback used by flow_deferred_free_acts. */
static void rcu_free_acts_callback(struct rcu_head *rcu)
{
	struct sw_flow_actions *sf_acts = container_of(rcu,
			struct sw_flow_actions, rcu);
	kfree(sf_acts);
}

/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible. */
void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
}

#define SNAP_OUI_LEN 3
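
/* An Ethernet header followed by an 802.2 LLC/SNAP header. */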
struct eth_snap_hdr {
	struct ethhdr eth;
	u8     dsap;	/* Always 0xAA */
	u8     ssap;	/* Always 0xAA */
	u8     ctrl;
	u8     oui[SNAP_OUI_LEN];
	__be16 ethertype;
} __attribute__ ((packed));
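
/* Returns nonzero if 'esh' carries an 802.2 LLC/SNAP header with the
 * all-zeros OUI, that is, a SNAP-encapsulated Ethertype. */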
static int is_snap(const struct eth_snap_hdr *esh)
{
	return (esh->dsap == LLC_SAP_SNAP
		&& esh->ssap == LLC_SAP_SNAP
		&& !memcmp(esh->oui, "\0\0\0", 3));
}

/* Parses the Ethernet frame in 'skb', which was received on 'in_port',
 * and initializes 'key' to match.  Returns 1 if 'skb' contains an IP
 * fragment, 0 otherwise. */
int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
{
	struct ethhdr *eth;
	struct eth_snap_hdr *esh;
	int retval = 0;
	int nh_ofs;

	memset(key, 0, sizeof *key);
	key->tun_id = OVS_CB(skb)->tun_id;
	key->in_port = in_port;
	key->dl_vlan = htons(ODP_VLAN_NONE);

	if (skb->len < sizeof *eth)
		return 0;
	if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) {
		return 0;
	}

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);
	esh = (struct eth_snap_hdr *) eth;
	nh_ofs = sizeof *eth;
	if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF))
		key->dl_type = eth->h_proto;
	else if (skb->len >= sizeof *esh && is_snap(esh)) {
		key->dl_type = esh->ethertype;
		nh_ofs = sizeof *esh;
	} else {
		key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE);
		if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) {
			nh_ofs += sizeof(struct llc_pdu_un);
		}
	}

	/* Check for a VLAN tag */
	if (key->dl_type == htons(ETH_P_8021Q) &&
	    skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
		struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data + nh_ofs);
		key->dl_type = vh->h_vlan_encapsulated_proto;
		key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
		key->dl_vlan_pcp = (ntohs(vh->h_vlan_TCI) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT;
		nh_ofs += sizeof(struct vlan_hdr);
	}
	memcpy(key->dl_src, eth->h_source, ETH_ALEN);
	memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
	skb_set_network_header(skb, nh_ofs);

	/* Network layer. */
	if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
		struct iphdr *nh = ip_hdr(skb);
		int th_ofs = nh_ofs + nh->ihl * 4;
		key->nw_src = nh->saddr;
		key->nw_dst = nh->daddr;
		key->nw_tos = nh->tos & ~INET_ECN_MASK;
		key->nw_proto = nh->protocol;
		skb_set_transport_header(skb, th_ofs);

		/* Transport layer. */
		if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) {
			if (key->nw_proto == IPPROTO_TCP) {
				if (tcphdr_ok(skb)) {
					struct tcphdr *tcp = tcp_hdr(skb);
					key->tp_src = tcp->source;
					key->tp_dst = tcp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_UDP) {
				if (udphdr_ok(skb)) {
					struct udphdr *udp = udp_hdr(skb);
					key->tp_src = udp->source;
					key->tp_dst = udp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_ICMP) {
				if (icmphdr_ok(skb)) {
					struct icmphdr *icmp = icmp_hdr(skb);
					/* The ICMP type and code fields use the 16-bit
					 * transport port fields, so we need to store them
					 * in 16-bit network byte order. */
					key->tp_src = htons(icmp->type);
					key->tp_dst = htons(icmp->code);
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			}
		} else {
			retval = 1;
		}
	} else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
		struct arp_eth_header *arp;

		arp = (struct arp_eth_header *)skb_network_header(skb);

		if (arp->ar_hrd == htons(ARPHRD_ETHER)
		    && arp->ar_pro == htons(ETH_P_IP)
		    && arp->ar_hln == ETH_ALEN
		    && arp->ar_pln == 4) {

			/* We only match on the lower 8 bits of the opcode. */
			if (ntohs(arp->ar_op) <= 0xff) {
				key->nw_proto = ntohs(arp->ar_op);
			}

			if (key->nw_proto == ARPOP_REQUEST
			    || key->nw_proto == ARPOP_REPLY) {
				memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src));
				memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst));
			}
		}
	} else {
		skb_reset_transport_header(skb);
	}
	return retval;
}
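
/* Returns the sw_flow that contains flow table node 'node'. */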
struct sw_flow *flow_cast(const struct tbl_node *node)
{
	return container_of(node, struct sw_flow, tbl_node);
}
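
/* Returns a hash of 'key', for use in indexing the flow table. */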
u32 flow_hash(const struct odp_flow_key *key)
{
	return jhash2((u32 *)key, sizeof *key / sizeof(u32), hash_seed);
}
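
/* Returns nonzero if the flow in 'node' has a key equal to the
 * odp_flow_key pointed to by 'key2_', zero otherwise. */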
int flow_cmp(const struct tbl_node *node, void *key2_)
{
	const struct odp_flow_key *key1 = &flow_cast(node)->key;
	const struct odp_flow_key *key2 = key2_;

	return !memcmp(key1, key2, sizeof(struct odp_flow_key));
}

/* Initializes the flow module.
 * Returns zero if successful or a negative error code. */
int flow_init(void)
{
	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
				       0, NULL);
	if (flow_cache == NULL)
		return -ENOMEM;

	get_random_bytes(&hash_seed, sizeof hash_seed);

	return 0;
}

/* Uninitializes the flow module. */
void flow_exit(void)
{
	kmem_cache_destroy(flow_cache);
}