/*
 * Distributed under the terms of the GNU GPL version 2.
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */
11 #include <asm/uaccess.h>
12 #include <linux/netdevice.h>
13 #include <linux/etherdevice.h>
14 #include <linux/if_ether.h>
15 #include <linux/if_vlan.h>
16 #include <net/llc_pdu.h>
17 #include <linux/kernel.h>
18 #include <linux/jhash.h>
19 #include <linux/jiffies.h>
20 #include <linux/llc.h>
21 #include <linux/module.h>
23 #include <linux/rcupdate.h>
24 #include <linux/if_arp.h>
25 #include <linux/if_ether.h>
27 #include <linux/ipv6.h>
28 #include <linux/tcp.h>
29 #include <linux/udp.h>
30 #include <linux/icmp.h>
31 #include <linux/icmpv6.h>
32 #include <net/inet_ecn.h>
35 #include <net/ndisc.h>
39 static struct kmem_cache
*flow_cache
;
40 static unsigned int hash_seed __read_mostly
;
42 static int check_header(struct sk_buff
*skb
, int len
)
44 if (unlikely(skb
->len
< len
))
46 if (unlikely(!pskb_may_pull(skb
, len
)))
51 static inline bool arphdr_ok(struct sk_buff
*skb
)
53 return pskb_may_pull(skb
, skb_network_offset(skb
) +
54 sizeof(struct arp_eth_header
));
57 static inline int check_iphdr(struct sk_buff
*skb
)
59 unsigned int nh_ofs
= skb_network_offset(skb
);
63 err
= check_header(skb
, nh_ofs
+ sizeof(struct iphdr
));
67 ip_len
= ip_hdrlen(skb
);
68 if (unlikely(ip_len
< sizeof(struct iphdr
) ||
69 skb
->len
< nh_ofs
+ ip_len
))
72 skb_set_transport_header(skb
, nh_ofs
+ ip_len
);
76 static inline bool tcphdr_ok(struct sk_buff
*skb
)
78 int th_ofs
= skb_transport_offset(skb
);
81 if (unlikely(!pskb_may_pull(skb
, th_ofs
+ sizeof(struct tcphdr
))))
84 tcp_len
= tcp_hdrlen(skb
);
85 if (unlikely(tcp_len
< sizeof(struct tcphdr
) ||
86 skb
->len
< th_ofs
+ tcp_len
))
92 static inline bool udphdr_ok(struct sk_buff
*skb
)
94 return pskb_may_pull(skb
, skb_transport_offset(skb
) +
95 sizeof(struct udphdr
));
98 static inline bool icmphdr_ok(struct sk_buff
*skb
)
100 return pskb_may_pull(skb
, skb_transport_offset(skb
) +
101 sizeof(struct icmphdr
));
104 u64
flow_used_time(unsigned long flow_jiffies
)
106 struct timespec cur_ts
;
109 ktime_get_ts(&cur_ts
);
110 idle_ms
= jiffies_to_msecs(jiffies
- flow_jiffies
);
111 cur_ms
= (u64
)cur_ts
.tv_sec
* MSEC_PER_SEC
+
112 cur_ts
.tv_nsec
/ NSEC_PER_MSEC
;
114 return cur_ms
- idle_ms
;
/* Number of bytes of struct sw_flow_key from its start up to and including
 * member 'field', i.e. the offset of the first byte past 'field'.
 *
 * The expansion is fully parenthesized so that it can be used safely as an
 * operand of a larger expression. */
#define SW_FLOW_KEY_OFFSET(field)			\
	(offsetof(struct sw_flow_key, field) +		\
	 FIELD_SIZEOF(struct sw_flow_key, field))
121 static int parse_ipv6hdr(struct sk_buff
*skb
, struct sw_flow_key
*key
,
124 unsigned int nh_ofs
= skb_network_offset(skb
);
131 *key_lenp
= SW_FLOW_KEY_OFFSET(ipv6
.addr
);
133 err
= check_header(skb
, nh_ofs
+ sizeof(*nh
));
138 nexthdr
= nh
->nexthdr
;
139 payload_ofs
= (u8
*)(nh
+ 1) - skb
->data
;
141 key
->ip
.nw_proto
= NEXTHDR_NONE
;
142 key
->ip
.nw_tos
= ipv6_get_dsfield(nh
) & ~INET_ECN_MASK
;
143 ipv6_addr_copy(&key
->ipv6
.addr
.src
, &nh
->saddr
);
144 ipv6_addr_copy(&key
->ipv6
.addr
.dst
, &nh
->daddr
);
146 payload_ofs
= ipv6_skip_exthdr(skb
, payload_ofs
, &nexthdr
);
147 if (unlikely(payload_ofs
< 0))
150 nh_len
= payload_ofs
- nh_ofs
;
151 skb_set_transport_header(skb
, nh_ofs
+ nh_len
);
152 key
->ip
.nw_proto
= nexthdr
;
156 static bool icmp6hdr_ok(struct sk_buff
*skb
)
158 return pskb_may_pull(skb
, skb_transport_offset(skb
) +
159 sizeof(struct icmp6hdr
));
162 #define TCP_FLAGS_OFFSET 13
163 #define TCP_FLAG_MASK 0x3f
165 void flow_used(struct sw_flow
*flow
, struct sk_buff
*skb
)
169 if (flow
->key
.eth
.type
== htons(ETH_P_IP
) &&
170 flow
->key
.ip
.nw_proto
== IPPROTO_TCP
) {
171 u8
*tcp
= (u8
*)tcp_hdr(skb
);
172 tcp_flags
= *(tcp
+ TCP_FLAGS_OFFSET
) & TCP_FLAG_MASK
;
175 spin_lock_bh(&flow
->lock
);
176 flow
->used
= jiffies
;
177 flow
->packet_count
++;
178 flow
->byte_count
+= skb
->len
;
179 flow
->tcp_flags
|= tcp_flags
;
180 spin_unlock_bh(&flow
->lock
);
183 struct sw_flow_actions
*flow_actions_alloc(const struct nlattr
*actions
)
185 int actions_len
= nla_len(actions
);
186 struct sw_flow_actions
*sfa
;
188 /* At least DP_MAX_PORTS actions are required to be able to flood a
189 * packet to every port. Factor of 2 allows for setting VLAN tags,
191 if (actions_len
> 2 * DP_MAX_PORTS
* nla_total_size(4))
192 return ERR_PTR(-EINVAL
);
194 sfa
= kmalloc(sizeof(*sfa
) + actions_len
, GFP_KERNEL
);
196 return ERR_PTR(-ENOMEM
);
198 sfa
->actions_len
= actions_len
;
199 memcpy(sfa
->actions
, nla_data(actions
), actions_len
);
203 struct sw_flow
*flow_alloc(void)
205 struct sw_flow
*flow
;
207 flow
= kmem_cache_alloc(flow_cache
, GFP_KERNEL
);
209 return ERR_PTR(-ENOMEM
);
211 spin_lock_init(&flow
->lock
);
212 atomic_set(&flow
->refcnt
, 1);
213 flow
->sf_acts
= NULL
;
219 void flow_free_tbl(struct tbl_node
*node
)
221 struct sw_flow
*flow
= flow_cast(node
);
/* RCU callback used by flow_deferred_free. */
228 static void rcu_free_flow_callback(struct rcu_head
*rcu
)
230 struct sw_flow
*flow
= container_of(rcu
, struct sw_flow
, rcu
);
236 /* Schedules 'flow' to be freed after the next RCU grace period.
237 * The caller must hold rcu_read_lock for this to be sensible. */
238 void flow_deferred_free(struct sw_flow
*flow
)
240 call_rcu(&flow
->rcu
, rcu_free_flow_callback
);
243 void flow_hold(struct sw_flow
*flow
)
245 atomic_inc(&flow
->refcnt
);
248 void flow_put(struct sw_flow
*flow
)
253 if (atomic_dec_and_test(&flow
->refcnt
)) {
254 kfree((struct sf_flow_acts __force
*)flow
->sf_acts
);
255 kmem_cache_free(flow_cache
, flow
);
/* RCU callback used by flow_deferred_free_acts. */
260 static void rcu_free_acts_callback(struct rcu_head
*rcu
)
262 struct sw_flow_actions
*sf_acts
= container_of(rcu
,
263 struct sw_flow_actions
, rcu
);
267 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
268 * The caller must hold rcu_read_lock for this to be sensible. */
269 void flow_deferred_free_acts(struct sw_flow_actions
*sf_acts
)
271 call_rcu(&sf_acts
->rcu
, rcu_free_acts_callback
);
274 static int parse_vlan(struct sk_buff
*skb
, struct sw_flow_key
*key
)
277 __be16 eth_type
; /* ETH_P_8021Q */
280 struct qtag_prefix
*qp
;
282 if (unlikely(!pskb_may_pull(skb
, sizeof(struct qtag_prefix
) +
286 qp
= (struct qtag_prefix
*) skb
->data
;
287 key
->eth
.tci
= qp
->tci
| htons(VLAN_TAG_PRESENT
);
288 __skb_pull(skb
, sizeof(struct qtag_prefix
));
293 static __be16
parse_ethertype(struct sk_buff
*skb
)
295 struct llc_snap_hdr
{
296 u8 dsap
; /* Always 0xAA */
297 u8 ssap
; /* Always 0xAA */
302 struct llc_snap_hdr
*llc
;
305 proto
= *(__be16
*) skb
->data
;
306 __skb_pull(skb
, sizeof(__be16
));
308 if (ntohs(proto
) >= 1536)
311 if (skb
->len
< sizeof(struct llc_snap_hdr
))
312 return htons(ETH_P_802_2
);
314 if (unlikely(!pskb_may_pull(skb
, sizeof(struct llc_snap_hdr
))))
317 llc
= (struct llc_snap_hdr
*) skb
->data
;
318 if (llc
->dsap
!= LLC_SAP_SNAP
||
319 llc
->ssap
!= LLC_SAP_SNAP
||
320 (llc
->oui
[0] | llc
->oui
[1] | llc
->oui
[2]) != 0)
321 return htons(ETH_P_802_2
);
323 __skb_pull(skb
, sizeof(struct llc_snap_hdr
));
324 return llc
->ethertype
;
327 static int parse_icmpv6(struct sk_buff
*skb
, struct sw_flow_key
*key
,
328 int *key_lenp
, int nh_len
)
330 struct icmp6hdr
*icmp
= icmp6_hdr(skb
);
334 /* The ICMPv6 type and code fields use the 16-bit transport port
335 * fields, so we need to store them in 16-bit network byte order.
337 key
->ipv6
.tp
.src
= htons(icmp
->icmp6_type
);
338 key
->ipv6
.tp
.dst
= htons(icmp
->icmp6_code
);
339 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.tp
);
341 if (icmp
->icmp6_code
== 0 &&
342 (icmp
->icmp6_type
== NDISC_NEIGHBOUR_SOLICITATION
||
343 icmp
->icmp6_type
== NDISC_NEIGHBOUR_ADVERTISEMENT
)) {
344 int icmp_len
= skb
->len
- skb_transport_offset(skb
);
348 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.nd
);
350 /* In order to process neighbor discovery options, we need the
353 if (unlikely(icmp_len
< sizeof(*nd
)))
355 if (unlikely(skb_linearize(skb
))) {
360 nd
= (struct nd_msg
*)skb_transport_header(skb
);
361 ipv6_addr_copy(&key
->ipv6
.nd
.target
, &nd
->target
);
362 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.nd
);
364 icmp_len
-= sizeof(*nd
);
366 while (icmp_len
>= 8) {
367 struct nd_opt_hdr
*nd_opt
= (struct nd_opt_hdr
*)(nd
->opt
+ offset
);
368 int opt_len
= nd_opt
->nd_opt_len
* 8;
370 if (unlikely(!opt_len
|| opt_len
> icmp_len
))
373 /* Store the link layer address if the appropriate
374 * option is provided. It is considered an error if
375 * the same link layer option is specified twice.
377 if (nd_opt
->nd_opt_type
== ND_OPT_SOURCE_LL_ADDR
379 if (unlikely(!is_zero_ether_addr(key
->ipv6
.nd
.sll
)))
381 memcpy(key
->ipv6
.nd
.sll
,
382 &nd
->opt
[offset
+sizeof(*nd_opt
)], ETH_ALEN
);
383 } else if (nd_opt
->nd_opt_type
== ND_OPT_TARGET_LL_ADDR
385 if (unlikely(!is_zero_ether_addr(key
->ipv6
.nd
.tll
)))
387 memcpy(key
->ipv6
.nd
.tll
,
388 &nd
->opt
[offset
+sizeof(*nd_opt
)], ETH_ALEN
);
399 memset(&key
->ipv6
.nd
.target
, 0, sizeof(key
->ipv6
.nd
.target
));
400 memset(key
->ipv6
.nd
.sll
, 0, sizeof(key
->ipv6
.nd
.sll
));
401 memset(key
->ipv6
.nd
.tll
, 0, sizeof(key
->ipv6
.nd
.tll
));
/**
 * flow_extract - extracts a flow key from an Ethernet frame.
 * @skb: sk_buff that contains the frame, with skb->data pointing to the
 * Ethernet header.
 * @in_port: port number on which @skb was received.
 * @key: output flow key
 * @key_lenp: length of output flow key
 * @is_frag: set to 1 if @skb contains an IPv4 fragment, or to 0 if @skb does
 * not contain an IPv4 packet or if it is not a fragment.
 *
 * The caller must ensure that skb->len >= ETH_HLEN.
 *
 * Returns 0 if successful, otherwise a negative errno value.
 *
 * Initializes @skb header pointers as follows:
 *
 *    - skb->mac_header: the Ethernet header.
 *
 *    - skb->network_header: just past the Ethernet header, or just past the
 *      VLAN header, to the first byte of the Ethernet payload.
 *
 *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 *      on output, then just past the IP header, if one is present and
 *      of a correct length, otherwise the same as skb->network_header.
 *      For other key->eth.type values it is left untouched.
 */
434 int flow_extract(struct sk_buff
*skb
, u16 in_port
, struct sw_flow_key
*key
,
435 int *key_lenp
, bool *is_frag
)
438 int key_len
= SW_FLOW_KEY_OFFSET(eth
);
441 memset(key
, 0, sizeof(*key
));
442 key
->eth
.tun_id
= OVS_CB(skb
)->tun_id
;
443 key
->eth
.in_port
= in_port
;
446 skb_reset_mac_header(skb
);
448 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
449 * header in the linear data area.
452 memcpy(key
->eth
.src
, eth
->h_source
, ETH_ALEN
);
453 memcpy(key
->eth
.dst
, eth
->h_dest
, ETH_ALEN
);
455 __skb_pull(skb
, 2 * ETH_ALEN
);
457 if (vlan_tx_tag_present(skb
))
458 key
->eth
.tci
= htons(vlan_get_tci(skb
));
459 else if (eth
->h_proto
== htons(ETH_P_8021Q
))
460 if (unlikely(parse_vlan(skb
, key
)))
463 key
->eth
.type
= parse_ethertype(skb
);
464 if (unlikely(key
->eth
.type
== htons(0)))
467 skb_reset_network_header(skb
);
468 __skb_push(skb
, skb
->data
- skb_mac_header(skb
));
471 if (key
->eth
.type
== htons(ETH_P_IP
)) {
474 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.addr
);
476 error
= check_iphdr(skb
);
477 if (unlikely(error
)) {
478 if (error
== -EINVAL
) {
479 skb
->transport_header
= skb
->network_header
;
486 key
->ipv4
.addr
.src
= nh
->saddr
;
487 key
->ipv4
.addr
.dst
= nh
->daddr
;
488 key
->ip
.nw_tos
= nh
->tos
& ~INET_ECN_MASK
;
489 key
->ip
.nw_proto
= nh
->protocol
;
491 /* Transport layer. */
492 if (!(nh
->frag_off
& htons(IP_MF
| IP_OFFSET
)) &&
493 !(skb_shinfo(skb
)->gso_type
& SKB_GSO_UDP
)) {
494 if (key
->ip
.nw_proto
== IPPROTO_TCP
) {
495 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.tp
);
496 if (tcphdr_ok(skb
)) {
497 struct tcphdr
*tcp
= tcp_hdr(skb
);
498 key
->ipv4
.tp
.src
= tcp
->source
;
499 key
->ipv4
.tp
.dst
= tcp
->dest
;
501 } else if (key
->ip
.nw_proto
== IPPROTO_UDP
) {
502 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.tp
);
503 if (udphdr_ok(skb
)) {
504 struct udphdr
*udp
= udp_hdr(skb
);
505 key
->ipv4
.tp
.src
= udp
->source
;
506 key
->ipv4
.tp
.dst
= udp
->dest
;
508 } else if (key
->ip
.nw_proto
== IPPROTO_ICMP
) {
509 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.tp
);
510 if (icmphdr_ok(skb
)) {
511 struct icmphdr
*icmp
= icmp_hdr(skb
);
512 /* The ICMP type and code fields use the 16-bit
513 * transport port fields, so we need to store them
514 * in 16-bit network byte order. */
515 key
->ipv4
.tp
.src
= htons(icmp
->type
);
516 key
->ipv4
.tp
.dst
= htons(icmp
->code
);
522 } else if (key
->eth
.type
== htons(ETH_P_ARP
) && arphdr_ok(skb
)) {
523 struct arp_eth_header
*arp
;
525 arp
= (struct arp_eth_header
*)skb_network_header(skb
);
527 if (arp
->ar_hrd
== htons(ARPHRD_ETHER
)
528 && arp
->ar_pro
== htons(ETH_P_IP
)
529 && arp
->ar_hln
== ETH_ALEN
530 && arp
->ar_pln
== 4) {
532 /* We only match on the lower 8 bits of the opcode. */
533 if (ntohs(arp
->ar_op
) <= 0xff)
534 key
->ip
.nw_proto
= ntohs(arp
->ar_op
);
536 if (key
->ip
.nw_proto
== ARPOP_REQUEST
537 || key
->ip
.nw_proto
== ARPOP_REPLY
) {
538 memcpy(&key
->ipv4
.addr
.src
, arp
->ar_sip
, sizeof(key
->ipv4
.addr
.src
));
539 memcpy(&key
->ipv4
.addr
.dst
, arp
->ar_tip
, sizeof(key
->ipv4
.addr
.dst
));
540 memcpy(key
->ipv4
.arp
.sha
, arp
->ar_sha
, ETH_ALEN
);
541 memcpy(key
->ipv4
.arp
.tha
, arp
->ar_tha
, ETH_ALEN
);
542 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.arp
);
545 } else if (key
->eth
.type
== htons(ETH_P_IPV6
)) {
546 int nh_len
; /* IPv6 Header + Extensions */
548 nh_len
= parse_ipv6hdr(skb
, key
, &key_len
);
549 if (unlikely(nh_len
< 0)) {
550 if (nh_len
== -EINVAL
)
551 skb
->transport_header
= skb
->network_header
;
557 /* Transport layer. */
558 if (key
->ip
.nw_proto
== NEXTHDR_TCP
) {
559 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.tp
);
560 if (tcphdr_ok(skb
)) {
561 struct tcphdr
*tcp
= tcp_hdr(skb
);
562 key
->ipv6
.tp
.src
= tcp
->source
;
563 key
->ipv6
.tp
.dst
= tcp
->dest
;
565 } else if (key
->ip
.nw_proto
== NEXTHDR_UDP
) {
566 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.tp
);
567 if (udphdr_ok(skb
)) {
568 struct udphdr
*udp
= udp_hdr(skb
);
569 key
->ipv6
.tp
.src
= udp
->source
;
570 key
->ipv6
.tp
.dst
= udp
->dest
;
572 } else if (key
->ip
.nw_proto
== NEXTHDR_ICMP
) {
573 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.tp
);
574 if (icmp6hdr_ok(skb
)) {
575 error
= parse_icmpv6(skb
, key
, &key_len
, nh_len
);
587 u32
flow_hash(const struct sw_flow_key
*key
, int key_len
)
589 return jhash2((u32
*)key
, DIV_ROUND_UP(key_len
, sizeof(u32
)), hash_seed
);
592 int flow_cmp(const struct tbl_node
*node
, void *key2_
, int len
)
594 const struct sw_flow_key
*key1
= &flow_cast(node
)->key
;
595 const struct sw_flow_key
*key2
= key2_
;
597 return !memcmp(key1
, key2
, len
);
600 /* The size of the argument for each %ODP_KEY_ATTR_* Netlink attribute. */
601 static const u32 key_lens
[ODP_KEY_ATTR_MAX
+ 1] = {
602 [ODP_KEY_ATTR_TUN_ID
] = 8,
603 [ODP_KEY_ATTR_IN_PORT
] = 4,
604 [ODP_KEY_ATTR_ETHERNET
] = sizeof(struct odp_key_ethernet
),
605 [ODP_KEY_ATTR_8021Q
] = sizeof(struct odp_key_8021q
),
606 [ODP_KEY_ATTR_ETHERTYPE
] = 2,
607 [ODP_KEY_ATTR_IPV4
] = sizeof(struct odp_key_ipv4
),
608 [ODP_KEY_ATTR_IPV6
] = sizeof(struct odp_key_ipv6
),
609 [ODP_KEY_ATTR_TCP
] = sizeof(struct odp_key_tcp
),
610 [ODP_KEY_ATTR_UDP
] = sizeof(struct odp_key_udp
),
611 [ODP_KEY_ATTR_ICMP
] = sizeof(struct odp_key_icmp
),
612 [ODP_KEY_ATTR_ICMPV6
] = sizeof(struct odp_key_icmpv6
),
613 [ODP_KEY_ATTR_ARP
] = sizeof(struct odp_key_arp
),
614 [ODP_KEY_ATTR_ND
] = sizeof(struct odp_key_nd
),
/**
 * flow_from_nlattrs - parses Netlink attributes into a flow key.
 * @swkey: receives the extracted flow key.
 * @key_lenp: number of bytes used in @swkey.
 * @attr: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attributes.
 *
 * This state machine accepts the following forms, with [] for optional
 * elements and | for alternatives:
 *
 * [tun_id] in_port ethernet [8021q] [ethertype \
 *              [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]]
 */
630 int flow_from_nlattrs(struct sw_flow_key
*swkey
, int *key_lenp
,
631 const struct nlattr
*attr
)
634 const struct nlattr
*nla
;
639 memset(swkey
, 0, sizeof(*swkey
));
640 swkey
->eth
.type
= htons(ETH_P_802_2
);
641 key_len
= SW_FLOW_KEY_OFFSET(eth
);
643 prev_type
= ODP_KEY_ATTR_UNSPEC
;
644 nla_for_each_nested(nla
, attr
, rem
) {
645 const struct odp_key_ethernet
*eth_key
;
646 const struct odp_key_8021q
*q_key
;
647 const struct odp_key_ipv4
*ipv4_key
;
648 const struct odp_key_ipv6
*ipv6_key
;
649 const struct odp_key_tcp
*tcp_key
;
650 const struct odp_key_udp
*udp_key
;
651 const struct odp_key_icmp
*icmp_key
;
652 const struct odp_key_icmpv6
*icmpv6_key
;
653 const struct odp_key_arp
*arp_key
;
654 const struct odp_key_nd
*nd_key
;
656 int type
= nla_type(nla
);
658 if (type
> ODP_KEY_ATTR_MAX
|| nla_len(nla
) != key_lens
[type
])
661 #define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE))
662 switch (TRANSITION(prev_type
, type
)) {
663 case TRANSITION(ODP_KEY_ATTR_UNSPEC
, ODP_KEY_ATTR_TUN_ID
):
664 swkey
->eth
.tun_id
= nla_get_be64(nla
);
667 case TRANSITION(ODP_KEY_ATTR_UNSPEC
, ODP_KEY_ATTR_IN_PORT
):
668 case TRANSITION(ODP_KEY_ATTR_TUN_ID
, ODP_KEY_ATTR_IN_PORT
):
669 if (nla_get_u32(nla
) >= DP_MAX_PORTS
)
671 swkey
->eth
.in_port
= nla_get_u32(nla
);
674 case TRANSITION(ODP_KEY_ATTR_IN_PORT
, ODP_KEY_ATTR_ETHERNET
):
675 eth_key
= nla_data(nla
);
676 memcpy(swkey
->eth
.src
, eth_key
->eth_src
, ETH_ALEN
);
677 memcpy(swkey
->eth
.dst
, eth_key
->eth_dst
, ETH_ALEN
);
680 case TRANSITION(ODP_KEY_ATTR_ETHERNET
, ODP_KEY_ATTR_8021Q
):
681 q_key
= nla_data(nla
);
682 /* Only standard 0x8100 VLANs currently supported. */
683 if (q_key
->q_tpid
!= htons(ETH_P_8021Q
))
685 if (q_key
->q_tci
& htons(VLAN_TAG_PRESENT
))
687 swkey
->eth
.tci
= q_key
->q_tci
| htons(VLAN_TAG_PRESENT
);
690 case TRANSITION(ODP_KEY_ATTR_8021Q
, ODP_KEY_ATTR_ETHERTYPE
):
691 case TRANSITION(ODP_KEY_ATTR_ETHERNET
, ODP_KEY_ATTR_ETHERTYPE
):
692 swkey
->eth
.type
= nla_get_be16(nla
);
693 if (ntohs(swkey
->eth
.type
) < 1536)
697 case TRANSITION(ODP_KEY_ATTR_ETHERTYPE
, ODP_KEY_ATTR_IPV4
):
698 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.addr
);
699 if (swkey
->eth
.type
!= htons(ETH_P_IP
))
701 ipv4_key
= nla_data(nla
);
702 swkey
->ip
.nw_proto
= ipv4_key
->ipv4_proto
;
703 swkey
->ip
.nw_tos
= ipv4_key
->ipv4_tos
;
704 swkey
->ipv4
.addr
.src
= ipv4_key
->ipv4_src
;
705 swkey
->ipv4
.addr
.dst
= ipv4_key
->ipv4_dst
;
706 if (swkey
->ip
.nw_tos
& INET_ECN_MASK
)
710 case TRANSITION(ODP_KEY_ATTR_ETHERTYPE
, ODP_KEY_ATTR_IPV6
):
711 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.addr
);
712 if (swkey
->eth
.type
!= htons(ETH_P_IPV6
))
714 ipv6_key
= nla_data(nla
);
715 swkey
->ip
.nw_proto
= ipv6_key
->ipv6_proto
;
716 swkey
->ip
.nw_tos
= ipv6_key
->ipv6_tos
;
717 memcpy(&swkey
->ipv6
.addr
.src
, ipv6_key
->ipv6_src
,
718 sizeof(swkey
->ipv6
.addr
.src
));
719 memcpy(&swkey
->ipv6
.addr
.dst
, ipv6_key
->ipv6_dst
,
720 sizeof(swkey
->ipv6
.addr
.dst
));
721 if (swkey
->ip
.nw_tos
& INET_ECN_MASK
)
725 case TRANSITION(ODP_KEY_ATTR_IPV4
, ODP_KEY_ATTR_TCP
):
726 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.tp
);
727 if (swkey
->ip
.nw_proto
!= IPPROTO_TCP
)
729 tcp_key
= nla_data(nla
);
730 swkey
->ipv4
.tp
.src
= tcp_key
->tcp_src
;
731 swkey
->ipv4
.tp
.dst
= tcp_key
->tcp_dst
;
734 case TRANSITION(ODP_KEY_ATTR_IPV6
, ODP_KEY_ATTR_TCP
):
735 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.tp
);
736 if (swkey
->ip
.nw_proto
!= IPPROTO_TCP
)
738 tcp_key
= nla_data(nla
);
739 swkey
->ipv6
.tp
.src
= tcp_key
->tcp_src
;
740 swkey
->ipv6
.tp
.dst
= tcp_key
->tcp_dst
;
743 case TRANSITION(ODP_KEY_ATTR_IPV4
, ODP_KEY_ATTR_UDP
):
744 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.tp
);
745 if (swkey
->ip
.nw_proto
!= IPPROTO_UDP
)
747 udp_key
= nla_data(nla
);
748 swkey
->ipv4
.tp
.src
= udp_key
->udp_src
;
749 swkey
->ipv4
.tp
.dst
= udp_key
->udp_dst
;
752 case TRANSITION(ODP_KEY_ATTR_IPV6
, ODP_KEY_ATTR_UDP
):
753 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.tp
);
754 if (swkey
->ip
.nw_proto
!= IPPROTO_UDP
)
756 udp_key
= nla_data(nla
);
757 swkey
->ipv6
.tp
.src
= udp_key
->udp_src
;
758 swkey
->ipv6
.tp
.dst
= udp_key
->udp_dst
;
761 case TRANSITION(ODP_KEY_ATTR_IPV4
, ODP_KEY_ATTR_ICMP
):
762 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.tp
);
763 if (swkey
->ip
.nw_proto
!= IPPROTO_ICMP
)
765 icmp_key
= nla_data(nla
);
766 swkey
->ipv4
.tp
.src
= htons(icmp_key
->icmp_type
);
767 swkey
->ipv4
.tp
.dst
= htons(icmp_key
->icmp_code
);
770 case TRANSITION(ODP_KEY_ATTR_IPV6
, ODP_KEY_ATTR_ICMPV6
):
771 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.tp
);
772 if (swkey
->ip
.nw_proto
!= IPPROTO_ICMPV6
)
774 icmpv6_key
= nla_data(nla
);
775 swkey
->ipv6
.tp
.src
= htons(icmpv6_key
->icmpv6_type
);
776 swkey
->ipv6
.tp
.dst
= htons(icmpv6_key
->icmpv6_code
);
779 case TRANSITION(ODP_KEY_ATTR_ETHERTYPE
, ODP_KEY_ATTR_ARP
):
780 key_len
= SW_FLOW_KEY_OFFSET(ipv4
.arp
);
781 if (swkey
->eth
.type
!= htons(ETH_P_ARP
))
783 arp_key
= nla_data(nla
);
784 swkey
->ipv4
.addr
.src
= arp_key
->arp_sip
;
785 swkey
->ipv4
.addr
.dst
= arp_key
->arp_tip
;
786 if (arp_key
->arp_op
& htons(0xff00))
788 swkey
->ip
.nw_proto
= ntohs(arp_key
->arp_op
);
789 memcpy(swkey
->ipv4
.arp
.sha
, arp_key
->arp_sha
, ETH_ALEN
);
790 memcpy(swkey
->ipv4
.arp
.tha
, arp_key
->arp_tha
, ETH_ALEN
);
793 case TRANSITION(ODP_KEY_ATTR_ICMPV6
, ODP_KEY_ATTR_ND
):
794 key_len
= SW_FLOW_KEY_OFFSET(ipv6
.nd
);
795 if (swkey
->ipv6
.tp
.src
!= htons(NDISC_NEIGHBOUR_SOLICITATION
)
796 && swkey
->ipv6
.tp
.src
!= htons(NDISC_NEIGHBOUR_ADVERTISEMENT
))
798 nd_key
= nla_data(nla
);
799 memcpy(&swkey
->ipv6
.nd
.target
, nd_key
->nd_target
,
800 sizeof(swkey
->ipv6
.nd
.target
));
801 memcpy(swkey
->ipv6
.nd
.sll
, nd_key
->nd_sll
, ETH_ALEN
);
802 memcpy(swkey
->ipv6
.nd
.tll
, nd_key
->nd_tll
, ETH_ALEN
);
815 case ODP_KEY_ATTR_UNSPEC
:
818 case ODP_KEY_ATTR_TUN_ID
:
819 case ODP_KEY_ATTR_IN_PORT
:
822 case ODP_KEY_ATTR_ETHERNET
:
823 case ODP_KEY_ATTR_8021Q
:
826 case ODP_KEY_ATTR_ETHERTYPE
:
827 if (swkey
->eth
.type
== htons(ETH_P_IP
) ||
828 swkey
->eth
.type
== htons(ETH_P_ARP
))
832 case ODP_KEY_ATTR_IPV4
:
833 if (swkey
->ip
.nw_proto
== IPPROTO_TCP
||
834 swkey
->ip
.nw_proto
== IPPROTO_UDP
||
835 swkey
->ip
.nw_proto
== IPPROTO_ICMP
)
839 case ODP_KEY_ATTR_IPV6
:
840 if (swkey
->ip
.nw_proto
== IPPROTO_TCP
||
841 swkey
->ip
.nw_proto
== IPPROTO_UDP
||
842 swkey
->ip
.nw_proto
== IPPROTO_ICMPV6
)
846 case ODP_KEY_ATTR_ICMPV6
:
847 if (swkey
->ipv6
.tp
.src
== htons(NDISC_NEIGHBOUR_SOLICITATION
) ||
848 swkey
->ipv6
.tp
.src
== htons(NDISC_NEIGHBOUR_ADVERTISEMENT
))
852 case ODP_KEY_ATTR_TCP
:
853 case ODP_KEY_ATTR_UDP
:
854 case ODP_KEY_ATTR_ICMP
:
855 case ODP_KEY_ATTR_ARP
:
856 case ODP_KEY_ATTR_ND
:
867 WARN_ON_ONCE(!key_len
&& !error
);
/**
 * flow_metadata_from_nlattrs - extracts flow metadata from Netlink attributes.
 * @in_port: receives the extracted input port.
 * @tun_id: receives the extracted tunnel ID.
 * @attr: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attributes.
 *
 * This parses a series of Netlink attributes that form a flow key, which must
 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 * get the metadata, that is, the parts of the flow key that cannot be
 * extracted from the packet itself.
 */
884 int flow_metadata_from_nlattrs(u16
*in_port
, __be64
*tun_id
,
885 const struct nlattr
*attr
)
887 const struct nlattr
*nla
;
893 prev_type
= ODP_KEY_ATTR_UNSPEC
;
894 nla_for_each_nested(nla
, attr
, rem
) {
895 int type
= nla_type(nla
);
897 if (type
> ODP_KEY_ATTR_MAX
|| nla_len(nla
) != key_lens
[type
])
900 switch (TRANSITION(prev_type
, type
)) {
901 case TRANSITION(ODP_KEY_ATTR_UNSPEC
, ODP_KEY_ATTR_TUN_ID
):
902 *tun_id
= nla_get_be64(nla
);
905 case TRANSITION(ODP_KEY_ATTR_UNSPEC
, ODP_KEY_ATTR_IN_PORT
):
906 case TRANSITION(ODP_KEY_ATTR_TUN_ID
, ODP_KEY_ATTR_IN_PORT
):
907 if (nla_get_u32(nla
) >= DP_MAX_PORTS
)
909 *in_port
= nla_get_u32(nla
);
922 if (prev_type
== ODP_KEY_ATTR_UNSPEC
||
923 prev_type
== ODP_KEY_ATTR_TUN_ID
)
928 int flow_to_nlattrs(const struct sw_flow_key
*swkey
, struct sk_buff
*skb
)
930 struct odp_key_ethernet
*eth_key
;
933 /* This is an imperfect sanity-check that FLOW_BUFSIZE doesn't need
934 * to be updated, but will at least raise awareness when new ODP key
935 * types are added. */
936 BUILD_BUG_ON(__ODP_KEY_ATTR_MAX
!= 14);
938 if (swkey
->eth
.tun_id
!= cpu_to_be64(0))
939 NLA_PUT_BE64(skb
, ODP_KEY_ATTR_TUN_ID
, swkey
->eth
.tun_id
);
941 NLA_PUT_U32(skb
, ODP_KEY_ATTR_IN_PORT
, swkey
->eth
.in_port
);
943 nla
= nla_reserve(skb
, ODP_KEY_ATTR_ETHERNET
, sizeof(*eth_key
));
945 goto nla_put_failure
;
946 eth_key
= nla_data(nla
);
947 memcpy(eth_key
->eth_src
, swkey
->eth
.src
, ETH_ALEN
);
948 memcpy(eth_key
->eth_dst
, swkey
->eth
.dst
, ETH_ALEN
);
950 if (swkey
->eth
.tci
!= htons(0)) {
951 struct odp_key_8021q q_key
;
953 q_key
.q_tpid
= htons(ETH_P_8021Q
);
954 q_key
.q_tci
= swkey
->eth
.tci
& ~htons(VLAN_TAG_PRESENT
);
955 NLA_PUT(skb
, ODP_KEY_ATTR_8021Q
, sizeof(q_key
), &q_key
);
958 if (swkey
->eth
.type
== htons(ETH_P_802_2
))
961 NLA_PUT_BE16(skb
, ODP_KEY_ATTR_ETHERTYPE
, swkey
->eth
.type
);
963 if (swkey
->eth
.type
== htons(ETH_P_IP
)) {
964 struct odp_key_ipv4
*ipv4_key
;
966 nla
= nla_reserve(skb
, ODP_KEY_ATTR_IPV4
, sizeof(*ipv4_key
));
968 goto nla_put_failure
;
969 ipv4_key
= nla_data(nla
);
970 memset(ipv4_key
, 0, sizeof(struct odp_key_ipv4
));
971 ipv4_key
->ipv4_src
= swkey
->ipv4
.addr
.src
;
972 ipv4_key
->ipv4_dst
= swkey
->ipv4
.addr
.dst
;
973 ipv4_key
->ipv4_proto
= swkey
->ip
.nw_proto
;
974 ipv4_key
->ipv4_tos
= swkey
->ip
.nw_tos
;
975 } else if (swkey
->eth
.type
== htons(ETH_P_IPV6
)) {
976 struct odp_key_ipv6
*ipv6_key
;
978 nla
= nla_reserve(skb
, ODP_KEY_ATTR_IPV6
, sizeof(*ipv6_key
));
980 goto nla_put_failure
;
981 ipv6_key
= nla_data(nla
);
982 memset(ipv6_key
, 0, sizeof(struct odp_key_ipv6
));
983 memcpy(ipv6_key
->ipv6_src
, &swkey
->ipv6
.addr
.src
,
984 sizeof(ipv6_key
->ipv6_src
));
985 memcpy(ipv6_key
->ipv6_dst
, &swkey
->ipv6
.addr
.dst
,
986 sizeof(ipv6_key
->ipv6_dst
));
987 ipv6_key
->ipv6_proto
= swkey
->ip
.nw_proto
;
988 ipv6_key
->ipv6_tos
= swkey
->ip
.nw_tos
;
989 } else if (swkey
->eth
.type
== htons(ETH_P_ARP
)) {
990 struct odp_key_arp
*arp_key
;
992 nla
= nla_reserve(skb
, ODP_KEY_ATTR_ARP
, sizeof(*arp_key
));
994 goto nla_put_failure
;
995 arp_key
= nla_data(nla
);
996 memset(arp_key
, 0, sizeof(struct odp_key_arp
));
997 arp_key
->arp_sip
= swkey
->ipv4
.addr
.src
;
998 arp_key
->arp_tip
= swkey
->ipv4
.addr
.dst
;
999 arp_key
->arp_op
= htons(swkey
->ip
.nw_proto
);
1000 memcpy(arp_key
->arp_sha
, swkey
->ipv4
.arp
.sha
, ETH_ALEN
);
1001 memcpy(arp_key
->arp_tha
, swkey
->ipv4
.arp
.tha
, ETH_ALEN
);
1004 if (swkey
->eth
.type
== htons(ETH_P_IP
) ||
1005 swkey
->eth
.type
== htons(ETH_P_IPV6
)) {
1007 if (swkey
->ip
.nw_proto
== IPPROTO_TCP
) {
1008 struct odp_key_tcp
*tcp_key
;
1010 nla
= nla_reserve(skb
, ODP_KEY_ATTR_TCP
, sizeof(*tcp_key
));
1012 goto nla_put_failure
;
1013 tcp_key
= nla_data(nla
);
1014 if (swkey
->eth
.type
== htons(ETH_P_IP
)) {
1015 tcp_key
->tcp_src
= swkey
->ipv4
.tp
.src
;
1016 tcp_key
->tcp_dst
= swkey
->ipv4
.tp
.dst
;
1017 } else if (swkey
->eth
.type
== htons(ETH_P_IPV6
)) {
1018 tcp_key
->tcp_src
= swkey
->ipv6
.tp
.src
;
1019 tcp_key
->tcp_dst
= swkey
->ipv6
.tp
.dst
;
1021 } else if (swkey
->ip
.nw_proto
== IPPROTO_UDP
) {
1022 struct odp_key_udp
*udp_key
;
1024 nla
= nla_reserve(skb
, ODP_KEY_ATTR_UDP
, sizeof(*udp_key
));
1026 goto nla_put_failure
;
1027 udp_key
= nla_data(nla
);
1028 if (swkey
->eth
.type
== htons(ETH_P_IP
)) {
1029 udp_key
->udp_src
= swkey
->ipv4
.tp
.src
;
1030 udp_key
->udp_dst
= swkey
->ipv4
.tp
.dst
;
1031 } else if (swkey
->eth
.type
== htons(ETH_P_IPV6
)) {
1032 udp_key
->udp_src
= swkey
->ipv6
.tp
.src
;
1033 udp_key
->udp_dst
= swkey
->ipv6
.tp
.dst
;
1035 } else if (swkey
->eth
.type
== htons(ETH_P_IP
) &&
1036 swkey
->ip
.nw_proto
== IPPROTO_ICMP
) {
1037 struct odp_key_icmp
*icmp_key
;
1039 nla
= nla_reserve(skb
, ODP_KEY_ATTR_ICMP
, sizeof(*icmp_key
));
1041 goto nla_put_failure
;
1042 icmp_key
= nla_data(nla
);
1043 icmp_key
->icmp_type
= ntohs(swkey
->ipv4
.tp
.src
);
1044 icmp_key
->icmp_code
= ntohs(swkey
->ipv4
.tp
.dst
);
1045 } else if (swkey
->eth
.type
== htons(ETH_P_IPV6
) &&
1046 swkey
->ip
.nw_proto
== IPPROTO_ICMPV6
) {
1047 struct odp_key_icmpv6
*icmpv6_key
;
1049 nla
= nla_reserve(skb
, ODP_KEY_ATTR_ICMPV6
,
1050 sizeof(*icmpv6_key
));
1052 goto nla_put_failure
;
1053 icmpv6_key
= nla_data(nla
);
1054 icmpv6_key
->icmpv6_type
= ntohs(swkey
->ipv6
.tp
.src
);
1055 icmpv6_key
->icmpv6_code
= ntohs(swkey
->ipv6
.tp
.dst
);
1057 if (icmpv6_key
->icmpv6_type
== NDISC_NEIGHBOUR_SOLICITATION
||
1058 icmpv6_key
->icmpv6_type
== NDISC_NEIGHBOUR_ADVERTISEMENT
) {
1059 struct odp_key_nd
*nd_key
;
1061 nla
= nla_reserve(skb
, ODP_KEY_ATTR_ND
, sizeof(*nd_key
));
1063 goto nla_put_failure
;
1064 nd_key
= nla_data(nla
);
1065 memcpy(nd_key
->nd_target
, &swkey
->ipv6
.nd
.target
,
1066 sizeof(nd_key
->nd_target
));
1067 memcpy(nd_key
->nd_sll
, swkey
->ipv6
.nd
.sll
, ETH_ALEN
);
1068 memcpy(nd_key
->nd_tll
, swkey
->ipv6
.nd
.tll
, ETH_ALEN
);
/* Initializes the flow module.
 * Returns zero if successful or a negative error code. */
1083 flow_cache
= kmem_cache_create("sw_flow", sizeof(struct sw_flow
), 0,
1085 if (flow_cache
== NULL
)
1088 get_random_bytes(&hash_seed
, sizeof(hash_seed
));
1093 /* Uninitializes the flow module. */
1094 void flow_exit(void)
1096 kmem_cache_destroy(flow_cache
);