/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
 * Copyright (c) 2013 Simon Horman
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include "odp-execute.h"
20 #include <arpa/inet.h>
21 #include <netinet/in.h>
22 #include <netinet/icmp6.h>
23 #include <netinet/ip6.h>
27 #include "dp-packet.h"
30 #include "odp-netlink.h"
34 #include "unaligned.h"
37 /* Masked copy of an ethernet address. 'src' is already properly masked. */
39 ether_addr_copy_masked(struct eth_addr
*dst
, const struct eth_addr src
,
40 const struct eth_addr mask
)
44 for (i
= 0; i
< ARRAY_SIZE(dst
->be16
); i
++) {
45 dst
->be16
[i
] = src
.be16
[i
] | (dst
->be16
[i
] & ~mask
.be16
[i
]);
50 odp_eth_set_addrs(struct dp_packet
*packet
, const struct ovs_key_ethernet
*key
,
51 const struct ovs_key_ethernet
*mask
)
53 struct eth_header
*eh
= dp_packet_l2(packet
);
57 eh
->eth_src
= key
->eth_src
;
58 eh
->eth_dst
= key
->eth_dst
;
60 ether_addr_copy_masked(&eh
->eth_src
, key
->eth_src
, mask
->eth_src
);
61 ether_addr_copy_masked(&eh
->eth_dst
, key
->eth_dst
, mask
->eth_dst
);
67 odp_set_ipv4(struct dp_packet
*packet
, const struct ovs_key_ipv4
*key
,
68 const struct ovs_key_ipv4
*mask
)
70 struct ip_header
*nh
= dp_packet_l3(packet
);
74 key
->ipv4_src
| (get_16aligned_be32(&nh
->ip_src
) & ~mask
->ipv4_src
),
75 key
->ipv4_dst
| (get_16aligned_be32(&nh
->ip_dst
) & ~mask
->ipv4_dst
),
76 key
->ipv4_tos
| (nh
->ip_tos
& ~mask
->ipv4_tos
),
77 key
->ipv4_ttl
| (nh
->ip_ttl
& ~mask
->ipv4_ttl
));
80 static const ovs_be32
*
81 mask_ipv6_addr(const ovs_16aligned_be32
*old
, const ovs_be32
*addr
,
82 const ovs_be32
*mask
, ovs_be32
*masked
)
84 for (int i
= 0; i
< 4; i
++) {
85 masked
[i
] = addr
[i
] | (get_16aligned_be32(&old
[i
]) & ~mask
[i
]);
92 odp_set_ipv6(struct dp_packet
*packet
, const struct ovs_key_ipv6
*key
,
93 const struct ovs_key_ipv6
*mask
)
95 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(packet
);
96 ovs_be32 sbuf
[4], dbuf
[4];
97 uint8_t old_tc
= ntohl(get_16aligned_be32(&nh
->ip6_flow
)) >> 20;
98 ovs_be32 old_fl
= get_16aligned_be32(&nh
->ip6_flow
) & htonl(0xfffff);
102 mask_ipv6_addr(nh
->ip6_src
.be32
, key
->ipv6_src
, mask
->ipv6_src
, sbuf
),
103 mask_ipv6_addr(nh
->ip6_dst
.be32
, key
->ipv6_dst
, mask
->ipv6_dst
, dbuf
),
104 key
->ipv6_tclass
| (old_tc
& ~mask
->ipv6_tclass
),
105 key
->ipv6_label
| (old_fl
& ~mask
->ipv6_label
),
106 key
->ipv6_hlimit
| (nh
->ip6_hlim
& ~mask
->ipv6_hlimit
));
110 odp_set_tcp(struct dp_packet
*packet
, const struct ovs_key_tcp
*key
,
111 const struct ovs_key_tcp
*mask
)
113 struct tcp_header
*th
= dp_packet_l4(packet
);
115 if (OVS_LIKELY(th
&& dp_packet_get_tcp_payload(packet
))) {
116 packet_set_tcp_port(packet
,
117 key
->tcp_src
| (th
->tcp_src
& ~mask
->tcp_src
),
118 key
->tcp_dst
| (th
->tcp_dst
& ~mask
->tcp_dst
));
123 odp_set_udp(struct dp_packet
*packet
, const struct ovs_key_udp
*key
,
124 const struct ovs_key_udp
*mask
)
126 struct udp_header
*uh
= dp_packet_l4(packet
);
128 if (OVS_LIKELY(uh
&& dp_packet_get_udp_payload(packet
))) {
129 packet_set_udp_port(packet
,
130 key
->udp_src
| (uh
->udp_src
& ~mask
->udp_src
),
131 key
->udp_dst
| (uh
->udp_dst
& ~mask
->udp_dst
));
136 odp_set_sctp(struct dp_packet
*packet
, const struct ovs_key_sctp
*key
,
137 const struct ovs_key_sctp
*mask
)
139 struct sctp_header
*sh
= dp_packet_l4(packet
);
141 if (OVS_LIKELY(sh
&& dp_packet_get_sctp_payload(packet
))) {
142 packet_set_sctp_port(packet
,
143 key
->sctp_src
| (sh
->sctp_src
& ~mask
->sctp_src
),
144 key
->sctp_dst
| (sh
->sctp_dst
& ~mask
->sctp_dst
));
149 odp_set_tunnel_action(const struct nlattr
*a
, struct flow_tnl
*tun_key
)
151 enum odp_key_fitness fitness
;
153 fitness
= odp_tun_key_from_attr(a
, true, tun_key
);
154 ovs_assert(fitness
!= ODP_FIT_ERROR
);
158 set_arp(struct dp_packet
*packet
, const struct ovs_key_arp
*key
,
159 const struct ovs_key_arp
*mask
)
161 struct arp_eth_header
*arp
= dp_packet_l3(packet
);
164 arp
->ar_op
= key
->arp_op
;
165 arp
->ar_sha
= key
->arp_sha
;
166 put_16aligned_be32(&arp
->ar_spa
, key
->arp_sip
);
167 arp
->ar_tha
= key
->arp_tha
;
168 put_16aligned_be32(&arp
->ar_tpa
, key
->arp_tip
);
170 ovs_be32 ar_spa
= get_16aligned_be32(&arp
->ar_spa
);
171 ovs_be32 ar_tpa
= get_16aligned_be32(&arp
->ar_tpa
);
173 arp
->ar_op
= key
->arp_op
| (arp
->ar_op
& ~mask
->arp_op
);
174 ether_addr_copy_masked(&arp
->ar_sha
, key
->arp_sha
, mask
->arp_sha
);
175 put_16aligned_be32(&arp
->ar_spa
,
176 key
->arp_sip
| (ar_spa
& ~mask
->arp_sip
));
177 ether_addr_copy_masked(&arp
->ar_tha
, key
->arp_tha
, mask
->arp_tha
);
178 put_16aligned_be32(&arp
->ar_tpa
,
179 key
->arp_tip
| (ar_tpa
& ~mask
->arp_tip
));
184 odp_set_nd(struct dp_packet
*packet
, const struct ovs_key_nd
*key
,
185 const struct ovs_key_nd
*mask
)
187 const struct ovs_nd_msg
*ns
= dp_packet_l4(packet
);
188 const struct ovs_nd_opt
*nd_opt
= dp_packet_get_nd_payload(packet
);
190 if (OVS_LIKELY(ns
&& nd_opt
)) {
191 int bytes_remain
= dp_packet_l4_size(packet
) - sizeof(*ns
);
193 struct eth_addr sll_buf
= eth_addr_zero
;
194 struct eth_addr tll_buf
= eth_addr_zero
;
196 while (bytes_remain
>= ND_OPT_LEN
&& nd_opt
->nd_opt_len
!= 0) {
197 if (nd_opt
->nd_opt_type
== ND_OPT_SOURCE_LINKADDR
198 && nd_opt
->nd_opt_len
== 1) {
199 sll_buf
= nd_opt
->nd_opt_mac
;
200 ether_addr_copy_masked(&sll_buf
, key
->nd_sll
, mask
->nd_sll
);
202 /* A packet can only contain one SLL or TLL option */
204 } else if (nd_opt
->nd_opt_type
== ND_OPT_TARGET_LINKADDR
205 && nd_opt
->nd_opt_len
== 1) {
206 tll_buf
= nd_opt
->nd_opt_mac
;
207 ether_addr_copy_masked(&tll_buf
, key
->nd_tll
, mask
->nd_tll
);
209 /* A packet can only contain one SLL or TLL option */
213 nd_opt
+= nd_opt
->nd_opt_len
;
214 bytes_remain
-= nd_opt
->nd_opt_len
* ND_OPT_LEN
;
217 packet_set_nd(packet
,
218 mask_ipv6_addr(ns
->target
.be32
,
219 key
->nd_target
, mask
->nd_target
, tgt_buf
),
226 odp_execute_set_action(struct dp_packet
*packet
, const struct nlattr
*a
)
228 enum ovs_key_attr type
= nl_attr_type(a
);
229 const struct ovs_key_ipv4
*ipv4_key
;
230 const struct ovs_key_ipv6
*ipv6_key
;
231 struct pkt_metadata
*md
= &packet
->md
;
234 case OVS_KEY_ATTR_PRIORITY
:
235 md
->skb_priority
= nl_attr_get_u32(a
);
238 case OVS_KEY_ATTR_TUNNEL
:
239 odp_set_tunnel_action(a
, &md
->tunnel
);
242 case OVS_KEY_ATTR_SKB_MARK
:
243 md
->pkt_mark
= nl_attr_get_u32(a
);
246 case OVS_KEY_ATTR_ETHERNET
:
247 odp_eth_set_addrs(packet
, nl_attr_get(a
), NULL
);
250 case OVS_KEY_ATTR_IPV4
:
251 ipv4_key
= nl_attr_get_unspec(a
, sizeof(struct ovs_key_ipv4
));
252 packet_set_ipv4(packet
, ipv4_key
->ipv4_src
,
253 ipv4_key
->ipv4_dst
, ipv4_key
->ipv4_tos
,
257 case OVS_KEY_ATTR_IPV6
:
258 ipv6_key
= nl_attr_get_unspec(a
, sizeof(struct ovs_key_ipv6
));
259 packet_set_ipv6(packet
, ipv6_key
->ipv6_src
, ipv6_key
->ipv6_dst
,
260 ipv6_key
->ipv6_tclass
, ipv6_key
->ipv6_label
,
261 ipv6_key
->ipv6_hlimit
);
264 case OVS_KEY_ATTR_TCP
:
265 if (OVS_LIKELY(dp_packet_get_tcp_payload(packet
))) {
266 const struct ovs_key_tcp
*tcp_key
267 = nl_attr_get_unspec(a
, sizeof(struct ovs_key_tcp
));
269 packet_set_tcp_port(packet
, tcp_key
->tcp_src
,
274 case OVS_KEY_ATTR_UDP
:
275 if (OVS_LIKELY(dp_packet_get_udp_payload(packet
))) {
276 const struct ovs_key_udp
*udp_key
277 = nl_attr_get_unspec(a
, sizeof(struct ovs_key_udp
));
279 packet_set_udp_port(packet
, udp_key
->udp_src
,
284 case OVS_KEY_ATTR_SCTP
:
285 if (OVS_LIKELY(dp_packet_get_sctp_payload(packet
))) {
286 const struct ovs_key_sctp
*sctp_key
287 = nl_attr_get_unspec(a
, sizeof(struct ovs_key_sctp
));
289 packet_set_sctp_port(packet
, sctp_key
->sctp_src
,
294 case OVS_KEY_ATTR_MPLS
:
295 set_mpls_lse(packet
, nl_attr_get_be32(a
));
298 case OVS_KEY_ATTR_ARP
:
299 set_arp(packet
, nl_attr_get(a
), NULL
);
302 case OVS_KEY_ATTR_ICMP
:
303 case OVS_KEY_ATTR_ICMPV6
:
304 if (OVS_LIKELY(dp_packet_get_icmp_payload(packet
))) {
305 const struct ovs_key_icmp
*icmp_key
306 = nl_attr_get_unspec(a
, sizeof(struct ovs_key_icmp
));
308 packet_set_icmp(packet
, icmp_key
->icmp_type
, icmp_key
->icmp_code
);
312 case OVS_KEY_ATTR_ND
:
313 if (OVS_LIKELY(dp_packet_get_nd_payload(packet
))) {
314 const struct ovs_key_nd
*nd_key
315 = nl_attr_get_unspec(a
, sizeof(struct ovs_key_nd
));
316 packet_set_nd(packet
, nd_key
->nd_target
, nd_key
->nd_sll
,
321 case OVS_KEY_ATTR_DP_HASH
:
322 md
->dp_hash
= nl_attr_get_u32(a
);
325 case OVS_KEY_ATTR_RECIRC_ID
:
326 md
->recirc_id
= nl_attr_get_u32(a
);
329 case OVS_KEY_ATTR_UNSPEC
:
330 case OVS_KEY_ATTR_ENCAP
:
331 case OVS_KEY_ATTR_ETHERTYPE
:
332 case OVS_KEY_ATTR_IN_PORT
:
333 case OVS_KEY_ATTR_VLAN
:
334 case OVS_KEY_ATTR_TCP_FLAGS
:
335 case OVS_KEY_ATTR_CT_STATE
:
336 case OVS_KEY_ATTR_CT_ZONE
:
337 case OVS_KEY_ATTR_CT_MARK
:
338 case OVS_KEY_ATTR_CT_LABELS
:
339 case __OVS_KEY_ATTR_MAX
:
/* Given attribute header pointer 'a', yields a 'const type *' to the second
 * 'type'-sized element following the header, i.e. the mask that is stored
 * right after the value in a masked-set attribute.
 *
 * 'a' is parenthesized so that an argument containing a lower-precedence
 * operator (for example a conditional expression) is advanced as a whole. */
#define get_mask(a, type) ((const type *)(const void *)((a) + 1) + 1)
348 odp_execute_masked_set_action(struct dp_packet
*packet
,
349 const struct nlattr
*a
)
351 struct pkt_metadata
*md
= &packet
->md
;
352 enum ovs_key_attr type
= nl_attr_type(a
);
356 case OVS_KEY_ATTR_PRIORITY
:
357 md
->skb_priority
= nl_attr_get_u32(a
)
358 | (md
->skb_priority
& ~*get_mask(a
, uint32_t));
361 case OVS_KEY_ATTR_SKB_MARK
:
362 md
->pkt_mark
= nl_attr_get_u32(a
)
363 | (md
->pkt_mark
& ~*get_mask(a
, uint32_t));
366 case OVS_KEY_ATTR_ETHERNET
:
367 odp_eth_set_addrs(packet
, nl_attr_get(a
),
368 get_mask(a
, struct ovs_key_ethernet
));
371 case OVS_KEY_ATTR_IPV4
:
372 odp_set_ipv4(packet
, nl_attr_get(a
),
373 get_mask(a
, struct ovs_key_ipv4
));
376 case OVS_KEY_ATTR_IPV6
:
377 odp_set_ipv6(packet
, nl_attr_get(a
),
378 get_mask(a
, struct ovs_key_ipv6
));
381 case OVS_KEY_ATTR_TCP
:
382 odp_set_tcp(packet
, nl_attr_get(a
),
383 get_mask(a
, struct ovs_key_tcp
));
386 case OVS_KEY_ATTR_UDP
:
387 odp_set_udp(packet
, nl_attr_get(a
),
388 get_mask(a
, struct ovs_key_udp
));
391 case OVS_KEY_ATTR_SCTP
:
392 odp_set_sctp(packet
, nl_attr_get(a
),
393 get_mask(a
, struct ovs_key_sctp
));
396 case OVS_KEY_ATTR_MPLS
:
397 mh
= dp_packet_l2_5(packet
);
399 put_16aligned_be32(&mh
->mpls_lse
, nl_attr_get_be32(a
)
400 | (get_16aligned_be32(&mh
->mpls_lse
)
401 & ~*get_mask(a
, ovs_be32
)));
405 case OVS_KEY_ATTR_ARP
:
406 set_arp(packet
, nl_attr_get(a
),
407 get_mask(a
, struct ovs_key_arp
));
410 case OVS_KEY_ATTR_ND
:
411 odp_set_nd(packet
, nl_attr_get(a
),
412 get_mask(a
, struct ovs_key_nd
));
415 case OVS_KEY_ATTR_DP_HASH
:
416 md
->dp_hash
= nl_attr_get_u32(a
)
417 | (md
->dp_hash
& ~*get_mask(a
, uint32_t));
420 case OVS_KEY_ATTR_RECIRC_ID
:
421 md
->recirc_id
= nl_attr_get_u32(a
)
422 | (md
->recirc_id
& ~*get_mask(a
, uint32_t));
425 case OVS_KEY_ATTR_TUNNEL
: /* Masked data not supported for tunnel. */
426 case OVS_KEY_ATTR_UNSPEC
:
427 case OVS_KEY_ATTR_CT_STATE
:
428 case OVS_KEY_ATTR_CT_ZONE
:
429 case OVS_KEY_ATTR_CT_MARK
:
430 case OVS_KEY_ATTR_CT_LABELS
:
431 case OVS_KEY_ATTR_ENCAP
:
432 case OVS_KEY_ATTR_ETHERTYPE
:
433 case OVS_KEY_ATTR_IN_PORT
:
434 case OVS_KEY_ATTR_VLAN
:
435 case OVS_KEY_ATTR_ICMP
:
436 case OVS_KEY_ATTR_ICMPV6
:
437 case OVS_KEY_ATTR_TCP_FLAGS
:
438 case __OVS_KEY_ATTR_MAX
:
445 odp_execute_sample(void *dp
, struct dp_packet
*packet
, bool steal
,
446 const struct nlattr
*action
,
447 odp_execute_cb dp_execute_action
)
449 const struct nlattr
*subactions
= NULL
;
450 const struct nlattr
*a
;
451 struct dp_packet_batch pb
;
454 NL_NESTED_FOR_EACH_UNSAFE (a
, left
, action
) {
455 int type
= nl_attr_type(a
);
457 switch ((enum ovs_sample_attr
) type
) {
458 case OVS_SAMPLE_ATTR_PROBABILITY
:
459 if (random_uint32() >= nl_attr_get_u32(a
)) {
461 dp_packet_delete(packet
);
467 case OVS_SAMPLE_ATTR_ACTIONS
:
471 case OVS_SAMPLE_ATTR_UNSPEC
:
472 case __OVS_SAMPLE_ATTR_MAX
:
478 packet_batch_init_packet(&pb
, packet
);
479 odp_execute_actions(dp
, &pb
, steal
, nl_attr_get(subactions
),
480 nl_attr_get_size(subactions
), dp_execute_action
);
484 requires_datapath_assistance(const struct nlattr
*a
)
486 enum ovs_action_attr type
= nl_attr_type(a
);
489 /* These only make sense in the context of a datapath. */
490 case OVS_ACTION_ATTR_OUTPUT
:
491 case OVS_ACTION_ATTR_TUNNEL_PUSH
:
492 case OVS_ACTION_ATTR_TUNNEL_POP
:
493 case OVS_ACTION_ATTR_USERSPACE
:
494 case OVS_ACTION_ATTR_RECIRC
:
495 case OVS_ACTION_ATTR_CT
:
498 case OVS_ACTION_ATTR_SET
:
499 case OVS_ACTION_ATTR_SET_MASKED
:
500 case OVS_ACTION_ATTR_PUSH_VLAN
:
501 case OVS_ACTION_ATTR_POP_VLAN
:
502 case OVS_ACTION_ATTR_SAMPLE
:
503 case OVS_ACTION_ATTR_HASH
:
504 case OVS_ACTION_ATTR_PUSH_MPLS
:
505 case OVS_ACTION_ATTR_POP_MPLS
:
506 case OVS_ACTION_ATTR_TRUNC
:
509 case OVS_ACTION_ATTR_UNSPEC
:
510 case __OVS_ACTION_ATTR_MAX
:
518 odp_execute_actions(void *dp
, struct dp_packet_batch
*batch
, bool steal
,
519 const struct nlattr
*actions
, size_t actions_len
,
520 odp_execute_cb dp_execute_action
)
522 struct dp_packet
**packets
= batch
->packets
;
523 int cnt
= batch
->count
;
524 const struct nlattr
*a
;
528 NL_ATTR_FOR_EACH_UNSAFE (a
, left
, actions
, actions_len
) {
529 int type
= nl_attr_type(a
);
530 bool last_action
= (left
<= NLA_ALIGN(a
->nla_len
));
532 if (requires_datapath_assistance(a
)) {
533 if (dp_execute_action
) {
534 /* Allow 'dp_execute_action' to steal the packet data if we do
535 * not need it any more. */
536 bool may_steal
= steal
&& last_action
;
538 dp_execute_action(dp
, batch
, a
, may_steal
);
541 /* We do not need to free the packets. dp_execute_actions()
549 switch ((enum ovs_action_attr
) type
) {
550 case OVS_ACTION_ATTR_HASH
: {
551 const struct ovs_action_hash
*hash_act
= nl_attr_get(a
);
553 /* Calculate a hash value directly. This might not match the
554 * value computed by the datapath, but it is much less expensive,
555 * and the current use case (bonding) does not require a strict
556 * match to work properly. */
557 if (hash_act
->hash_alg
== OVS_HASH_ALG_L4
) {
561 for (i
= 0; i
< cnt
; i
++) {
562 flow_extract(packets
[i
], &flow
);
563 hash
= flow_hash_5tuple(&flow
, hash_act
->hash_basis
);
565 packets
[i
]->md
.dp_hash
= hash
;
568 /* Assert on unknown hash algorithm. */
574 case OVS_ACTION_ATTR_PUSH_VLAN
: {
575 const struct ovs_action_push_vlan
*vlan
= nl_attr_get(a
);
577 for (i
= 0; i
< cnt
; i
++) {
578 eth_push_vlan(packets
[i
], vlan
->vlan_tpid
, vlan
->vlan_tci
);
583 case OVS_ACTION_ATTR_POP_VLAN
:
584 for (i
= 0; i
< cnt
; i
++) {
585 eth_pop_vlan(packets
[i
]);
589 case OVS_ACTION_ATTR_PUSH_MPLS
: {
590 const struct ovs_action_push_mpls
*mpls
= nl_attr_get(a
);
592 for (i
= 0; i
< cnt
; i
++) {
593 push_mpls(packets
[i
], mpls
->mpls_ethertype
, mpls
->mpls_lse
);
598 case OVS_ACTION_ATTR_POP_MPLS
:
599 for (i
= 0; i
< cnt
; i
++) {
600 pop_mpls(packets
[i
], nl_attr_get_be16(a
));
604 case OVS_ACTION_ATTR_SET
:
605 for (i
= 0; i
< cnt
; i
++) {
606 odp_execute_set_action(packets
[i
], nl_attr_get(a
));
610 case OVS_ACTION_ATTR_SET_MASKED
:
611 for (i
= 0; i
< cnt
; i
++) {
612 odp_execute_masked_set_action(packets
[i
], nl_attr_get(a
));
616 case OVS_ACTION_ATTR_SAMPLE
:
617 for (i
= 0; i
< cnt
; i
++) {
618 odp_execute_sample(dp
, packets
[i
], steal
&& last_action
, a
,
623 /* We do not need to free the packets. odp_execute_sample() has
629 case OVS_ACTION_ATTR_TRUNC
: {
630 const struct ovs_action_trunc
*trunc
=
631 nl_attr_get_unspec(a
, sizeof *trunc
);
634 for (i
= 0; i
< cnt
; i
++) {
635 dp_packet_set_cutlen(packets
[i
], trunc
->max_len
);
640 case OVS_ACTION_ATTR_OUTPUT
:
641 case OVS_ACTION_ATTR_TUNNEL_PUSH
:
642 case OVS_ACTION_ATTR_TUNNEL_POP
:
643 case OVS_ACTION_ATTR_USERSPACE
:
644 case OVS_ACTION_ATTR_RECIRC
:
645 case OVS_ACTION_ATTR_CT
:
646 case OVS_ACTION_ATTR_UNSPEC
:
647 case __OVS_ACTION_ATTR_MAX
:
653 for (i
= 0; i
< cnt
; i
++) {
654 dp_packet_delete(packets
[i
]);