2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2017, 2019 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <sys/types.h>
23 #include <netinet/in.h>
24 #include <netinet/icmp6.h>
25 #include <netinet/ip6.h>
29 #include "byte-order.h"
33 #include "openvswitch/dynamic-string.h"
36 #include "openvswitch/match.h"
37 #include "dp-packet.h"
38 #include "openflow/openflow.h"
42 #include "unaligned.h"
44 #include "openvswitch/nsh.h"
45 #include "ovs-router.h"
46 #include "lib/netdev-provider.h"
/* Coverage counters incremented on the flow-extraction paths. */
COVERAGE_DEFINE(flow_extract);
COVERAGE_DEFINE(miniflow_malloc);

/* U64 indices for segmented flow classification. */
/* Each entry is the end offset, in units of uint64_t, of one classifier
 * segment within struct flow (used for staged lookup).
 * NOTE(review): the fourth array element and the closing "};" appear to be
 * missing from this copy of the file -- TODO restore. */
const uint8_t flow_segment_u64s[4] = {
    FLOW_SEGMENT_1_ENDS_AT / sizeof(uint64_t),
    FLOW_SEGMENT_2_ENDS_AT / sizeof(uint64_t),
    FLOW_SEGMENT_3_ENDS_AT / sizeof(uint64_t),

/* Limit on the number of VLAN headers parsed from a packet. */
int flow_vlan_limit = FLOW_MAX_VLAN_HEADERS;
/* Asserts that field 'f1' follows immediately after 'f0' in struct flow,
 * without any intervening padding. */
#define ASSERT_SEQUENTIAL(f0, f1)                       \
    BUILD_ASSERT_DECL(offsetof(struct flow, f0)         \
                      + MEMBER_SIZEOF(struct flow, f0)  \
                      == offsetof(struct flow, f1))

/* Asserts that fields 'f0' and 'f1' are in the same 32-bit aligned word within
 * struct flow (NOTE(review): comment terminator restored; original tail of
 * this sentence appears truncated). */
#define ASSERT_SAME_WORD(f0, f1)                        \
    BUILD_ASSERT_DECL(offsetof(struct flow, f0) / 4     \
                      == offsetof(struct flow, f1) / 4)

/* Asserts that 'f0' and 'f1' are both sequential and within the same 32-bit
 * aligned word in struct flow. */
#define ASSERT_SEQUENTIAL_SAME_WORD(f0, f1)     \
    ASSERT_SEQUENTIAL(f0, f1);                  \
    ASSERT_SAME_WORD(f0, f1)

/* miniflow_extract() assumes the following to be true to optimize the
 * extraction process. */
ASSERT_SEQUENTIAL_SAME_WORD(nw_frag, nw_tos);
ASSERT_SEQUENTIAL_SAME_WORD(nw_tos, nw_ttl);
ASSERT_SEQUENTIAL_SAME_WORD(nw_ttl, nw_proto);

/* TCP flags in the middle of a BE64, zeroes in the other half. */
BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) % 8 == 4);

/* NOTE(review): two alternative definitions of TCP_FLAGS_BE32 follow; the
 * surrounding #if WORDS_BIGENDIAN / #else / #endif lines, and the "<< 16)"
 * tail of the first definition, appear to be truncated -- TODO restore. */
#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl) \
#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl))

ASSERT_SEQUENTIAL_SAME_WORD(tp_src, tp_dst);
/* Removes 'size' bytes from the head end of '*datap', of size '*sizep', which
 * must contain at least 'size' bytes of data.  Returns the first byte of data
 * removed.
 *
 * The caller guarantees '*sizep >= size'; no bounds check is performed here
 * (use data_try_pull() for the checked variant). */
static inline const void *
data_pull(const void **datap, size_t *sizep, size_t size)
{
    const char *data = *datap;

    *datap = data + size;
    *sizep -= size;      /* Shrink the remaining length to match. */

    return data;
}
/* If '*datap' has at least 'size' bytes of data, removes that many bytes from
 * the head end of '*datap' and returns the first byte removed.  Otherwise,
 * returns a null pointer without modifying '*datap'.
 *
 * This is the bounds-checked counterpart of data_pull(). */
static inline const void *
data_try_pull(const void **datap, size_t *sizep, size_t size)
{
    return OVS_LIKELY(*sizep >= size)
           ? data_pull(datap, sizep, size)
           : NULL;
}
/* Context for pushing data to a miniflow. */
/* NOTE(review): the "struct mf_ctx {" opener and its flowmap/data members
 * appear truncated in this copy; only the 'end' member is visible.  Likewise,
 * several of the macros below are missing their "do {" / "} while (0)" or
 * brace closers and the "#else"/"#endif" around MINIFLOW_ASSERT -- TODO
 * restore from upstream. */
uint64_t * const end;

/* miniflow_push_* macros allow filling in a miniflow data values in order.
 * Assertions are needed only when the layout of the struct flow is modified.
 * 'ofs' is a compile-time constant, which allows most of the code be optimized
 * away.  Some GCC versions gave warnings on ALWAYS_INLINE, so these are
 * defined as macros. */

#if (FLOW_WC_SEQ != 42)
#define MINIFLOW_ASSERT(X) ovs_assert(X)
BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
              "assertions enabled. Consider updating FLOW_WC_SEQ after "
#define MINIFLOW_ASSERT(X)

/* True if 'IDX' and higher bits are not set. */
#define ASSERT_FLOWMAP_NOT_SET(FM, IDX)                                 \
    MINIFLOW_ASSERT(!((FM)->bits[(IDX) / MAP_T_BITS] &                  \
                      (MAP_MAX << ((IDX) % MAP_T_BITS))));              \
    for (size_t i = (IDX) / MAP_T_BITS + 1; i < FLOWMAP_UNITS; i++) {   \
        MINIFLOW_ASSERT(!(FM)->bits[i]);                                \

#define miniflow_set_map(MF, OFS)               \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS));     \
    flowmap_set(&MF.map, (OFS), 1);             \

#define miniflow_assert_in_map(MF, OFS)                 \
    MINIFLOW_ASSERT(flowmap_is_set(&MF.map, (OFS)));    \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS) + 1)

#define miniflow_push_uint64_(MF, OFS, VALUE)               \
    MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 8 == 0);    \
    *MF.data++ = VALUE;                                     \
    miniflow_set_map(MF, OFS / 8);                          \

#define miniflow_push_be64_(MF, OFS, VALUE) \
    miniflow_push_uint64_(MF, OFS, (OVS_FORCE uint64_t)(VALUE))

#define miniflow_push_uint32_(MF, OFS, VALUE)   \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
    if ((OFS) % 8 == 0) {                       \
        miniflow_set_map(MF, OFS / 8);          \
        *(uint32_t *)MF.data = VALUE;           \
    } else if ((OFS) % 8 == 4) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint32_t *)MF.data + 1) = VALUE;     \

#define miniflow_push_be32_(MF, OFS, VALUE) \
    miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE))

#define miniflow_push_uint16_(MF, OFS, VALUE)   \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
    if ((OFS) % 8 == 0) {                       \
        miniflow_set_map(MF, OFS / 8);          \
        *(uint16_t *)MF.data = VALUE;           \
    } else if ((OFS) % 8 == 2) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint16_t *)MF.data + 1) = VALUE;     \
    } else if ((OFS) % 8 == 4) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint16_t *)MF.data + 2) = VALUE;     \
    } else if ((OFS) % 8 == 6) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint16_t *)MF.data + 3) = VALUE;     \

#define miniflow_push_uint8_(MF, OFS, VALUE)            \
    MINIFLOW_ASSERT(MF.data < MF.end);                  \
    if ((OFS) % 8 == 0) {                               \
        miniflow_set_map(MF, OFS / 8);                  \
        *(uint8_t *)MF.data = VALUE;                    \
    } else if ((OFS) % 8 == 7) {                        \
        miniflow_assert_in_map(MF, OFS / 8);            \
        *((uint8_t *)MF.data + 7) = VALUE;              \
        miniflow_assert_in_map(MF, OFS / 8);            \
        *((uint8_t *)MF.data + ((OFS) % 8)) = VALUE;    \

#define miniflow_pad_to_64_(MF, OFS)                            \
    MINIFLOW_ASSERT((OFS) % 8 != 0);                            \
    miniflow_assert_in_map(MF, OFS / 8);                        \
    memset((uint8_t *)MF.data + (OFS) % 8, 0, 8 - (OFS) % 8);   \

#define miniflow_pad_from_64_(MF, OFS)          \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
    MINIFLOW_ASSERT((OFS) % 8 != 0);            \
    miniflow_set_map(MF, OFS / 8);              \
    memset((uint8_t *)MF.data, 0, (OFS) % 8);   \

#define miniflow_push_be16_(MF, OFS, VALUE) \
    miniflow_push_uint16_(MF, OFS, (OVS_FORCE uint16_t)VALUE);

#define miniflow_push_be8_(MF, OFS, VALUE) \
    miniflow_push_uint8_(MF, OFS, (OVS_FORCE uint8_t)VALUE);

#define miniflow_set_maps(MF, OFS, N_WORDS)                     \
    size_t ofs = (OFS);                                         \
    size_t n_words = (N_WORDS);                                 \
    MINIFLOW_ASSERT(n_words && MF.data + n_words <= MF.end);    \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, ofs);                       \
    flowmap_set(&MF.map, ofs, n_words);                         \

/* Data at 'valuep' may be unaligned. */
#define miniflow_push_words_(MF, OFS, VALUEP, N_WORDS)          \
    MINIFLOW_ASSERT((OFS) % 8 == 0);                            \
    miniflow_set_maps(MF, (OFS) / 8, (N_WORDS));                \
    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof *MF.data);     \
    MF.data += (N_WORDS);                                       \

/* Push 32-bit words padded to 64-bits. */
#define miniflow_push_words_32_(MF, OFS, VALUEP, N_WORDS)       \
    miniflow_set_maps(MF, (OFS) / 8, DIV_ROUND_UP(N_WORDS, 2)); \
    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof(uint32_t));    \
    MF.data += DIV_ROUND_UP(N_WORDS, 2);                        \
    if ((N_WORDS) & 1) {                                        \
        *((uint32_t *)MF.data - 1) = 0;                         \

/* Data at 'valuep' may be unaligned. */
/* MACs start 64-aligned, and must be followed by other data or padding. */
#define miniflow_push_macs_(MF, OFS, VALUEP)        \
    miniflow_set_maps(MF, (OFS) / 8, 2);            \
    memcpy(MF.data, (VALUEP), 2 * ETH_ADDR_LEN);    \
    MF.data += 1; /* First word only. */            \

#define miniflow_push_uint32(MF, FIELD, VALUE) \
    miniflow_push_uint32_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_be32(MF, FIELD, VALUE) \
    miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_uint16(MF, FIELD, VALUE) \
    miniflow_push_uint16_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_be16(MF, FIELD, VALUE) \
    miniflow_push_be16_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_uint8(MF, FIELD, VALUE) \
    miniflow_push_uint8_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_pad_to_64(MF, FIELD) \
    miniflow_pad_to_64_(MF, OFFSETOFEND(struct flow, FIELD))

#define miniflow_pad_from_64(MF, FIELD) \
    miniflow_pad_from_64_(MF, offsetof(struct flow, FIELD))

#define miniflow_push_words(MF, FIELD, VALUEP, N_WORDS) \
    miniflow_push_words_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)

#define miniflow_push_words_32(MF, FIELD, VALUEP, N_WORDS) \
    miniflow_push_words_32_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)

#define miniflow_push_macs(MF, FIELD, VALUEP) \
    miniflow_push_macs_(MF, offsetof(struct flow, FIELD), VALUEP)

/* Return the pointer to the miniflow data when called BEFORE the corresponding
 * push (terminator restored; rest of sentence appears truncated). */
#define miniflow_pointer(MF, FIELD) \
    (void *)((uint8_t *)MF.data + ((offsetof(struct flow, FIELD)) % 8))
/* Pulls the MPLS headers at '*datap' and returns the count of them. */
/* NOTE(review): this function appears truncated -- the return type line, the
 * declaration/increment of 'count', the loop 'break' on bottom-of-stack, and
 * the closing braces are missing -- TODO restore. */
parse_mpls(const void **datap, size_t *sizep)
    const struct mpls_hdr *mh;

    /* Pull one 4-byte MPLS label-stack entry at a time. */
    while ((mh = data_try_pull(datap, sizep, sizeof *mh))) {
        /* Bottom-of-stack bit ends the label stack. */
        if (mh->mpls_lse.lo & htons(1 << MPLS_BOS_SHIFT)) {
    /* Never report more labels than struct flow can hold. */
    return MIN(count, FLOW_MAX_MPLS_LABELS);
/* passed vlan_hdrs arg must be at least size FLOW_MAX_VLAN_HEADERS. */
/* NOTE(review): appears truncated -- the opening brace, the declaration of
 * 'n', the assignment of 'eth_type', the size checks' bodies and the final
 * "return n;" are missing -- TODO restore. */
static inline ALWAYS_INLINE size_t
parse_vlan(const void **datap, size_t *sizep, union flow_vlan_hdr *vlan_hdrs)
    const ovs_be16 *eth_type;

    /* Skip the two MAC addresses; the EtherType follows. */
    data_pull(datap, sizep, ETH_ADDR_LEN * 2);

    /* Peel 802.1Q / 802.1ad tags up to the configured VLAN limit. */
    for (n = 0; eth_type_vlan(*eth_type) && n < flow_vlan_limit; n++) {
        if (OVS_UNLIKELY(*sizep < sizeof(ovs_be32) + sizeof(ovs_be16))) {
        memset(vlan_hdrs + n, 0, sizeof(union flow_vlan_hdr));
        const ovs_16aligned_be32 *qp = data_pull(datap, sizep, sizeof *qp);
        vlan_hdrs[n].qtag = get_16aligned_be32(qp);
        /* Mark the tag as present (CFI bit used as "VLAN present"). */
        vlan_hdrs[n].tci |= htons(VLAN_CFI);
/* Returns the EtherType at '*datap', handling LLC/SNAP encapsulation.
 * NOTE(review): appears truncated -- the opening brace, the declaration of
 * 'proto', the early "return proto;", the assignment of 'llc' and closing
 * braces are missing -- TODO restore. */
static inline ALWAYS_INLINE ovs_be16
parse_ethertype(const void **datap, size_t *sizep)
    const struct llc_snap_header *llc;

    proto = *(ovs_be16 *) data_pull(datap, sizep, sizeof proto);
    /* Values >= ETH_TYPE_MIN are EtherTypes; smaller values are 802.3
     * lengths, handled via LLC/SNAP below. */
    if (OVS_LIKELY(ntohs(proto) >= ETH_TYPE_MIN)) {

    if (OVS_UNLIKELY(*sizep < sizeof *llc)) {
        return htons(FLOW_DL_TYPE_NONE);

    /* Only a SNAP header with the Ethernet OUI encodes an EtherType. */
    if (OVS_UNLIKELY(llc->llc.llc_dsap != LLC_DSAP_SNAP
                     || llc->llc.llc_ssap != LLC_SSAP_SNAP
                     || llc->llc.llc_cntl != LLC_CNTL_SNAP
                     || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET,
                               sizeof llc->snap.snap_org))) {
        return htons(FLOW_DL_TYPE_NONE);

    data_pull(datap, sizep, sizeof *llc);

    if (OVS_LIKELY(ntohs(llc->snap.snap_type) >= ETH_TYPE_MIN)) {
        return llc->snap.snap_type;

    return htons(FLOW_DL_TYPE_NONE);
/* Returns 'true' if the packet is an ND packet. In that case the '*nd_target'
 * and 'arp_buf[]' are filled in. If the packet is not an ND packet, 'false'
 * is returned and no values are filled in on '*nd_target' or 'arp_buf[]'. */
/* NOTE(review): appears truncated -- the return type line, opening brace,
 * several "return false;"/"return true;" statements, "goto invalid;" paths
 * and closing braces are missing -- TODO restore. */
parse_icmpv6(const void **datap, size_t *sizep,
             const struct icmp6_data_header *icmp6,
             ovs_be32 *rso_flags, const struct in6_addr **nd_target,
             struct eth_addr arp_buf[2], uint8_t *opt_type)
    /* Only Neighbor Solicit/Advert with code 0 are ND packets. */
    if (icmp6->icmp6_base.icmp6_code != 0 ||
        (icmp6->icmp6_base.icmp6_type != ND_NEIGHBOR_SOLICIT &&
         icmp6->icmp6_base.icmp6_type != ND_NEIGHBOR_ADVERT)) {

    arp_buf[0] = eth_addr_zero;
    arp_buf[1] = eth_addr_zero;

    /* RSO flags live in the ICMPv6 "reserved" 32-bit word. */
    *rso_flags = get_16aligned_be32(icmp6->icmp6_data.be32);

    *nd_target = data_try_pull(datap, sizep, sizeof **nd_target);
    if (OVS_UNLIKELY(!*nd_target)) {

    while (*sizep >= 8) {
        /* The minimum size of an option is 8 bytes, which also is
         * the size of Ethernet link-layer options. */
        const struct ovs_nd_lla_opt *lla_opt = *datap;
        int opt_len = lla_opt->len * ND_LLA_OPT_LEN;

        if (!opt_len || opt_len > *sizep) {

        /* Store the link layer address if the appropriate option is
         * provided. It is considered an error if the same link
         * layer option is specified twice. */
        if (lla_opt->type == ND_OPT_SOURCE_LINKADDR && opt_len == 8) {
            if (OVS_LIKELY(eth_addr_is_zero(arp_buf[0]))) {
                arp_buf[0] = lla_opt->mac;
                /* We use only first option type present in ND packet. */
                if (*opt_type == 0) {
                    *opt_type = lla_opt->type;
        } else if (lla_opt->type == ND_OPT_TARGET_LINKADDR && opt_len == 8) {
            if (OVS_LIKELY(eth_addr_is_zero(arp_buf[1]))) {
                arp_buf[1] = lla_opt->mac;
                /* We use only first option type present in ND packet. */
                if (*opt_type == 0) {
                    *opt_type = lla_opt->type;

        if (OVS_UNLIKELY(!data_try_pull(datap, sizep, opt_len))) {

    /* Error path: reset the link-layer addresses before failing. */
    arp_buf[0] = eth_addr_zero;
    arp_buf[1] = eth_addr_zero;
/* Internal IPv6 extension-header walker; see parse_ipv6_ext_hdrs() below for
 * the contract.  NOTE(review): appears truncated -- the return type line, the
 * 'uint8_t *nw_frag' parameter, the enclosing "while (1)" loop, the
 * "return true/false;" statements and closing braces are missing -- TODO
 * restore. */
parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
                      const struct ovs_16aligned_ip6_frag **frag_hdr)
    if (OVS_LIKELY((*nw_proto != IPPROTO_HOPOPTS)
                   && (*nw_proto != IPPROTO_ROUTING)
                   && (*nw_proto != IPPROTO_DSTOPTS)
                   && (*nw_proto != IPPROTO_AH)
                   && (*nw_proto != IPPROTO_FRAGMENT))) {
        /* It's either a terminal header (e.g., TCP, UDP) or one we
         * don't understand.  In either case, we're done with the
         * packet, so use it to fill in 'nw_proto'. */

    /* We only verify that at least 8 bytes of the next header are
     * available, but many of these headers are longer.  Ensure that
     * accesses within the extension header are within those first 8
     * bytes. All extension headers are required to be at least 8
     * (terminator restored; tail truncated). */
    if (OVS_UNLIKELY(*sizep < 8)) {

    if ((*nw_proto == IPPROTO_HOPOPTS)
        || (*nw_proto == IPPROTO_ROUTING)
        || (*nw_proto == IPPROTO_DSTOPTS)) {
        /* These headers, while different, have the fields we care
         * about in the same location and with the same
         * (terminator restored; tail truncated). */
        const struct ip6_ext *ext_hdr = *datap;
        *nw_proto = ext_hdr->ip6e_nxt;
        /* Length is in 8-byte units, not counting the first 8 bytes. */
        if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
                                        (ext_hdr->ip6e_len + 1) * 8))) {
    } else if (*nw_proto == IPPROTO_AH) {
        /* A standard AH definition isn't available, but the fields
         * we care about are in the same location as the generic
         * option header--only the header length is calculated
         * (terminator restored; tail truncated). */
        const struct ip6_ext *ext_hdr = *datap;
        *nw_proto = ext_hdr->ip6e_nxt;
        /* AH length is in 4-byte units, not counting the first 8 bytes. */
        if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
                                        (ext_hdr->ip6e_len + 2) * 4))) {
    } else if (*nw_proto == IPPROTO_FRAGMENT) {
        *nw_proto = (*frag_hdr)->ip6f_nxt;
        if (!data_try_pull(datap, sizep, sizeof **frag_hdr)) {
        /* We only process the first fragment. */
        if ((*frag_hdr)->ip6f_offlg != htons(0)) {
            *nw_frag = FLOW_NW_FRAG_ANY;
            /* Non-zero offset means a later fragment: stop parsing here. */
            if (((*frag_hdr)->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
                *nw_frag |= FLOW_NW_FRAG_LATER;
                *nw_proto = IPPROTO_FRAGMENT;
/* Parses IPv6 extension headers until a terminal header (or header we
 * don't understand) is found.  'datap' points to the first extension
 * header and advances as parsing occurs; 'sizep' is the remaining size
 * and is decreased accordingly.  'nw_proto' starts as the first
 * extension header to process and is updated as the extension headers
 * are parsed.
 *
 * If a fragment header is found, '*frag_hdr' is set to the fragment
 * header and otherwise set to NULL.  If it is the first fragment,
 * extension header parsing otherwise continues as usual.  If it's not
 * the first fragment, 'nw_proto' is set to IPPROTO_FRAGMENT and 'nw_frag'
 * has FLOW_NW_FRAG_LATER set.  Both first and later fragments have
 * FLOW_NW_FRAG_ANY set in 'nw_frag'.
 *
 * A return value of false indicates that there was a problem parsing
 * the extension headers.*/
bool
parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                    uint8_t *nw_frag,
                    const struct ovs_16aligned_ip6_frag **frag_hdr)
{
    /* Thin public wrapper around the internal walker above. */
    return parse_ipv6_ext_hdrs__(datap, sizep, nw_proto, nw_frag,
                                 frag_hdr);
}
/* Parses an NSH header at '*datap' into 'key'.
 * NOTE(review): appears truncated -- the return type line, opening brace,
 * "return false;" bodies, the switch case labels (NSH_M_TYPE1, NSH_M_TYPE2,
 * default), "return true;" and closing braces are missing -- TODO restore. */
parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key)
    const struct nsh_hdr *nsh = (const struct nsh_hdr *) *datap;
    uint8_t version, length, flags, ttl;

    /* Check if it is long enough for NSH header, doesn't support
     * (terminator restored; tail truncated). */
    if (OVS_UNLIKELY(*sizep < NSH_BASE_HDR_LEN)) {

    version = nsh_get_ver(nsh);
    flags = nsh_get_flags(nsh);
    length = nsh_hdr_len(nsh);
    ttl = nsh_get_ttl(nsh);

    /* Only version 0 is defined; length must fit in the buffer. */
    if (OVS_UNLIKELY(length > *sizep || version != 0)) {

    key->mdtype = nsh->md_type;
    key->np = nsh->next_proto;
    key->path_hdr = nsh_get_path_hdr(nsh);

    switch (key->mdtype) {
        if (length != NSH_M_TYPE1_LEN) {
        for (size_t i = 0; i < 4; i++) {
            key->context[i] = get_16aligned_be32(&nsh->md1.context[i]);
        /* Don't support MD type 2 metedata parsing yet */
        if (length < NSH_BASE_HDR_LEN) {
        memset(key->context, 0, sizeof(key->context));
        /* We don't parse other context headers yet. */
        memset(key->context, 0, sizeof(key->context));

    /* Consume the whole NSH header. */
    data_pull(datap, sizep, length);
624 /* This does the same thing as miniflow_extract() with a full-size 'flow' as
625 * the destination. */
627 flow_extract(struct dp_packet
*packet
, struct flow
*flow
)
631 uint64_t buf
[FLOW_U64S
];
634 COVERAGE_INC(flow_extract
);
636 miniflow_extract(packet
, &m
.mf
);
637 miniflow_expand(&m
.mf
, flow
);
641 ipv4_sanity_check(const struct ip_header
*nh
, size_t size
,
642 int *ip_lenp
, uint16_t *tot_lenp
)
647 if (OVS_UNLIKELY(size
< IP_HEADER_LEN
)) {
650 ip_len
= IP_IHL(nh
->ip_ihl_ver
) * 4;
652 if (OVS_UNLIKELY(ip_len
< IP_HEADER_LEN
|| size
< ip_len
)) {
656 tot_len
= ntohs(nh
->ip_tot_len
);
657 if (OVS_UNLIKELY(tot_len
> size
|| ip_len
> tot_len
||
658 size
- tot_len
> UINT8_MAX
)) {
668 static inline uint8_t
669 ipv4_get_nw_frag(const struct ip_header
*nh
)
673 if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh
->ip_frag_off
))) {
674 nw_frag
= FLOW_NW_FRAG_ANY
;
675 if (nh
->ip_frag_off
& htons(IP_FRAG_OFF_MASK
)) {
676 nw_frag
|= FLOW_NW_FRAG_LATER
;
684 ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr
*nh
, size_t size
)
688 if (OVS_UNLIKELY(size
< sizeof *nh
)) {
692 plen
= ntohs(nh
->ip6_plen
);
693 if (OVS_UNLIKELY(plen
+ IPV6_HEADER_LEN
> size
)) {
696 /* Jumbo Payload option not supported yet. */
697 if (OVS_UNLIKELY(size
- (plen
+ IPV6_HEADER_LEN
) > UINT8_MAX
)) {
704 /* Initializes 'dst' from 'packet' and 'md', taking the packet type into
705 * account. 'dst' must have enough space for FLOW_U64S * 8 bytes.
707 * Initializes the layer offsets as follows:
709 * - packet->l2_5_ofs to the
710 * * the start of the MPLS shim header. Can be zero, if the
711 * packet is of type (OFPHTN_ETHERTYPE, ETH_TYPE_MPLS).
712 * * UINT16_MAX when there is no MPLS shim header.
714 * - packet->l3_ofs is set to
715 * * zero if the packet_type is in name space OFPHTN_ETHERTYPE
716 * and there is no MPLS shim header.
717 * * just past the Ethernet header, or just past the vlan_header if
718 * one is present, to the first byte of the payload of the
719 * Ethernet frame if the packet type is Ethernet and there is
720 * no MPLS shim header.
721 * * just past the MPLS label stack to the first byte of the MPLS
722 * payload if there is at least one MPLS shim header.
723 * * UINT16_MAX if the packet type is Ethernet and the frame is
724 * too short to contain an Ethernet header.
726 * - packet->l4_ofs is set to just past the IPv4 or IPv6 header, if one is
727 * present and the packet has at least the content used for the fields
728 * of interest for the flow, otherwise UINT16_MAX.
/* Extracts packet headers and metadata into the miniflow 'dst'; see the
 * contract comment above this function.
 * NOTE(review): this copy is heavily truncated -- the "void" return type
 * line, the opening brace, the declarations of 'frame', 'count', 'ip_len',
 * 'tot_len', 'plen', 'tc_flow', 'rso_flags', 'opt_type', many "goto out;"
 * statements, else branches, and a large number of closing braces are
 * missing -- TODO restore before building. */
miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
    /* Add code to this function (or its callees) to extract new fields. */
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);

    const struct pkt_metadata *md = &packet->md;
    const void *data = dp_packet_data(packet);
    size_t size = dp_packet_size(packet);
    ovs_be32 packet_type = packet->packet_type;
    uint64_t *values = miniflow_values(dst);
    /* 'mf' tracks the push cursor and bounds for the miniflow data. */
    struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
                         values + FLOW_U64S };
    ovs_be16 dl_type = OVS_BE16_MAX;
    uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
    uint8_t *ct_nw_proto_p = NULL;
    ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;

    /* Metadata: tunnel fields first, if a tunnel destination is set. */
    if (flow_tnl_dst_is_set(&md->tunnel)) {
        miniflow_push_words(mf, tunnel, &md->tunnel,
                            offsetof(struct flow_tnl, metadata) /
        if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
            if (md->tunnel.metadata.present.map) {
                miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
                                    sizeof md->tunnel.metadata /
            if (md->tunnel.metadata.present.len) {
                miniflow_push_words(mf, tunnel.metadata.present,
                                    &md->tunnel.metadata.present, 1);
                miniflow_push_words(mf, tunnel.metadata.opts.gnv,
                                    md->tunnel.metadata.opts.gnv,
                                    DIV_ROUND_UP(md->tunnel.metadata.present.len,

    if (md->skb_priority || md->pkt_mark) {
        miniflow_push_uint32(mf, skb_priority, md->skb_priority);
        miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);

    miniflow_push_uint32(mf, dp_hash, md->dp_hash);
    miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));

    miniflow_push_uint32(mf, recirc_id, md->recirc_id);
    miniflow_push_uint8(mf, ct_state, md->ct_state);
    /* ct_nw_proto is filled in later, once the L3 header is known. */
    ct_nw_proto_p = miniflow_pointer(mf, ct_nw_proto);
    miniflow_push_uint8(mf, ct_nw_proto, 0);
    miniflow_push_uint16(mf, ct_zone, md->ct_zone);
    miniflow_push_uint32(mf, ct_mark, md->ct_mark);
    miniflow_push_be32(mf, packet_type, packet_type);
    if (!ovs_u128_is_zero(md->ct_label)) {
        miniflow_push_words(mf, ct_label, &md->ct_label,
                            sizeof md->ct_label / sizeof(uint64_t));

        miniflow_push_uint32(mf, recirc_id, md->recirc_id);
        miniflow_pad_to_64(mf, recirc_id);

        miniflow_pad_from_64(mf, packet_type);
        miniflow_push_be32(mf, packet_type, packet_type);

    /* Initialize packet's layer pointer and offsets. */
    dp_packet_reset_offsets(packet);

    if (packet_type == htonl(PT_ETH)) {
        /* Must have full Ethernet header to proceed. */
        if (OVS_UNLIKELY(size < sizeof(struct eth_header))) {
        /* dl_dst and dl_src are pushed together as two MACs. */
        ASSERT_SEQUENTIAL(dl_dst, dl_src);
        miniflow_push_macs(mf, dl_dst, data);

        union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];
        size_t num_vlans = parse_vlan(&data, &size, vlans);

        dl_type = parse_ethertype(&data, &size);
        miniflow_push_be16(mf, dl_type, dl_type);
        miniflow_pad_to_64(mf, dl_type);
        miniflow_push_words_32(mf, vlans, vlans, num_vlans);
        /* Take dl_type from packet_type. */
        dl_type = pt_ns_type_be(packet_type);
        miniflow_pad_from_64(mf, dl_type);
        miniflow_push_be16(mf, dl_type, dl_type);
        /* Do not push vlan_tci, pad instead */
        miniflow_pad_to_64(mf, dl_type);

    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
        const void *mpls = data;

        packet->l2_5_ofs = (char *)data - frame;
        count = parse_mpls(&data, &size);
        miniflow_push_words_32(mf, mpls_lse, mpls, count);

    packet->l3_ofs = (char *)data - frame;

    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
        const struct ip_header *nh = data;

        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
        dp_packet_set_l2_pad_size(packet, size - tot_len);
        size = tot_len; /* Never pull padding. */

        /* Push both source and destination address at once. */
        miniflow_push_words(mf, nw_src, &nh->ip_src, 1);
        if (ct_nw_proto_p && !md->ct_orig_tuple_ipv6) {
            *ct_nw_proto_p = md->ct_orig_tuple.ipv4.ipv4_proto;
            if (*ct_nw_proto_p) {
                miniflow_push_words(mf, ct_nw_src,
                                    &md->ct_orig_tuple.ipv4.ipv4_src, 1);
                ct_tp_src = md->ct_orig_tuple.ipv4.src_port;
                ct_tp_dst = md->ct_orig_tuple.ipv4.dst_port;

        miniflow_push_be32(mf, ipv6_label, 0); /* Padding for IPv4. */

        nw_proto = nh->ip_proto;
        nw_frag = ipv4_get_nw_frag(nh);
        data_pull(&data, &size, ip_len);
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
        const struct ovs_16aligned_ip6_hdr *nh = data;

        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
        data_pull(&data, &size, sizeof *nh);

        plen = ntohs(nh->ip6_plen);
        dp_packet_set_l2_pad_size(packet, size - plen);
        size = plen; /* Never pull padding. */

        miniflow_push_words(mf, ipv6_src, &nh->ip6_src,
                            sizeof nh->ip6_src / 8);
        miniflow_push_words(mf, ipv6_dst, &nh->ip6_dst,
                            sizeof nh->ip6_dst / 8);
        if (ct_nw_proto_p && md->ct_orig_tuple_ipv6) {
            *ct_nw_proto_p = md->ct_orig_tuple.ipv6.ipv6_proto;
            if (*ct_nw_proto_p) {
                miniflow_push_words(mf, ct_ipv6_src,
                                    &md->ct_orig_tuple.ipv6.ipv6_src,
                                    sizeof md->ct_orig_tuple.ipv6.ipv6_src / 8);
                ct_tp_src = md->ct_orig_tuple.ipv6.src_port;
                ct_tp_dst = md->ct_orig_tuple.ipv6.dst_port;

        /* Traffic class is bits 20-27 of the flow word. */
        tc_flow = get_16aligned_be32(&nh->ip6_flow);
        nw_tos = ntohl(tc_flow) >> 20;
        nw_ttl = nh->ip6_hlim;
        nw_proto = nh->ip6_nxt;

        const struct ovs_16aligned_ip6_frag *frag_hdr;
        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag,

        /* This needs to be after the parse_ipv6_ext_hdrs__() call because it
         * leaves the nw_frag word uninitialized. */
        ASSERT_SEQUENTIAL(ipv6_label, nw_frag);
        ovs_be32 label = tc_flow & htonl(IPV6_LABEL_MASK);
        miniflow_push_be32(mf, ipv6_label, label);

    if (dl_type == htons(ETH_TYPE_ARP) ||
        dl_type == htons(ETH_TYPE_RARP)) {
        struct eth_addr arp_buf[2];
        const struct arp_eth_header *arp = (const struct arp_eth_header *)
            data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);

        /* Only Ethernet/IPv4 ARP is extracted. */
        if (OVS_LIKELY(arp) && OVS_LIKELY(arp->ar_hrd == htons(1))
            && OVS_LIKELY(arp->ar_pro == htons(ETH_TYPE_IP))
            && OVS_LIKELY(arp->ar_hln == ETH_ADDR_LEN)
            && OVS_LIKELY(arp->ar_pln == 4)) {
            miniflow_push_be32(mf, nw_src,
                               get_16aligned_be32(&arp->ar_spa));
            miniflow_push_be32(mf, nw_dst,
                               get_16aligned_be32(&arp->ar_tpa));

            /* We only match on the lower 8 bits of the opcode. */
            if (OVS_LIKELY(ntohs(arp->ar_op) <= 0xff)) {
                miniflow_push_be32(mf, ipv6_label, 0); /* Pad with ARP. */
                miniflow_push_be32(mf, nw_frag, htonl(ntohs(arp->ar_op)));

            /* Must be adjacent. */
            ASSERT_SEQUENTIAL(arp_sha, arp_tha);

            arp_buf[0] = arp->ar_sha;
            arp_buf[1] = arp->ar_tha;
            miniflow_push_macs(mf, arp_sha, arp_buf);
            miniflow_pad_to_64(mf, arp_tha);
    } else if (dl_type == htons(ETH_TYPE_NSH)) {
        struct ovs_key_nsh nsh;

        if (OVS_LIKELY(parse_nsh(&data, &size, &nsh))) {
            miniflow_push_words(mf, nsh, &nsh,
                                sizeof(struct ovs_key_nsh) /

    packet->l4_ofs = (char *)data - frame;
    miniflow_push_be32(mf, nw_frag,
                       bytes_to_be32(nw_frag, nw_tos, nw_ttl, nw_proto));

    /* Later fragments carry no L4 header worth parsing. */
    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
                const struct tcp_header *tcp = data;

                miniflow_push_be32(mf, arp_tha.ea[2], 0);
                miniflow_push_be32(mf, tcp_flags,
                                   TCP_FLAGS_BE32(tcp->tcp_ctl));
                miniflow_push_be16(mf, tp_src, tcp->tcp_src);
                miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
        } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
            if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
                const struct udp_header *udp = data;

                miniflow_push_be16(mf, tp_src, udp->udp_src);
                miniflow_push_be16(mf, tp_dst, udp->udp_dst);
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
        } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
            if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
                const struct sctp_header *sctp = data;

                miniflow_push_be16(mf, tp_src, sctp->sctp_src);
                miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
            if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
                const struct icmp_header *icmp = data;

                miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
                miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
        } else if (OVS_LIKELY(nw_proto == IPPROTO_IGMP)) {
            if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
                const struct igmp_header *igmp = data;

                miniflow_push_be16(mf, tp_src, htons(igmp->igmp_type));
                miniflow_push_be16(mf, tp_dst, htons(igmp->igmp_code));
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                miniflow_push_be32(mf, igmp_group_ip4,
                                   get_16aligned_be32(&igmp->group));
                miniflow_pad_to_64(mf, igmp_group_ip4);
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMPV6)) {
            if (OVS_LIKELY(size >= sizeof(struct icmp6_data_header))) {
                const struct in6_addr *nd_target;
                struct eth_addr arp_buf[2];
                /* This will populate whether we received Option 1
                 * (terminator restored; tail truncated). */
                /* This holds the ND Reserved field. */
                const struct icmp6_data_header *icmp6;

                icmp6 = data_pull(&data, &size, sizeof *icmp6);
                if (parse_icmpv6(&data, &size, icmp6,
                                 &rso_flags, &nd_target, arp_buf, &opt_type)) {
                    miniflow_push_words(mf, nd_target, nd_target,
                                        sizeof *nd_target / sizeof(uint64_t));
                    miniflow_push_macs(mf, arp_sha, arp_buf);
                    /* Populate options field and set the padding
                     * (terminator restored; tail truncated). */
                    if (opt_type != 0) {
                        miniflow_push_be16(mf, tcp_flags, htons(opt_type));
                        /* Pad to align with 64 bits.
                         * This will zero out the pad3 field. */
                        miniflow_pad_to_64(mf, tcp_flags);
                        /* Pad to align with 64 bits.
                         * This will zero out the tcp_flags & pad3 field. */
                        miniflow_pad_to_64(mf, arp_tha);
                    miniflow_push_be16(mf, tp_src,
                                       htons(icmp6->icmp6_base.icmp6_type));
                    miniflow_push_be16(mf, tp_dst,
                                       htons(icmp6->icmp6_base.icmp6_code));
                    miniflow_pad_to_64(mf, tp_dst);
                    /* Fill ND reserved field. */
                    miniflow_push_be32(mf, igmp_group_ip4, rso_flags);
                    miniflow_pad_to_64(mf, igmp_group_ip4);
                    /* ICMPv6 but not ND. */
                    miniflow_push_be16(mf, tp_src,
                                       htons(icmp6->icmp6_base.icmp6_type));
                    miniflow_push_be16(mf, tp_dst,
                                       htons(icmp6->icmp6_base.icmp6_code));
                    miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                    miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
1077 parse_dl_type(const void **datap
, size_t *sizep
)
1079 union flow_vlan_hdr vlans
[FLOW_MAX_VLAN_HEADERS
];
1081 parse_vlan(datap
, sizep
, vlans
);
1083 return parse_ethertype(datap
, sizep
);
/* Parses and return the TCP flags in 'packet', converted to host byte order.
 * If 'packet' is not an Ethernet packet embedding TCP, returns 0.
 * The caller must ensure that 'packet' is at least ETH_HEADER_LEN bytes
 * (terminator restored; tail truncated). */
/* NOTE(review): appears truncated -- the return type line, opening brace,
 * the declaration of 'dl_type', 'ip_len'/'tot_len'/'plen'/'frag_hdr' uses,
 * several "return 0;" statements and closing braces are missing -- TODO
 * restore. */
parse_tcp_flags(struct dp_packet *packet)
    const void *data = dp_packet_data(packet);
    const char *frame = (const char *)data;
    size_t size = dp_packet_size(packet);
    uint8_t nw_frag = 0, nw_proto = 0;

    if (!dp_packet_is_eth(packet)) {

    dp_packet_reset_offsets(packet);

    dl_type = parse_dl_type(&data, &size);
    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
        packet->l2_5_ofs = (char *)data - frame;
    packet->l3_ofs = (char *)data - frame;
    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
        const struct ip_header *nh = data;

        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
        dp_packet_set_l2_pad_size(packet, size - tot_len);
        nw_proto = nh->ip_proto;
        nw_frag = ipv4_get_nw_frag(nh);

        size = tot_len; /* Never pull padding. */
        data_pull(&data, &size, ip_len);
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
        const struct ovs_16aligned_ip6_hdr *nh = data;

        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
        data_pull(&data, &size, sizeof *nh);

        plen = ntohs(nh->ip6_plen); /* Never pull padding. */
        dp_packet_set_l2_pad_size(packet, size - plen);

        const struct ovs_16aligned_ip6_frag *frag_hdr;
        nw_proto = nh->ip6_nxt;
        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag,

    packet->l4_ofs = (uint16_t)((char *)data - frame);
    /* Only a complete TCP header of a non-later fragment has flags. */
    if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
        size >= TCP_HEADER_LEN) {
        const struct tcp_header *tcp = data;

        return TCP_FLAGS(tcp->tcp_ctl);
1158 /* For every bit of a field that is wildcarded in 'wildcards', sets the
1159 * corresponding bit in 'flow' to zero. */
1161 flow_zero_wildcards(struct flow
*flow
, const struct flow_wildcards
*wildcards
)
1163 uint64_t *flow_u64
= (uint64_t *) flow
;
1164 const uint64_t *wc_u64
= (const uint64_t *) &wildcards
->masks
;
1167 for (i
= 0; i
< FLOW_U64S
; i
++) {
1168 flow_u64
[i
] &= wc_u64
[i
];
1173 flow_unwildcard_tp_ports(const struct flow
*flow
, struct flow_wildcards
*wc
)
1175 if (flow
->nw_proto
!= IPPROTO_ICMP
) {
1176 memset(&wc
->masks
.tp_src
, 0xff, sizeof wc
->masks
.tp_src
);
1177 memset(&wc
->masks
.tp_dst
, 0xff, sizeof wc
->masks
.tp_dst
);
1179 wc
->masks
.tp_src
= htons(0xff);
1180 wc
->masks
.tp_dst
= htons(0xff);
1184 /* Initializes 'flow_metadata' with the metadata found in 'flow'. */
1186 flow_get_metadata(const struct flow
*flow
, struct match
*flow_metadata
)
1190 BUILD_ASSERT_DECL(FLOW_WC_SEQ
== 42);
1192 match_init_catchall(flow_metadata
);
1193 if (flow
->tunnel
.tun_id
!= htonll(0)) {
1194 match_set_tun_id(flow_metadata
, flow
->tunnel
.tun_id
);
1196 if (flow
->tunnel
.flags
& FLOW_TNL_PUB_F_MASK
) {
1197 match_set_tun_flags(flow_metadata
,
1198 flow
->tunnel
.flags
& FLOW_TNL_PUB_F_MASK
);
1200 if (flow
->tunnel
.ip_src
) {
1201 match_set_tun_src(flow_metadata
, flow
->tunnel
.ip_src
);
1203 if (flow
->tunnel
.ip_dst
) {
1204 match_set_tun_dst(flow_metadata
, flow
->tunnel
.ip_dst
);
1206 if (ipv6_addr_is_set(&flow
->tunnel
.ipv6_src
)) {
1207 match_set_tun_ipv6_src(flow_metadata
, &flow
->tunnel
.ipv6_src
);
1209 if (ipv6_addr_is_set(&flow
->tunnel
.ipv6_dst
)) {
1210 match_set_tun_ipv6_dst(flow_metadata
, &flow
->tunnel
.ipv6_dst
);
1212 if (flow
->tunnel
.gbp_id
!= htons(0)) {
1213 match_set_tun_gbp_id(flow_metadata
, flow
->tunnel
.gbp_id
);
1215 if (flow
->tunnel
.gbp_flags
) {
1216 match_set_tun_gbp_flags(flow_metadata
, flow
->tunnel
.gbp_flags
);
1218 if (flow
->tunnel
.erspan_ver
) {
1219 match_set_tun_erspan_ver(flow_metadata
, flow
->tunnel
.erspan_ver
);
1221 if (flow
->tunnel
.erspan_idx
) {
1222 match_set_tun_erspan_idx(flow_metadata
, flow
->tunnel
.erspan_idx
);
1224 if (flow
->tunnel
.erspan_dir
) {
1225 match_set_tun_erspan_dir(flow_metadata
, flow
->tunnel
.erspan_dir
);
1227 if (flow
->tunnel
.erspan_hwid
) {
1228 match_set_tun_erspan_hwid(flow_metadata
, flow
->tunnel
.erspan_hwid
);
1230 if (flow
->tunnel
.gtpu_flags
) {
1231 match_set_tun_gtpu_flags(flow_metadata
, flow
->tunnel
.gtpu_flags
);
1233 if (flow
->tunnel
.gtpu_msgtype
) {
1234 match_set_tun_gtpu_msgtype(flow_metadata
, flow
->tunnel
.gtpu_msgtype
);
1236 tun_metadata_get_fmd(&flow
->tunnel
, flow_metadata
);
1237 if (flow
->metadata
!= htonll(0)) {
1238 match_set_metadata(flow_metadata
, flow
->metadata
);
1241 for (i
= 0; i
< FLOW_N_REGS
; i
++) {
1242 if (flow
->regs
[i
]) {
1243 match_set_reg(flow_metadata
, i
, flow
->regs
[i
]);
1247 if (flow
->pkt_mark
!= 0) {
1248 match_set_pkt_mark(flow_metadata
, flow
->pkt_mark
);
1251 match_set_in_port(flow_metadata
, flow
->in_port
.ofp_port
);
1252 if (flow
->packet_type
!= htonl(PT_ETH
)) {
1253 match_set_packet_type(flow_metadata
, flow
->packet_type
);
1256 if (flow
->ct_state
!= 0) {
1257 match_set_ct_state(flow_metadata
, flow
->ct_state
);
1258 /* Match dl_type since it is required for the later interpretation of
1259 * the conntrack metadata. */
1260 match_set_dl_type(flow_metadata
, flow
->dl_type
);
1261 if (is_ct_valid(flow
, NULL
, NULL
) && flow
->ct_nw_proto
!= 0) {
1262 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
1263 match_set_ct_nw_src(flow_metadata
, flow
->ct_nw_src
);
1264 match_set_ct_nw_dst(flow_metadata
, flow
->ct_nw_dst
);
1265 match_set_ct_nw_proto(flow_metadata
, flow
->ct_nw_proto
);
1266 match_set_ct_tp_src(flow_metadata
, flow
->ct_tp_src
);
1267 match_set_ct_tp_dst(flow_metadata
, flow
->ct_tp_dst
);
1268 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
1269 match_set_ct_ipv6_src(flow_metadata
, &flow
->ct_ipv6_src
);
1270 match_set_ct_ipv6_dst(flow_metadata
, &flow
->ct_ipv6_dst
);
1271 match_set_ct_nw_proto(flow_metadata
, flow
->ct_nw_proto
);
1272 match_set_ct_tp_src(flow_metadata
, flow
->ct_tp_src
);
1273 match_set_ct_tp_dst(flow_metadata
, flow
->ct_tp_dst
);
1277 if (flow
->ct_zone
!= 0) {
1278 match_set_ct_zone(flow_metadata
, flow
->ct_zone
);
1280 if (flow
->ct_mark
!= 0) {
1281 match_set_ct_mark(flow_metadata
, flow
->ct_mark
);
1283 if (!ovs_u128_is_zero(flow
->ct_label
)) {
1284 match_set_ct_label(flow_metadata
, flow
->ct_label
);
1289 ct_state_to_string(uint32_t state
)
1292 #define CS_STATE(ENUM, INDEX, NAME) case CS_##ENUM: return NAME;
1301 ct_state_from_string(const char *s
)
1303 #define CS_STATE(ENUM, INDEX, NAME) \
1304 if (!strcmp(s, NAME)) { \
1312 /* Parses conntrack state from 'state_str'. If it is parsed successfully,
1313 * stores the parsed ct_state in 'ct_state', and returns true. Otherwise,
1314 * returns false, and reports error message in 'ds'. */
1316 parse_ct_state(const char *state_str
, uint32_t default_state
,
1317 uint32_t *ct_state
, struct ds
*ds
)
1319 uint32_t state
= default_state
;
1320 char *state_s
= xstrdup(state_str
);
1321 char *save_ptr
= NULL
;
1323 for (char *cs
= strtok_r(state_s
, ", ", &save_ptr
); cs
;
1324 cs
= strtok_r(NULL
, ", ", &save_ptr
)) {
1325 uint32_t bit
= ct_state_from_string(cs
);
1327 ds_put_format(ds
, "%s: unknown connection tracking state flag",
1341 /* Checks the given conntrack state 'state' according to the constraints
1342 * listed in ovs-fields (7). Returns true if it is valid. Otherwise, returns
1343 * false, and reports error in 'ds'. */
1345 validate_ct_state(uint32_t state
, struct ds
*ds
)
1347 bool valid_ct_state
= true;
1348 struct ds d_str
= DS_EMPTY_INITIALIZER
;
1350 format_flags(&d_str
, ct_state_to_string
, state
, '|');
1352 if (state
&& !(state
& CS_TRACKED
)) {
1353 ds_put_format(ds
, "%s: invalid connection state: "
1354 "If \"trk\" is unset, no other flags are set\n",
1356 valid_ct_state
= false;
1358 if (state
& CS_INVALID
&& state
& ~(CS_TRACKED
| CS_INVALID
)) {
1359 ds_put_format(ds
, "%s: invalid connection state: "
1360 "when \"inv\" is set, only \"trk\" may also be set\n",
1362 valid_ct_state
= false;
1364 if (state
& CS_NEW
&& state
& CS_ESTABLISHED
) {
1365 ds_put_format(ds
, "%s: invalid connection state: "
1366 "\"new\" and \"est\" are mutually exclusive\n",
1368 valid_ct_state
= false;
1370 if (state
& CS_NEW
&& state
& CS_REPLY_DIR
) {
1371 ds_put_format(ds
, "%s: invalid connection state: "
1372 "\"new\" and \"rpy\" are mutually exclusive\n",
1374 valid_ct_state
= false;
1378 return valid_ct_state
;
1381 /* Clears the fields in 'flow' associated with connection tracking. */
1383 flow_clear_conntrack(struct flow
*flow
)
1388 flow
->ct_label
= OVS_U128_ZERO
;
1390 flow
->ct_nw_proto
= 0;
1391 flow
->ct_tp_src
= 0;
1392 flow
->ct_tp_dst
= 0;
1393 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
1394 flow
->ct_nw_src
= 0;
1395 flow
->ct_nw_dst
= 0;
1396 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
1397 memset(&flow
->ct_ipv6_src
, 0, sizeof flow
->ct_ipv6_src
);
1398 memset(&flow
->ct_ipv6_dst
, 0, sizeof flow
->ct_ipv6_dst
);
1403 flow_to_string(const struct flow
*flow
,
1404 const struct ofputil_port_map
*port_map
)
1406 struct ds ds
= DS_EMPTY_INITIALIZER
;
1407 flow_format(&ds
, flow
, port_map
);
1408 return ds_cstr(&ds
);
1412 flow_tun_flag_to_string(uint32_t flags
)
1415 case FLOW_TNL_F_DONT_FRAGMENT
:
1417 case FLOW_TNL_F_CSUM
:
1419 case FLOW_TNL_F_KEY
:
1421 case FLOW_TNL_F_OAM
:
1429 format_flags(struct ds
*ds
, const char *(*bit_to_string
)(uint32_t),
1430 uint32_t flags
, char del
)
1435 ds_put_char(ds
, '0');
1439 uint32_t bit
= rightmost_1bit(flags
);
1442 s
= bit_to_string(bit
);
1444 ds_put_format(ds
, "%s%c", s
, del
);
1453 ds_put_format(ds
, "0x%"PRIx32
"%c", bad
, del
);
1459 format_flags_masked(struct ds
*ds
, const char *name
,
1460 const char *(*bit_to_string
)(uint32_t), uint32_t flags
,
1461 uint32_t mask
, uint32_t max_mask
)
1464 ds_put_format(ds
, "%s%s=%s", colors
.param
, name
, colors
.end
);
1467 if (mask
== max_mask
) {
1468 format_flags(ds
, bit_to_string
, flags
, '|');
1473 ds_put_cstr(ds
, "0/0");
1478 uint32_t bit
= rightmost_1bit(mask
);
1479 const char *s
= bit_to_string(bit
);
1481 ds_put_format(ds
, "%s%s", (flags
& bit
) ? "+" : "-",
1482 s
? s
: "[Unknown]");
1488 put_u16_masked(struct ds
*s
, uint16_t value
, uint16_t mask
)
1491 ds_put_char(s
, '*');
1494 ds_put_format(s
, "0x%"PRIx16
, value
);
1496 ds_put_format(s
, "%"PRIu16
, value
);
1499 if (mask
!= UINT16_MAX
) {
1500 ds_put_format(s
, "/0x%"PRIx16
, mask
);
1506 format_packet_type_masked(struct ds
*s
, ovs_be32 value
, ovs_be32 mask
)
1508 if (value
== htonl(PT_ETH
) && mask
== OVS_BE32_MAX
) {
1509 ds_put_cstr(s
, "eth");
1511 ds_put_cstr(s
, "packet_type=(");
1512 put_u16_masked(s
, pt_ns(value
), pt_ns(mask
));
1513 ds_put_char(s
, ',');
1514 put_u16_masked(s
, pt_ns_type(value
), pt_ns_type(mask
));
1515 ds_put_char(s
, ')');
1519 /* Scans a string 's' of flags to determine their numerical value and
1520 * returns the number of characters parsed using 'bit_to_string' to
1521 * lookup flag names. Scanning continues until the character 'end' is
1524 * In the event of a failure, a negative error code will be returned. In
1525 * addition, if 'res_string' is non-NULL then a descriptive string will
1526 * be returned incorporating the identifying string 'field_name'. This
1527 * error string must be freed by the caller.
1529 * Upon success, the flag values will be stored in 'res_flags' and
1530 * optionally 'res_mask', if it is non-NULL (if it is NULL then any masks
1531 * present in the original string will be considered an error). The
1532 * caller may restrict the acceptable set of values through the mask
1535 parse_flags(const char *s
, const char *(*bit_to_string
)(uint32_t),
1536 char end
, const char *field_name
, char **res_string
,
1537 uint32_t *res_flags
, uint32_t allowed
, uint32_t *res_mask
)
1539 uint32_t result
= 0;
1542 /* Parse masked flags in numeric format? */
1543 if (res_mask
&& ovs_scan(s
, "%"SCNi32
"/%"SCNi32
"%n",
1544 res_flags
, res_mask
, &n
) && n
> 0) {
1545 if (*res_flags
& ~allowed
|| *res_mask
& ~allowed
) {
1553 if (res_mask
&& (*s
== '+' || *s
== '-')) {
1554 uint32_t flags
= 0, mask
= 0;
1556 /* Parse masked flags. */
1557 while (s
[0] != end
) {
1564 } else if (s
[0] == '-') {
1568 *res_string
= xasprintf("%s: %s must be preceded by '+' "
1569 "(for SET) or '-' (NOT SET)", s
,
1577 for (bit
= 1; bit
; bit
<<= 1) {
1578 const char *fname
= bit_to_string(bit
);
1584 len
= strlen(fname
);
1585 if (strncmp(s
, fname
, len
) ||
1586 (s
[len
] != '+' && s
[len
] != '-' && s
[len
] != end
)) {
1591 /* bit already set. */
1593 *res_string
= xasprintf("%s: Each %s flag can be "
1594 "specified only once", s
,
1599 if (!(bit
& allowed
)) {
1621 /* Parse unmasked flags. If a flag is present, it is set, otherwise
1623 while (s
[n
] != end
) {
1624 unsigned long long int flags
;
1628 if (ovs_scan(&s
[n
], "%lli%n", &flags
, &n0
)) {
1629 if (flags
& ~allowed
) {
1632 n
+= n0
+ (s
[n
+ n0
] == '|');
1637 for (bit
= 1; bit
; bit
<<= 1) {
1638 const char *name
= bit_to_string(bit
);
1646 if (!strncmp(s
+ n
, name
, len
) &&
1647 (s
[n
+ len
] == '|' || s
[n
+ len
] == end
)) {
1648 if (!(bit
& allowed
)) {
1652 n
+= len
+ (s
[n
+ len
] == '|');
1662 *res_flags
= result
;
1664 *res_mask
= UINT32_MAX
;
1673 *res_string
= xasprintf("%s: unknown %s flag(s)", s
, field_name
);
1679 flow_format(struct ds
*ds
,
1680 const struct flow
*flow
, const struct ofputil_port_map
*port_map
)
1683 struct flow_wildcards
*wc
= &match
.wc
;
1685 match_wc_init(&match
, flow
);
1687 /* As this function is most often used for formatting a packet in a
1688 * packet-in message, skip formatting the packet context fields that are
1689 * all-zeroes to make the print-out easier on the eyes. This means that a
1690 * missing context field implies a zero value for that field. This is
1691 * similar to OpenFlow encoding of these fields, as the specification
1692 * states that all-zeroes context fields should not be encoded in the
1693 * packet-in messages. */
1694 if (!flow
->in_port
.ofp_port
) {
1695 WC_UNMASK_FIELD(wc
, in_port
);
1697 if (!flow
->skb_priority
) {
1698 WC_UNMASK_FIELD(wc
, skb_priority
);
1700 if (!flow
->pkt_mark
) {
1701 WC_UNMASK_FIELD(wc
, pkt_mark
);
1703 if (!flow
->recirc_id
) {
1704 WC_UNMASK_FIELD(wc
, recirc_id
);
1706 if (!flow
->dp_hash
) {
1707 WC_UNMASK_FIELD(wc
, dp_hash
);
1709 if (!flow
->ct_state
) {
1710 WC_UNMASK_FIELD(wc
, ct_state
);
1712 if (!flow
->ct_zone
) {
1713 WC_UNMASK_FIELD(wc
, ct_zone
);
1715 if (!flow
->ct_mark
) {
1716 WC_UNMASK_FIELD(wc
, ct_mark
);
1718 if (ovs_u128_is_zero(flow
->ct_label
)) {
1719 WC_UNMASK_FIELD(wc
, ct_label
);
1721 if (!is_ct_valid(flow
, &match
.wc
, NULL
) || !flow
->ct_nw_proto
) {
1722 WC_UNMASK_FIELD(wc
, ct_nw_proto
);
1723 WC_UNMASK_FIELD(wc
, ct_tp_src
);
1724 WC_UNMASK_FIELD(wc
, ct_tp_dst
);
1725 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
1726 WC_UNMASK_FIELD(wc
, ct_nw_src
);
1727 WC_UNMASK_FIELD(wc
, ct_nw_dst
);
1728 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
1729 WC_UNMASK_FIELD(wc
, ct_ipv6_src
);
1730 WC_UNMASK_FIELD(wc
, ct_ipv6_dst
);
1733 for (int i
= 0; i
< FLOW_N_REGS
; i
++) {
1734 if (!flow
->regs
[i
]) {
1735 WC_UNMASK_FIELD(wc
, regs
[i
]);
1738 if (!flow
->metadata
) {
1739 WC_UNMASK_FIELD(wc
, metadata
);
1742 match_format(&match
, port_map
, ds
, OFP_DEFAULT_PRIORITY
);
1746 flow_print(FILE *stream
,
1747 const struct flow
*flow
, const struct ofputil_port_map
*port_map
)
1749 char *s
= flow_to_string(flow
, port_map
);
1754 /* flow_wildcards functions. */
1756 /* Initializes 'wc' as a set of wildcards that matches every packet. */
1758 flow_wildcards_init_catchall(struct flow_wildcards
*wc
)
1760 memset(&wc
->masks
, 0, sizeof wc
->masks
);
1763 /* Converts a flow into flow wildcards. It sets the wildcard masks based on
1764 * the packet headers extracted to 'flow'. It will not set the mask for fields
1765 * that do not make sense for the packet type. OpenFlow-only metadata is
1766 * wildcarded, but other metadata is unconditionally exact-matched. */
1768 flow_wildcards_init_for_packet(struct flow_wildcards
*wc
,
1769 const struct flow
*flow
)
1771 ovs_be16 dl_type
= OVS_BE16_MAX
;
1773 memset(&wc
->masks
, 0x0, sizeof wc
->masks
);
1775 /* Update this function whenever struct flow changes. */
1776 BUILD_ASSERT_DECL(FLOW_WC_SEQ
== 42);
1778 if (flow_tnl_dst_is_set(&flow
->tunnel
)) {
1779 if (flow
->tunnel
.flags
& FLOW_TNL_F_KEY
) {
1780 WC_MASK_FIELD(wc
, tunnel
.tun_id
);
1782 WC_MASK_FIELD(wc
, tunnel
.ip_src
);
1783 WC_MASK_FIELD(wc
, tunnel
.ip_dst
);
1784 WC_MASK_FIELD(wc
, tunnel
.ipv6_src
);
1785 WC_MASK_FIELD(wc
, tunnel
.ipv6_dst
);
1786 WC_MASK_FIELD(wc
, tunnel
.flags
);
1787 WC_MASK_FIELD(wc
, tunnel
.ip_tos
);
1788 WC_MASK_FIELD(wc
, tunnel
.ip_ttl
);
1789 WC_MASK_FIELD(wc
, tunnel
.tp_src
);
1790 WC_MASK_FIELD(wc
, tunnel
.tp_dst
);
1791 WC_MASK_FIELD(wc
, tunnel
.gbp_id
);
1792 WC_MASK_FIELD(wc
, tunnel
.gbp_flags
);
1793 WC_MASK_FIELD(wc
, tunnel
.erspan_ver
);
1794 WC_MASK_FIELD(wc
, tunnel
.erspan_idx
);
1795 WC_MASK_FIELD(wc
, tunnel
.erspan_dir
);
1796 WC_MASK_FIELD(wc
, tunnel
.erspan_hwid
);
1797 WC_MASK_FIELD(wc
, tunnel
.gtpu_flags
);
1798 WC_MASK_FIELD(wc
, tunnel
.gtpu_msgtype
);
1800 if (!(flow
->tunnel
.flags
& FLOW_TNL_F_UDPIF
)) {
1801 if (flow
->tunnel
.metadata
.present
.map
) {
1802 wc
->masks
.tunnel
.metadata
.present
.map
=
1803 flow
->tunnel
.metadata
.present
.map
;
1804 WC_MASK_FIELD(wc
, tunnel
.metadata
.opts
.u8
);
1805 WC_MASK_FIELD(wc
, tunnel
.metadata
.tab
);
1808 WC_MASK_FIELD(wc
, tunnel
.metadata
.present
.len
);
1809 memset(wc
->masks
.tunnel
.metadata
.opts
.gnv
, 0xff,
1810 flow
->tunnel
.metadata
.present
.len
);
1812 } else if (flow
->tunnel
.tun_id
) {
1813 WC_MASK_FIELD(wc
, tunnel
.tun_id
);
1816 /* metadata, regs, and conj_id wildcarded. */
1818 WC_MASK_FIELD(wc
, skb_priority
);
1819 WC_MASK_FIELD(wc
, pkt_mark
);
1820 WC_MASK_FIELD(wc
, ct_state
);
1821 WC_MASK_FIELD(wc
, ct_zone
);
1822 WC_MASK_FIELD(wc
, ct_mark
);
1823 WC_MASK_FIELD(wc
, ct_label
);
1824 WC_MASK_FIELD(wc
, recirc_id
);
1825 WC_MASK_FIELD(wc
, dp_hash
);
1826 WC_MASK_FIELD(wc
, in_port
);
1828 /* actset_output wildcarded. */
1830 WC_MASK_FIELD(wc
, packet_type
);
1831 if (flow
->packet_type
== htonl(PT_ETH
)) {
1832 WC_MASK_FIELD(wc
, dl_dst
);
1833 WC_MASK_FIELD(wc
, dl_src
);
1834 WC_MASK_FIELD(wc
, dl_type
);
1835 /* No need to set mask of inner VLANs that don't exist. */
1836 for (int i
= 0; i
< FLOW_MAX_VLAN_HEADERS
; i
++) {
1837 /* Always show the first zero VLAN. */
1838 WC_MASK_FIELD(wc
, vlans
[i
]);
1839 if (flow
->vlans
[i
].tci
== htons(0)) {
1843 dl_type
= flow
->dl_type
;
1845 dl_type
= pt_ns_type_be(flow
->packet_type
);
1848 if (dl_type
== htons(ETH_TYPE_IP
)) {
1849 WC_MASK_FIELD(wc
, nw_src
);
1850 WC_MASK_FIELD(wc
, nw_dst
);
1851 WC_MASK_FIELD(wc
, ct_nw_src
);
1852 WC_MASK_FIELD(wc
, ct_nw_dst
);
1853 } else if (dl_type
== htons(ETH_TYPE_IPV6
)) {
1854 WC_MASK_FIELD(wc
, ipv6_src
);
1855 WC_MASK_FIELD(wc
, ipv6_dst
);
1856 WC_MASK_FIELD(wc
, ipv6_label
);
1857 if (is_nd(flow
, wc
)) {
1858 WC_MASK_FIELD(wc
, arp_sha
);
1859 WC_MASK_FIELD(wc
, arp_tha
);
1860 WC_MASK_FIELD(wc
, nd_target
);
1862 WC_MASK_FIELD(wc
, ct_ipv6_src
);
1863 WC_MASK_FIELD(wc
, ct_ipv6_dst
);
1865 } else if (dl_type
== htons(ETH_TYPE_ARP
) ||
1866 dl_type
== htons(ETH_TYPE_RARP
)) {
1867 WC_MASK_FIELD(wc
, nw_src
);
1868 WC_MASK_FIELD(wc
, nw_dst
);
1869 WC_MASK_FIELD(wc
, nw_proto
);
1870 WC_MASK_FIELD(wc
, arp_sha
);
1871 WC_MASK_FIELD(wc
, arp_tha
);
1873 } else if (eth_type_mpls(dl_type
)) {
1874 for (int i
= 0; i
< FLOW_MAX_MPLS_LABELS
; i
++) {
1875 WC_MASK_FIELD(wc
, mpls_lse
[i
]);
1876 if (flow
->mpls_lse
[i
] & htonl(MPLS_BOS_MASK
)) {
1881 } else if (flow
->dl_type
== htons(ETH_TYPE_NSH
)) {
1882 WC_MASK_FIELD(wc
, nsh
.flags
);
1883 WC_MASK_FIELD(wc
, nsh
.ttl
);
1884 WC_MASK_FIELD(wc
, nsh
.mdtype
);
1885 WC_MASK_FIELD(wc
, nsh
.np
);
1886 WC_MASK_FIELD(wc
, nsh
.path_hdr
);
1887 WC_MASK_FIELD(wc
, nsh
.context
);
1889 return; /* Unknown ethertype. */
1893 WC_MASK_FIELD(wc
, nw_frag
);
1894 WC_MASK_FIELD(wc
, nw_tos
);
1895 WC_MASK_FIELD(wc
, nw_ttl
);
1896 WC_MASK_FIELD(wc
, nw_proto
);
1897 WC_MASK_FIELD(wc
, ct_nw_proto
);
1898 WC_MASK_FIELD(wc
, ct_tp_src
);
1899 WC_MASK_FIELD(wc
, ct_tp_dst
);
1901 /* No transport layer header in later fragments. */
1902 if (!(flow
->nw_frag
& FLOW_NW_FRAG_LATER
) &&
1903 (flow
->nw_proto
== IPPROTO_ICMP
||
1904 flow
->nw_proto
== IPPROTO_ICMPV6
||
1905 flow
->nw_proto
== IPPROTO_TCP
||
1906 flow
->nw_proto
== IPPROTO_UDP
||
1907 flow
->nw_proto
== IPPROTO_SCTP
||
1908 flow
->nw_proto
== IPPROTO_IGMP
)) {
1909 WC_MASK_FIELD(wc
, tp_src
);
1910 WC_MASK_FIELD(wc
, tp_dst
);
1912 if (flow
->nw_proto
== IPPROTO_TCP
) {
1913 WC_MASK_FIELD(wc
, tcp_flags
);
1914 } else if (flow
->nw_proto
== IPPROTO_IGMP
) {
1915 WC_MASK_FIELD(wc
, igmp_group_ip4
);
1920 /* Return a map of possible fields for a packet of the same type as 'flow'.
1921 * Including extra bits in the returned mask is not wrong, it is just less
1924 * This is a less precise version of flow_wildcards_init_for_packet() above. */
1926 flow_wc_map(const struct flow
*flow
, struct flowmap
*map
)
1928 /* Update this function whenever struct flow changes. */
1929 BUILD_ASSERT_DECL(FLOW_WC_SEQ
== 42);
1933 if (flow_tnl_dst_is_set(&flow
->tunnel
)) {
1934 FLOWMAP_SET__(map
, tunnel
, offsetof(struct flow_tnl
, metadata
));
1935 if (!(flow
->tunnel
.flags
& FLOW_TNL_F_UDPIF
)) {
1936 if (flow
->tunnel
.metadata
.present
.map
) {
1937 FLOWMAP_SET(map
, tunnel
.metadata
);
1940 FLOWMAP_SET(map
, tunnel
.metadata
.present
.len
);
1941 FLOWMAP_SET__(map
, tunnel
.metadata
.opts
.gnv
,
1942 flow
->tunnel
.metadata
.present
.len
);
1946 /* Metadata fields that can appear on packet input. */
1947 FLOWMAP_SET(map
, skb_priority
);
1948 FLOWMAP_SET(map
, pkt_mark
);
1949 FLOWMAP_SET(map
, recirc_id
);
1950 FLOWMAP_SET(map
, dp_hash
);
1951 FLOWMAP_SET(map
, in_port
);
1952 FLOWMAP_SET(map
, dl_dst
);
1953 FLOWMAP_SET(map
, dl_src
);
1954 FLOWMAP_SET(map
, dl_type
);
1955 FLOWMAP_SET(map
, vlans
);
1956 FLOWMAP_SET(map
, ct_state
);
1957 FLOWMAP_SET(map
, ct_zone
);
1958 FLOWMAP_SET(map
, ct_mark
);
1959 FLOWMAP_SET(map
, ct_label
);
1960 FLOWMAP_SET(map
, packet_type
);
1962 /* Ethertype-dependent fields. */
1963 if (OVS_LIKELY(flow
->dl_type
== htons(ETH_TYPE_IP
))) {
1964 FLOWMAP_SET(map
, nw_src
);
1965 FLOWMAP_SET(map
, nw_dst
);
1966 FLOWMAP_SET(map
, nw_proto
);
1967 FLOWMAP_SET(map
, nw_frag
);
1968 FLOWMAP_SET(map
, nw_tos
);
1969 FLOWMAP_SET(map
, nw_ttl
);
1970 FLOWMAP_SET(map
, tp_src
);
1971 FLOWMAP_SET(map
, tp_dst
);
1972 FLOWMAP_SET(map
, ct_nw_proto
);
1973 FLOWMAP_SET(map
, ct_nw_src
);
1974 FLOWMAP_SET(map
, ct_nw_dst
);
1975 FLOWMAP_SET(map
, ct_tp_src
);
1976 FLOWMAP_SET(map
, ct_tp_dst
);
1978 if (OVS_UNLIKELY(flow
->nw_proto
== IPPROTO_IGMP
)) {
1979 FLOWMAP_SET(map
, igmp_group_ip4
);
1981 FLOWMAP_SET(map
, tcp_flags
);
1983 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
1984 FLOWMAP_SET(map
, ipv6_src
);
1985 FLOWMAP_SET(map
, ipv6_dst
);
1986 FLOWMAP_SET(map
, ipv6_label
);
1987 FLOWMAP_SET(map
, nw_proto
);
1988 FLOWMAP_SET(map
, nw_frag
);
1989 FLOWMAP_SET(map
, nw_tos
);
1990 FLOWMAP_SET(map
, nw_ttl
);
1991 FLOWMAP_SET(map
, tp_src
);
1992 FLOWMAP_SET(map
, tp_dst
);
1994 if (OVS_UNLIKELY(is_nd(flow
, NULL
))) {
1995 FLOWMAP_SET(map
, nd_target
);
1996 FLOWMAP_SET(map
, arp_sha
);
1997 FLOWMAP_SET(map
, arp_tha
);
1998 FLOWMAP_SET(map
, tcp_flags
);
1999 FLOWMAP_SET(map
, igmp_group_ip4
);
2001 FLOWMAP_SET(map
, ct_nw_proto
);
2002 FLOWMAP_SET(map
, ct_ipv6_src
);
2003 FLOWMAP_SET(map
, ct_ipv6_dst
);
2004 FLOWMAP_SET(map
, ct_tp_src
);
2005 FLOWMAP_SET(map
, ct_tp_dst
);
2006 FLOWMAP_SET(map
, tcp_flags
);
2008 } else if (eth_type_mpls(flow
->dl_type
)) {
2009 FLOWMAP_SET(map
, mpls_lse
);
2010 } else if (flow
->dl_type
== htons(ETH_TYPE_ARP
) ||
2011 flow
->dl_type
== htons(ETH_TYPE_RARP
)) {
2012 FLOWMAP_SET(map
, nw_src
);
2013 FLOWMAP_SET(map
, nw_dst
);
2014 FLOWMAP_SET(map
, nw_proto
);
2015 FLOWMAP_SET(map
, arp_sha
);
2016 FLOWMAP_SET(map
, arp_tha
);
2017 } else if (flow
->dl_type
== htons(ETH_TYPE_NSH
)) {
2018 FLOWMAP_SET(map
, nsh
.flags
);
2019 FLOWMAP_SET(map
, nsh
.mdtype
);
2020 FLOWMAP_SET(map
, nsh
.np
);
2021 FLOWMAP_SET(map
, nsh
.path_hdr
);
2022 FLOWMAP_SET(map
, nsh
.context
);
2026 /* Clear the metadata and register wildcard masks. They are not packet
2029 flow_wildcards_clear_non_packet_fields(struct flow_wildcards
*wc
)
2031 /* Update this function whenever struct flow changes. */
2032 BUILD_ASSERT_DECL(FLOW_WC_SEQ
== 42);
2034 memset(&wc
->masks
.metadata
, 0, sizeof wc
->masks
.metadata
);
2035 memset(&wc
->masks
.regs
, 0, sizeof wc
->masks
.regs
);
2036 wc
->masks
.actset_output
= 0;
2037 wc
->masks
.conj_id
= 0;
2040 /* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or
2043 flow_wildcards_is_catchall(const struct flow_wildcards
*wc
)
2045 const uint64_t *wc_u64
= (const uint64_t *) &wc
->masks
;
2048 for (i
= 0; i
< FLOW_U64S
; i
++) {
2056 /* Sets 'dst' as the bitwise AND of wildcards in 'src1' and 'src2'.
2057 * That is, a bit or a field is wildcarded in 'dst' if it is wildcarded
2058 * in 'src1' or 'src2' or both. */
2060 flow_wildcards_and(struct flow_wildcards
*dst
,
2061 const struct flow_wildcards
*src1
,
2062 const struct flow_wildcards
*src2
)
2064 uint64_t *dst_u64
= (uint64_t *) &dst
->masks
;
2065 const uint64_t *src1_u64
= (const uint64_t *) &src1
->masks
;
2066 const uint64_t *src2_u64
= (const uint64_t *) &src2
->masks
;
2069 for (i
= 0; i
< FLOW_U64S
; i
++) {
2070 dst_u64
[i
] = src1_u64
[i
] & src2_u64
[i
];
2074 /* Sets 'dst' as the bitwise OR of wildcards in 'src1' and 'src2'. That
2075 * is, a bit or a field is wildcarded in 'dst' if it is neither
2076 * wildcarded in 'src1' nor 'src2'. */
2078 flow_wildcards_or(struct flow_wildcards
*dst
,
2079 const struct flow_wildcards
*src1
,
2080 const struct flow_wildcards
*src2
)
2082 uint64_t *dst_u64
= (uint64_t *) &dst
->masks
;
2083 const uint64_t *src1_u64
= (const uint64_t *) &src1
->masks
;
2084 const uint64_t *src2_u64
= (const uint64_t *) &src2
->masks
;
2087 for (i
= 0; i
< FLOW_U64S
; i
++) {
2088 dst_u64
[i
] = src1_u64
[i
] | src2_u64
[i
];
2092 /* Returns a hash of the wildcards in 'wc'. */
2094 flow_wildcards_hash(const struct flow_wildcards
*wc
, uint32_t basis
)
2096 return flow_hash(&wc
->masks
, basis
);
2099 /* Returns true if 'a' and 'b' represent the same wildcards, false if they are
2102 flow_wildcards_equal(const struct flow_wildcards
*a
,
2103 const struct flow_wildcards
*b
)
2105 return flow_equal(&a
->masks
, &b
->masks
);
2108 /* Returns true if at least one bit or field is wildcarded in 'a' but not in
2109 * 'b', false otherwise. */
2111 flow_wildcards_has_extra(const struct flow_wildcards
*a
,
2112 const struct flow_wildcards
*b
)
2114 const uint64_t *a_u64
= (const uint64_t *) &a
->masks
;
2115 const uint64_t *b_u64
= (const uint64_t *) &b
->masks
;
2118 for (i
= 0; i
< FLOW_U64S
; i
++) {
2119 if ((a_u64
[i
] & b_u64
[i
]) != b_u64
[i
]) {
2126 /* Returns true if 'a' and 'b' are equal, except that 0-bits (wildcarded bits)
2127 * in 'wc' do not need to be equal in 'a' and 'b'. */
2129 flow_equal_except(const struct flow
*a
, const struct flow
*b
,
2130 const struct flow_wildcards
*wc
)
2132 const uint64_t *a_u64
= (const uint64_t *) a
;
2133 const uint64_t *b_u64
= (const uint64_t *) b
;
2134 const uint64_t *wc_u64
= (const uint64_t *) &wc
->masks
;
2137 for (i
= 0; i
< FLOW_U64S
; i
++) {
2138 if ((a_u64
[i
] ^ b_u64
[i
]) & wc_u64
[i
]) {
2145 /* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2146 * (A 0-bit indicates a wildcard bit.) */
2148 flow_wildcards_set_reg_mask(struct flow_wildcards
*wc
, int idx
, uint32_t mask
)
2150 wc
->masks
.regs
[idx
] = mask
;
2153 /* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2154 * (A 0-bit indicates a wildcard bit.) */
2156 flow_wildcards_set_xreg_mask(struct flow_wildcards
*wc
, int idx
, uint64_t mask
)
2158 flow_set_xreg(&wc
->masks
, idx
, mask
);
2161 /* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2162 * (A 0-bit indicates a wildcard bit.) */
2164 flow_wildcards_set_xxreg_mask(struct flow_wildcards
*wc
, int idx
,
2167 flow_set_xxreg(&wc
->masks
, idx
, mask
);
2170 /* Calculates the 5-tuple hash from the given miniflow.
2171 * This returns the same value as flow_hash_5tuple for the corresponding
2174 miniflow_hash_5tuple(const struct miniflow
*flow
, uint32_t basis
)
2176 BUILD_ASSERT_DECL(FLOW_WC_SEQ
== 42);
2177 uint32_t hash
= basis
;
2180 ovs_be16 dl_type
= MINIFLOW_GET_BE16(flow
, dl_type
);
2183 if (dl_type
== htons(ETH_TYPE_IPV6
)) {
2184 struct flowmap map
= FLOWMAP_EMPTY_INITIALIZER
;
2187 FLOWMAP_SET(&map
, ipv6_src
);
2188 FLOWMAP_SET(&map
, ipv6_dst
);
2190 MINIFLOW_FOR_EACH_IN_FLOWMAP(value
, flow
, map
) {
2191 hash
= hash_add64(hash
, value
);
2193 } else if (dl_type
== htons(ETH_TYPE_IP
)
2194 || dl_type
== htons(ETH_TYPE_ARP
)) {
2195 hash
= hash_add(hash
, MINIFLOW_GET_U32(flow
, nw_src
));
2196 hash
= hash_add(hash
, MINIFLOW_GET_U32(flow
, nw_dst
));
2201 nw_proto
= MINIFLOW_GET_U8(flow
, nw_proto
);
2202 hash
= hash_add(hash
, nw_proto
);
2203 if (nw_proto
!= IPPROTO_TCP
&& nw_proto
!= IPPROTO_UDP
2204 && nw_proto
!= IPPROTO_SCTP
&& nw_proto
!= IPPROTO_ICMP
2205 && nw_proto
!= IPPROTO_ICMPV6
) {
2209 /* Add both ports at once. */
2210 hash
= hash_add(hash
, (OVS_FORCE
uint32_t) miniflow_get_ports(flow
));
2213 return hash_finish(hash
, 42);
2216 ASSERT_SEQUENTIAL_SAME_WORD(tp_src
, tp_dst
);
2217 ASSERT_SEQUENTIAL(ipv6_src
, ipv6_dst
);
2219 /* Calculates the 5-tuple hash from the given flow. */
2221 flow_hash_5tuple(const struct flow
*flow
, uint32_t basis
)
2223 BUILD_ASSERT_DECL(FLOW_WC_SEQ
== 42);
2224 uint32_t hash
= basis
;
2228 if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2229 const uint64_t *flow_u64
= (const uint64_t *)flow
;
2230 int ofs
= offsetof(struct flow
, ipv6_src
) / 8;
2231 int end
= ofs
+ 2 * sizeof flow
->ipv6_src
/ 8;
2233 for (;ofs
< end
; ofs
++) {
2234 hash
= hash_add64(hash
, flow_u64
[ofs
]);
2236 } else if (flow
->dl_type
== htons(ETH_TYPE_IP
)
2237 || flow
->dl_type
== htons(ETH_TYPE_ARP
)) {
2238 hash
= hash_add(hash
, (OVS_FORCE
uint32_t) flow
->nw_src
);
2239 hash
= hash_add(hash
, (OVS_FORCE
uint32_t) flow
->nw_dst
);
2244 hash
= hash_add(hash
, flow
->nw_proto
);
2245 if (flow
->nw_proto
!= IPPROTO_TCP
&& flow
->nw_proto
!= IPPROTO_UDP
2246 && flow
->nw_proto
!= IPPROTO_SCTP
&& flow
->nw_proto
!= IPPROTO_ICMP
2247 && flow
->nw_proto
!= IPPROTO_ICMPV6
) {
2251 /* Add both ports at once. */
2252 hash
= hash_add(hash
,
2253 ((const uint32_t *)flow
)[offsetof(struct flow
, tp_src
)
2254 / sizeof(uint32_t)]);
2257 return hash_finish(hash
, 42); /* Arbitrary number. */
2260 /* Hashes 'flow' based on its L2 through L4 protocol information. */
2262 flow_hash_symmetric_l4(const struct flow
*flow
, uint32_t basis
)
2267 struct in6_addr ipv6_addr
;
2272 struct eth_addr eth_addr
;
2278 memset(&fields
, 0, sizeof fields
);
2279 for (i
= 0; i
< ARRAY_SIZE(fields
.eth_addr
.be16
); i
++) {
2280 fields
.eth_addr
.be16
[i
] = flow
->dl_src
.be16
[i
] ^ flow
->dl_dst
.be16
[i
];
2282 for (i
= 0; i
< FLOW_MAX_VLAN_HEADERS
; i
++) {
2283 fields
.vlan_tci
^= flow
->vlans
[i
].tci
& htons(VLAN_VID_MASK
);
2285 fields
.eth_type
= flow
->dl_type
;
2287 /* UDP source and destination port are not taken into account because they
2288 * will not necessarily be symmetric in a bidirectional flow. */
2289 if (fields
.eth_type
== htons(ETH_TYPE_IP
)) {
2290 fields
.ipv4_addr
= flow
->nw_src
^ flow
->nw_dst
;
2291 fields
.ip_proto
= flow
->nw_proto
;
2292 if (fields
.ip_proto
== IPPROTO_TCP
|| fields
.ip_proto
== IPPROTO_SCTP
) {
2293 fields
.tp_port
= flow
->tp_src
^ flow
->tp_dst
;
2295 } else if (fields
.eth_type
== htons(ETH_TYPE_IPV6
)) {
2296 const uint8_t *a
= &flow
->ipv6_src
.s6_addr
[0];
2297 const uint8_t *b
= &flow
->ipv6_dst
.s6_addr
[0];
2298 uint8_t *ipv6_addr
= &fields
.ipv6_addr
.s6_addr
[0];
2300 for (i
=0; i
<16; i
++) {
2301 ipv6_addr
[i
] = a
[i
] ^ b
[i
];
2303 fields
.ip_proto
= flow
->nw_proto
;
2304 if (fields
.ip_proto
== IPPROTO_TCP
|| fields
.ip_proto
== IPPROTO_SCTP
) {
2305 fields
.tp_port
= flow
->tp_src
^ flow
->tp_dst
;
2308 return jhash_bytes(&fields
, sizeof fields
, basis
);
2311 /* Symmetrically Hashes non-IP 'flow' based on its L2 headers. */
2313 flow_hash_symmetric_l2(const struct flow
*flow
, uint32_t basis
)
2319 struct eth_addr eth_addr
;
2325 uint32_t hash
= basis
;
2328 if (flow
->packet_type
!= htonl(PT_ETH
)) {
2329 /* Cannot hash non-Ethernet flows */
2333 for (i
= 0; i
< ARRAY_SIZE(fields
.eth_addr
.be16
); i
++) {
2334 fields
.eth_addr
.be16
[i
] =
2335 flow
->dl_src
.be16
[i
] ^ flow
->dl_dst
.be16
[i
];
2337 fields
.vlan_tci
= 0;
2338 for (i
= 0; i
< FLOW_MAX_VLAN_HEADERS
; i
++) {
2339 fields
.vlan_tci
^= flow
->vlans
[i
].tci
& htons(VLAN_VID_MASK
);
2341 fields
.eth_type
= flow
->dl_type
;
2344 hash
= hash_add(hash
, fields
.word
[0]);
2345 hash
= hash_add(hash
, fields
.word
[1]);
2346 hash
= hash_add(hash
, fields
.word
[2]);
2347 return hash_finish(hash
, basis
);
2350 /* Hashes 'flow' based on its L3 through L4 protocol information */
2352 flow_hash_symmetric_l3l4(const struct flow
*flow
, uint32_t basis
,
2355 uint32_t hash
= basis
;
2357 /* UDP source and destination port are also taken into account. */
2358 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2359 hash
= hash_add(hash
,
2360 (OVS_FORCE
uint32_t) (flow
->nw_src
^ flow
->nw_dst
));
2361 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2362 /* IPv6 addresses are 64-bit aligned inside struct flow. */
2363 const uint64_t *a
= ALIGNED_CAST(uint64_t *, flow
->ipv6_src
.s6_addr
);
2364 const uint64_t *b
= ALIGNED_CAST(uint64_t *, flow
->ipv6_dst
.s6_addr
);
2366 for (int i
= 0; i
< sizeof flow
->ipv6_src
/ sizeof *a
; i
++) {
2367 hash
= hash_add64(hash
, a
[i
] ^ b
[i
]);
2370 /* Revert to hashing L2 headers */
2371 return flow_hash_symmetric_l2(flow
, basis
);
2373 hash
= hash_add(hash
, flow
->nw_proto
);
2374 if (!(flow
->nw_frag
& FLOW_NW_FRAG_MASK
)
2375 && (flow
->nw_proto
== IPPROTO_TCP
|| flow
->nw_proto
== IPPROTO_SCTP
||
2376 (inc_udp_ports
&& flow
->nw_proto
== IPPROTO_UDP
))) {
2377 hash
= hash_add(hash
,
2378 (OVS_FORCE
uint16_t) (flow
->tp_src
^ flow
->tp_dst
));
2381 return hash_finish(hash
, basis
);
2384 /* Hashes 'flow' based on its nw_dst and nw_src for multipath. */
2386 flow_hash_symmetric_l3(const struct flow
*flow
, uint32_t basis
)
2391 struct in6_addr ipv6_addr
;
2398 memset(&fields
, 0, sizeof fields
);
2399 fields
.eth_type
= flow
->dl_type
;
2401 if (fields
.eth_type
== htons(ETH_TYPE_IP
)) {
2402 fields
.ipv4_addr
= flow
->nw_src
^ flow
->nw_dst
;
2403 } else if (fields
.eth_type
== htons(ETH_TYPE_IPV6
)) {
2404 const uint8_t *a
= &flow
->ipv6_src
.s6_addr
[0];
2405 const uint8_t *b
= &flow
->ipv6_dst
.s6_addr
[0];
2406 uint8_t *ipv6_addr
= &fields
.ipv6_addr
.s6_addr
[0];
2408 for (i
= 0; i
< 16; i
++) {
2409 ipv6_addr
[i
] = a
[i
] ^ b
[i
];
2412 return jhash_bytes(&fields
, sizeof fields
, basis
);
2415 /* Initialize a flow with random fields that matter for nx_hash_fields. */
2417 flow_random_hash_fields(struct flow
*flow
)
2419 uint16_t rnd
= random_uint16();
2422 /* Initialize to all zeros. */
2423 memset(flow
, 0, sizeof *flow
);
2425 eth_addr_random(&flow
->dl_src
);
2426 eth_addr_random(&flow
->dl_dst
);
2428 for (i
= 0; i
< FLOW_MAX_VLAN_HEADERS
; i
++) {
2429 uint16_t vlan
= random_uint16() & VLAN_VID_MASK
;
2430 flow
->vlans
[i
].tpid
= htons(ETH_TYPE_VLAN_8021Q
);
2431 flow
->vlans
[i
].tci
= htons(vlan
| VLAN_CFI
);
2434 /* Make most of the random flows IPv4, some IPv6, and rest random. */
2435 flow
->dl_type
= rnd
< 0x8000 ? htons(ETH_TYPE_IP
) :
2436 rnd
< 0xc000 ? htons(ETH_TYPE_IPV6
) : (OVS_FORCE ovs_be16
)rnd
;
2438 if (dl_type_is_ip_any(flow
->dl_type
)) {
2439 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2440 flow
->nw_src
= (OVS_FORCE ovs_be32
)random_uint32();
2441 flow
->nw_dst
= (OVS_FORCE ovs_be32
)random_uint32();
2443 random_bytes(&flow
->ipv6_src
, sizeof flow
->ipv6_src
);
2444 random_bytes(&flow
->ipv6_dst
, sizeof flow
->ipv6_dst
);
2446 /* Make most of IP flows TCP, some UDP or SCTP, and rest random. */
2447 rnd
= random_uint16();
2448 flow
->nw_proto
= rnd
< 0x8000 ? IPPROTO_TCP
:
2449 rnd
< 0xc000 ? IPPROTO_UDP
:
2450 rnd
< 0xd000 ? IPPROTO_SCTP
: (uint8_t)rnd
;
2451 if (flow
->nw_proto
== IPPROTO_TCP
||
2452 flow
->nw_proto
== IPPROTO_UDP
||
2453 flow
->nw_proto
== IPPROTO_SCTP
) {
2454 flow
->tp_src
= (OVS_FORCE ovs_be16
)random_uint16();
2455 flow
->tp_dst
= (OVS_FORCE ovs_be16
)random_uint16();
2460 /* Masks the fields in 'wc' that are used by the flow hash 'fields'. */
2462 flow_mask_hash_fields(const struct flow
*flow
, struct flow_wildcards
*wc
,
2463 enum nx_hash_fields fields
)
2467 case NX_HASH_FIELDS_ETH_SRC
:
2468 memset(&wc
->masks
.dl_src
, 0xff, sizeof wc
->masks
.dl_src
);
2471 case NX_HASH_FIELDS_SYMMETRIC_L4
:
2472 memset(&wc
->masks
.dl_src
, 0xff, sizeof wc
->masks
.dl_src
);
2473 memset(&wc
->masks
.dl_dst
, 0xff, sizeof wc
->masks
.dl_dst
);
2474 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2475 memset(&wc
->masks
.nw_src
, 0xff, sizeof wc
->masks
.nw_src
);
2476 memset(&wc
->masks
.nw_dst
, 0xff, sizeof wc
->masks
.nw_dst
);
2477 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2478 memset(&wc
->masks
.ipv6_src
, 0xff, sizeof wc
->masks
.ipv6_src
);
2479 memset(&wc
->masks
.ipv6_dst
, 0xff, sizeof wc
->masks
.ipv6_dst
);
2481 if (is_ip_any(flow
)) {
2482 memset(&wc
->masks
.nw_proto
, 0xff, sizeof wc
->masks
.nw_proto
);
2483 /* Unwildcard port only for non-UDP packets as udp port
2484 * numbers are not used in hash calculations.
2486 if (flow
->nw_proto
!= IPPROTO_UDP
) {
2487 flow_unwildcard_tp_ports(flow
, wc
);
2490 for (i
= 0; i
< FLOW_MAX_VLAN_HEADERS
; i
++) {
2491 wc
->masks
.vlans
[i
].tci
|= htons(VLAN_VID_MASK
| VLAN_CFI
);
2494 case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP
:
2495 if (is_ip_any(flow
) && flow
->nw_proto
== IPPROTO_UDP
2496 && !(flow
->nw_frag
& FLOW_NW_FRAG_MASK
)) {
2497 memset(&wc
->masks
.tp_src
, 0xff, sizeof wc
->masks
.tp_src
);
2498 memset(&wc
->masks
.tp_dst
, 0xff, sizeof wc
->masks
.tp_dst
);
2501 case NX_HASH_FIELDS_SYMMETRIC_L3L4
:
2502 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2503 memset(&wc
->masks
.nw_src
, 0xff, sizeof wc
->masks
.nw_src
);
2504 memset(&wc
->masks
.nw_dst
, 0xff, sizeof wc
->masks
.nw_dst
);
2505 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2506 memset(&wc
->masks
.ipv6_src
, 0xff, sizeof wc
->masks
.ipv6_src
);
2507 memset(&wc
->masks
.ipv6_dst
, 0xff, sizeof wc
->masks
.ipv6_dst
);
2509 break; /* non-IP flow */
2511 memset(&wc
->masks
.nw_proto
, 0xff, sizeof wc
->masks
.nw_proto
);
2512 if ((flow
->nw_proto
== IPPROTO_TCP
|| flow
->nw_proto
== IPPROTO_SCTP
)
2513 && !(flow
->nw_frag
& FLOW_NW_FRAG_MASK
)) {
2514 memset(&wc
->masks
.tp_src
, 0xff, sizeof wc
->masks
.tp_src
);
2515 memset(&wc
->masks
.tp_dst
, 0xff, sizeof wc
->masks
.tp_dst
);
2519 case NX_HASH_FIELDS_NW_SRC
:
2520 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2521 memset(&wc
->masks
.nw_src
, 0xff, sizeof wc
->masks
.nw_src
);
2522 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2523 memset(&wc
->masks
.ipv6_src
, 0xff, sizeof wc
->masks
.ipv6_src
);
2527 case NX_HASH_FIELDS_NW_DST
:
2528 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2529 memset(&wc
->masks
.nw_dst
, 0xff, sizeof wc
->masks
.nw_dst
);
2530 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2531 memset(&wc
->masks
.ipv6_dst
, 0xff, sizeof wc
->masks
.ipv6_dst
);
2535 case NX_HASH_FIELDS_SYMMETRIC_L3
:
2536 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2537 memset(&wc
->masks
.nw_src
, 0xff, sizeof wc
->masks
.nw_src
);
2538 memset(&wc
->masks
.nw_dst
, 0xff, sizeof wc
->masks
.nw_dst
);
2539 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2540 memset(&wc
->masks
.ipv6_src
, 0xff, sizeof wc
->masks
.ipv6_src
);
2541 memset(&wc
->masks
.ipv6_dst
, 0xff, sizeof wc
->masks
.ipv6_dst
);
2550 /* Hashes the portions of 'flow' designated by 'fields'. */
2552 flow_hash_fields(const struct flow
*flow
, enum nx_hash_fields fields
,
2557 case NX_HASH_FIELDS_ETH_SRC
:
2558 return jhash_bytes(&flow
->dl_src
, sizeof flow
->dl_src
, basis
);
2560 case NX_HASH_FIELDS_SYMMETRIC_L4
:
2561 return flow_hash_symmetric_l4(flow
, basis
);
2563 case NX_HASH_FIELDS_SYMMETRIC_L3L4
:
2564 return flow_hash_symmetric_l3l4(flow
, basis
, false);
2566 case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP
:
2567 return flow_hash_symmetric_l3l4(flow
, basis
, true);
2569 case NX_HASH_FIELDS_NW_SRC
:
2570 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2571 return jhash_bytes(&flow
->nw_src
, sizeof flow
->nw_src
, basis
);
2572 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2573 return jhash_bytes(&flow
->ipv6_src
, sizeof flow
->ipv6_src
, basis
);
2578 case NX_HASH_FIELDS_NW_DST
:
2579 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
2580 return jhash_bytes(&flow
->nw_dst
, sizeof flow
->nw_dst
, basis
);
2581 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2582 return jhash_bytes(&flow
->ipv6_dst
, sizeof flow
->ipv6_dst
, basis
);
2587 case NX_HASH_FIELDS_SYMMETRIC_L3
:
2588 return flow_hash_symmetric_l3(flow
, basis
);
2594 /* Returns a string representation of 'fields'. */
2596 flow_hash_fields_to_str(enum nx_hash_fields fields
)
2599 case NX_HASH_FIELDS_ETH_SRC
: return "eth_src";
2600 case NX_HASH_FIELDS_SYMMETRIC_L4
: return "symmetric_l4";
2601 case NX_HASH_FIELDS_SYMMETRIC_L3L4
: return "symmetric_l3l4";
2602 case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP
: return "symmetric_l3l4+udp";
2603 case NX_HASH_FIELDS_NW_SRC
: return "nw_src";
2604 case NX_HASH_FIELDS_NW_DST
: return "nw_dst";
2605 case NX_HASH_FIELDS_SYMMETRIC_L3
: return "symmetric_l3";
2606 default: return "<unknown>";
2610 /* Returns true if the value of 'fields' is supported. Otherwise false. */
2612 flow_hash_fields_valid(enum nx_hash_fields fields
)
2614 return fields
== NX_HASH_FIELDS_ETH_SRC
2615 || fields
== NX_HASH_FIELDS_SYMMETRIC_L4
2616 || fields
== NX_HASH_FIELDS_SYMMETRIC_L3L4
2617 || fields
== NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP
2618 || fields
== NX_HASH_FIELDS_NW_SRC
2619 || fields
== NX_HASH_FIELDS_NW_DST
2620 || fields
== NX_HASH_FIELDS_SYMMETRIC_L3
;
2623 /* Returns a hash value for the bits of 'flow' that are active based on
2624 * 'wc', given 'basis'. */
2626 flow_hash_in_wildcards(const struct flow
*flow
,
2627 const struct flow_wildcards
*wc
, uint32_t basis
)
2629 const uint64_t *wc_u64
= (const uint64_t *) &wc
->masks
;
2630 const uint64_t *flow_u64
= (const uint64_t *) flow
;
2635 for (i
= 0; i
< FLOW_U64S
; i
++) {
2636 hash
= hash_add64(hash
, flow_u64
[i
] & wc_u64
[i
]);
2638 return hash_finish(hash
, 8 * FLOW_U64S
);
2641 /* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2642 * OpenFlow 1.0 "dl_vlan" value:
2644 * - If it is in the range 0...4095, 'flow->vlans[0].tci' is set to match
2645 * that VLAN. Any existing PCP match is unchanged (it becomes 0 if
2646 * 'flow' previously matched packets without a VLAN header).
2648 * - If it is OFP_VLAN_NONE, 'flow->vlan_tci' is set to match a packet
2649 * without a VLAN tag.
2651 * - Other values of 'vid' should not be used. */
2653 flow_set_dl_vlan(struct flow
*flow
, ovs_be16 vid
, int id
)
2655 if (vid
== htons(OFP10_VLAN_NONE
)) {
2656 flow
->vlans
[id
].tci
= htons(0);
2658 vid
&= htons(VLAN_VID_MASK
);
2659 flow
->vlans
[id
].tci
&= ~htons(VLAN_VID_MASK
);
2660 flow
->vlans
[id
].tci
|= htons(VLAN_CFI
) | vid
;
2664 /* Sets the VLAN header TPID, which must be either ETH_TYPE_VLAN_8021Q or
2665 * ETH_TYPE_VLAN_8021AD. */
2667 flow_fix_vlan_tpid(struct flow
*flow
)
2669 if (flow
->vlans
[0].tpid
== htons(0) && flow
->vlans
[0].tci
!= 0) {
2670 flow
->vlans
[0].tpid
= htons(ETH_TYPE_VLAN_8021Q
);
2674 /* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2675 * OpenFlow 1.2 "vlan_vid" value, that is, the low 13 bits of 'vlan_tci' (VID
2678 flow_set_vlan_vid(struct flow
*flow
, ovs_be16 vid
)
2680 ovs_be16 mask
= htons(VLAN_VID_MASK
| VLAN_CFI
);
2681 flow
->vlans
[0].tci
&= ~mask
;
2682 flow
->vlans
[0].tci
|= vid
& mask
;
2685 /* Sets the VLAN PCP that 'flow' matches to 'pcp', which should be in the
2688 * This function has no effect on the VLAN ID that 'flow' matches.
2690 * After calling this function, 'flow' will not match packets without a VLAN
2693 flow_set_vlan_pcp(struct flow
*flow
, uint8_t pcp
, int id
)
2696 flow
->vlans
[id
].tci
&= ~htons(VLAN_PCP_MASK
);
2697 flow
->vlans
[id
].tci
|= htons((pcp
<< VLAN_PCP_SHIFT
) | VLAN_CFI
);
2700 /* Counts the number of VLAN headers. */
2702 flow_count_vlan_headers(const struct flow
*flow
)
2706 for (i
= 0; i
< FLOW_MAX_VLAN_HEADERS
; i
++) {
2707 if (!(flow
->vlans
[i
].tci
& htons(VLAN_CFI
))) {
2714 /* Given '*p_an' and '*p_bn' pointing to one past the last VLAN header of
2715 * 'a' and 'b' respectively, skip common VLANs so that they point to the
2716 * first different VLAN counting from bottom. */
2718 flow_skip_common_vlan_headers(const struct flow
*a
, int *p_an
,
2719 const struct flow
*b
, int *p_bn
)
2721 int an
= *p_an
, bn
= *p_bn
;
2723 for (an
--, bn
--; an
>= 0 && bn
>= 0; an
--, bn
--) {
2724 if (a
->vlans
[an
].qtag
!= b
->vlans
[bn
].qtag
) {
2733 flow_pop_vlan(struct flow
*flow
, struct flow_wildcards
*wc
)
2735 int n
= flow_count_vlan_headers(flow
);
2738 memset(&wc
->masks
.vlans
[1], 0xff,
2739 sizeof(union flow_vlan_hdr
) * (n
- 1));
2741 memmove(&flow
->vlans
[0], &flow
->vlans
[1],
2742 sizeof(union flow_vlan_hdr
) * (n
- 1));
2745 memset(&flow
->vlans
[n
- 1], 0, sizeof(union flow_vlan_hdr
));
2750 flow_push_vlan_uninit(struct flow
*flow
, struct flow_wildcards
*wc
)
2753 int n
= flow_count_vlan_headers(flow
);
2755 memset(wc
->masks
.vlans
, 0xff, sizeof(union flow_vlan_hdr
) * n
);
2758 memmove(&flow
->vlans
[1], &flow
->vlans
[0],
2759 sizeof(union flow_vlan_hdr
) * (FLOW_MAX_VLAN_HEADERS
- 1));
2760 memset(&flow
->vlans
[0], 0, sizeof(union flow_vlan_hdr
));
2763 /* Returns the number of MPLS LSEs present in 'flow'
2765 * Returns 0 if the 'dl_type' of 'flow' is not an MPLS ethernet type.
2766 * Otherwise traverses 'flow''s MPLS label stack stopping at the
2767 * first entry that has the BoS bit set. If no such entry exists then
2768 * the maximum number of LSEs that can be stored in 'flow' is returned.
2771 flow_count_mpls_labels(const struct flow
*flow
, struct flow_wildcards
*wc
)
2773 /* dl_type is always masked. */
2774 if (eth_type_mpls(flow
->dl_type
)) {
2779 for (i
= 0; i
< FLOW_MAX_MPLS_LABELS
; i
++) {
2781 wc
->masks
.mpls_lse
[i
] |= htonl(MPLS_BOS_MASK
);
2783 if (flow
->mpls_lse
[i
] & htonl(MPLS_BOS_MASK
)) {
2786 if (flow
->mpls_lse
[i
]) {
2796 /* Returns the number consecutive of MPLS LSEs, starting at the
2797 * innermost LSE, that are common in 'a' and 'b'.
2799 * 'an' must be flow_count_mpls_labels(a).
2800 * 'bn' must be flow_count_mpls_labels(b).
2803 flow_count_common_mpls_labels(const struct flow
*a
, int an
,
2804 const struct flow
*b
, int bn
,
2805 struct flow_wildcards
*wc
)
2807 int min_n
= MIN(an
, bn
);
2812 int a_last
= an
- 1;
2813 int b_last
= bn
- 1;
2816 for (i
= 0; i
< min_n
; i
++) {
2818 wc
->masks
.mpls_lse
[a_last
- i
] = OVS_BE32_MAX
;
2819 wc
->masks
.mpls_lse
[b_last
- i
] = OVS_BE32_MAX
;
2821 if (a
->mpls_lse
[a_last
- i
] != b
->mpls_lse
[b_last
- i
]) {
2832 /* Adds a new outermost MPLS label to 'flow' and changes 'flow''s Ethernet type
2833 * to 'mpls_eth_type', which must be an MPLS Ethertype.
2835 * If the new label is the first MPLS label in 'flow', it is generated as;
2837 * - label: 2, if 'flow' is IPv6, otherwise 0.
2839 * - TTL: IPv4 or IPv6 TTL, if present and nonzero, otherwise 64.
2841 * - TC: IPv4 or IPv6 TOS, if present, otherwise 0.
2845 * If the new label is the second or later label MPLS label in 'flow', it is
2848 * - label: Copied from outer label.
2850 * - TTL: Copied from outer label.
2852 * - TC: Copied from outer label.
2856 * 'n' must be flow_count_mpls_labels(flow). 'n' must be less than
2857 * FLOW_MAX_MPLS_LABELS (because otherwise flow->mpls_lse[] would overflow).
2860 flow_push_mpls(struct flow
*flow
, int n
, ovs_be16 mpls_eth_type
,
2861 struct flow_wildcards
*wc
, bool clear_flow_L3
)
2863 ovs_assert(eth_type_mpls(mpls_eth_type
));
2864 ovs_assert(n
< FLOW_MAX_MPLS_LABELS
);
2870 memset(&wc
->masks
.mpls_lse
, 0xff, sizeof *wc
->masks
.mpls_lse
* n
);
2872 for (i
= n
; i
>= 1; i
--) {
2873 flow
->mpls_lse
[i
] = flow
->mpls_lse
[i
- 1];
2875 flow
->mpls_lse
[0] = (flow
->mpls_lse
[1] & htonl(~MPLS_BOS_MASK
));
2877 int label
= 0; /* IPv4 Explicit Null. */
2881 if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
2885 if (is_ip_any(flow
)) {
2886 tc
= (flow
->nw_tos
& IP_DSCP_MASK
) >> 2;
2888 wc
->masks
.nw_tos
|= IP_DSCP_MASK
;
2889 wc
->masks
.nw_ttl
= 0xff;
2897 flow
->mpls_lse
[0] = set_mpls_lse_values(ttl
, tc
, 1, htonl(label
));
2899 if (clear_flow_L3
) {
2900 /* Clear all L3 and L4 fields and dp_hash. */
2901 BUILD_ASSERT(FLOW_WC_SEQ
== 42);
2902 memset((char *) flow
+ FLOW_SEGMENT_2_ENDS_AT
, 0,
2903 sizeof(struct flow
) - FLOW_SEGMENT_2_ENDS_AT
);
2907 flow
->dl_type
= mpls_eth_type
;
2910 /* Tries to remove the outermost MPLS label from 'flow'. Returns true if
2911 * successful, false otherwise. On success, sets 'flow''s Ethernet type to
2914 * 'n' must be flow_count_mpls_labels(flow). */
2916 flow_pop_mpls(struct flow
*flow
, int n
, ovs_be16 eth_type
,
2917 struct flow_wildcards
*wc
)
2922 /* Nothing to pop. */
2924 } else if (n
== FLOW_MAX_MPLS_LABELS
) {
2926 wc
->masks
.mpls_lse
[n
- 1] |= htonl(MPLS_BOS_MASK
);
2928 if (!(flow
->mpls_lse
[n
- 1] & htonl(MPLS_BOS_MASK
))) {
2929 /* Can't pop because don't know what to fill in mpls_lse[n - 1]. */
2935 memset(&wc
->masks
.mpls_lse
[1], 0xff,
2936 sizeof *wc
->masks
.mpls_lse
* (n
- 1));
2938 for (i
= 1; i
< n
; i
++) {
2939 flow
->mpls_lse
[i
- 1] = flow
->mpls_lse
[i
];
2941 flow
->mpls_lse
[n
- 1] = 0;
2942 flow
->dl_type
= eth_type
;
2946 /* Sets the MPLS Label that 'flow' matches to 'label', which is interpreted
2947 * as an OpenFlow 1.1 "mpls_label" value. */
2949 flow_set_mpls_label(struct flow
*flow
, int idx
, ovs_be32 label
)
2951 set_mpls_lse_label(&flow
->mpls_lse
[idx
], label
);
2954 /* Sets the MPLS TTL that 'flow' matches to 'ttl', which should be in the
2957 flow_set_mpls_ttl(struct flow
*flow
, int idx
, uint8_t ttl
)
2959 set_mpls_lse_ttl(&flow
->mpls_lse
[idx
], ttl
);
2962 /* Sets the MPLS TC that 'flow' matches to 'tc', which should be in the
2965 flow_set_mpls_tc(struct flow
*flow
, int idx
, uint8_t tc
)
2967 set_mpls_lse_tc(&flow
->mpls_lse
[idx
], tc
);
2970 /* Sets the MPLS BOS bit that 'flow' matches to which should be 0 or 1. */
2972 flow_set_mpls_bos(struct flow
*flow
, int idx
, uint8_t bos
)
2974 set_mpls_lse_bos(&flow
->mpls_lse
[idx
], bos
);
2977 /* Sets the entire MPLS LSE. */
2979 flow_set_mpls_lse(struct flow
*flow
, int idx
, ovs_be32 lse
)
2981 flow
->mpls_lse
[idx
] = lse
;
/* Appends an L7 payload of 'l7_len' bytes to 'p': a copy of 'l7' if it is
 * nonnull, otherwise a deterministic filler pattern. */
static void
flow_compose_l7(struct dp_packet *p, const void *l7, size_t l7_len)
{
    if (l7_len) {
        if (l7) {
            dp_packet_put(p, l7, l7_len);
        } else {
            uint8_t *payload = dp_packet_put_uninit(p, l7_len);
            for (size_t i = 0; i < l7_len; i++) {
                payload[i] = i;
            }
        }
    }
}
3000 flow_compose_l4(struct dp_packet
*p
, const struct flow
*flow
,
3001 const void *l7
, size_t l7_len
)
3003 size_t orig_len
= dp_packet_size(p
);
3005 if (!(flow
->nw_frag
& FLOW_NW_FRAG_ANY
)
3006 || !(flow
->nw_frag
& FLOW_NW_FRAG_LATER
)) {
3007 if (flow
->nw_proto
== IPPROTO_TCP
) {
3008 struct tcp_header
*tcp
= dp_packet_put_zeros(p
, sizeof *tcp
);
3009 tcp
->tcp_src
= flow
->tp_src
;
3010 tcp
->tcp_dst
= flow
->tp_dst
;
3011 tcp
->tcp_ctl
= TCP_CTL(ntohs(flow
->tcp_flags
), 5);
3012 if (!(flow
->tcp_flags
& htons(TCP_SYN
| TCP_FIN
| TCP_RST
))) {
3013 flow_compose_l7(p
, l7
, l7_len
);
3015 } else if (flow
->nw_proto
== IPPROTO_UDP
) {
3016 struct udp_header
*udp
= dp_packet_put_zeros(p
, sizeof *udp
);
3017 udp
->udp_src
= flow
->tp_src
;
3018 udp
->udp_dst
= flow
->tp_dst
;
3019 udp
->udp_len
= htons(sizeof *udp
+ l7_len
);
3020 flow_compose_l7(p
, l7
, l7_len
);
3021 } else if (flow
->nw_proto
== IPPROTO_SCTP
) {
3022 struct sctp_header
*sctp
= dp_packet_put_zeros(p
, sizeof *sctp
);
3023 sctp
->sctp_src
= flow
->tp_src
;
3024 sctp
->sctp_dst
= flow
->tp_dst
;
3025 /* XXX Someone should figure out what L7 data to include. */
3026 } else if (flow
->nw_proto
== IPPROTO_ICMP
) {
3027 struct icmp_header
*icmp
= dp_packet_put_zeros(p
, sizeof *icmp
);
3028 icmp
->icmp_type
= ntohs(flow
->tp_src
);
3029 icmp
->icmp_code
= ntohs(flow
->tp_dst
);
3030 if ((icmp
->icmp_type
== ICMP4_ECHO_REQUEST
||
3031 icmp
->icmp_type
== ICMP4_ECHO_REPLY
)
3032 && icmp
->icmp_code
== 0) {
3033 flow_compose_l7(p
, l7
, l7_len
);
3035 /* XXX Add inner IP packet for e.g. destination unreachable? */
3037 } else if (flow
->nw_proto
== IPPROTO_IGMP
) {
3038 struct igmp_header
*igmp
= dp_packet_put_zeros(p
, sizeof *igmp
);
3039 igmp
->igmp_type
= ntohs(flow
->tp_src
);
3040 igmp
->igmp_code
= ntohs(flow
->tp_dst
);
3041 put_16aligned_be32(&igmp
->group
, flow
->igmp_group_ip4
);
3042 } else if (flow
->nw_proto
== IPPROTO_ICMPV6
) {
3043 struct icmp6_data_header
*icmp6
;
3045 icmp6
= dp_packet_put_zeros(p
, sizeof *icmp6
);
3046 icmp6
->icmp6_base
.icmp6_type
= ntohs(flow
->tp_src
);
3047 icmp6
->icmp6_base
.icmp6_code
= ntohs(flow
->tp_dst
);
3048 put_16aligned_be32(icmp6
->icmp6_data
.be32
, flow
->igmp_group_ip4
);
3050 if (icmp6
->icmp6_base
.icmp6_code
== 0 &&
3051 (icmp6
->icmp6_base
.icmp6_type
== ND_NEIGHBOR_SOLICIT
||
3052 icmp6
->icmp6_base
.icmp6_type
== ND_NEIGHBOR_ADVERT
)) {
3053 struct in6_addr
*nd_target
;
3054 struct ovs_nd_lla_opt
*lla_opt
;
3056 nd_target
= dp_packet_put_zeros(p
, sizeof *nd_target
);
3057 *nd_target
= flow
->nd_target
;
3059 if (!eth_addr_is_zero(flow
->arp_sha
)) {
3060 lla_opt
= dp_packet_put_zeros(p
, 8);
3062 lla_opt
->type
= ND_OPT_SOURCE_LINKADDR
;
3063 lla_opt
->mac
= flow
->arp_sha
;
3065 if (!eth_addr_is_zero(flow
->arp_tha
)) {
3066 lla_opt
= dp_packet_put_zeros(p
, 8);
3068 lla_opt
->type
= ND_OPT_TARGET_LINKADDR
;
3069 lla_opt
->mac
= flow
->arp_tha
;
3071 } else if (icmp6
->icmp6_base
.icmp6_code
== 0 &&
3072 (icmp6
->icmp6_base
.icmp6_type
== ICMP6_ECHO_REQUEST
||
3073 icmp6
->icmp6_base
.icmp6_type
== ICMP6_ECHO_REPLY
)) {
3074 flow_compose_l7(p
, l7
, l7_len
);
3076 /* XXX Add inner IP packet for e.g. destination unreachable? */
3081 return dp_packet_size(p
) - orig_len
;
3085 flow_compose_l4_csum(struct dp_packet
*p
, const struct flow
*flow
,
3086 uint32_t pseudo_hdr_csum
)
3088 size_t l4_len
= (char *) dp_packet_tail(p
) - (char *) dp_packet_l4(p
);
3090 if (!(flow
->nw_frag
& FLOW_NW_FRAG_ANY
)
3091 || !(flow
->nw_frag
& FLOW_NW_FRAG_LATER
)) {
3092 if (flow
->nw_proto
== IPPROTO_TCP
) {
3093 struct tcp_header
*tcp
= dp_packet_l4(p
);
3096 tcp
->tcp_csum
= csum_finish(csum_continue(pseudo_hdr_csum
,
3098 } else if (flow
->nw_proto
== IPPROTO_UDP
) {
3099 struct udp_header
*udp
= dp_packet_l4(p
);
3102 udp
->udp_csum
= csum_finish(csum_continue(pseudo_hdr_csum
,
3104 if (!udp
->udp_csum
) {
3105 udp
->udp_csum
= htons(0xffff);
3107 } else if (flow
->nw_proto
== IPPROTO_ICMP
) {
3108 struct icmp_header
*icmp
= dp_packet_l4(p
);
3110 icmp
->icmp_csum
= 0;
3111 icmp
->icmp_csum
= csum(icmp
, l4_len
);
3112 } else if (flow
->nw_proto
== IPPROTO_IGMP
) {
3113 struct igmp_header
*igmp
= dp_packet_l4(p
);
3115 igmp
->igmp_csum
= 0;
3116 igmp
->igmp_csum
= csum(igmp
, l4_len
);
3117 } else if (flow
->nw_proto
== IPPROTO_ICMPV6
) {
3118 struct icmp6_data_header
*icmp6
= dp_packet_l4(p
);
3120 icmp6
->icmp6_base
.icmp6_cksum
= 0;
3121 icmp6
->icmp6_base
.icmp6_cksum
=
3122 csum_finish(csum_continue(pseudo_hdr_csum
, icmp6
, l4_len
));
3127 /* Increase the size of packet composed by 'flow_compose_minimal'
3128 * up to 'size' bytes. Fixes all the required packet headers like
3129 * ip/udp lengths and l3/l4 checksums.
3131 * 'size' needs to be larger then the current packet size. */
3133 packet_expand(struct dp_packet
*p
, const struct flow
*flow
, size_t size
)
3137 ovs_assert(size
> dp_packet_size(p
));
3139 extra_size
= size
- dp_packet_size(p
);
3140 dp_packet_put_zeros(p
, extra_size
);
3142 if (flow
->dl_type
== htons(FLOW_DL_TYPE_NONE
)) {
3143 struct eth_header
*eth
= dp_packet_eth(p
);
3145 eth
->eth_type
= htons(dp_packet_size(p
));
3146 } else if (dl_type_is_ip_any(flow
->dl_type
)) {
3147 uint32_t pseudo_hdr_csum
;
3148 size_t l4_len
= (char *) dp_packet_tail(p
) - (char *) dp_packet_l4(p
);
3150 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
3151 struct ip_header
*ip
= dp_packet_l3(p
);
3153 ip
->ip_tot_len
= htons(p
->l4_ofs
- p
->l3_ofs
+ l4_len
);
3155 ip
->ip_csum
= csum(ip
, sizeof *ip
);
3157 pseudo_hdr_csum
= packet_csum_pseudoheader(ip
);
3158 } else { /* ETH_TYPE_IPV6 */
3159 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(p
);
3161 nh
->ip6_plen
= htons(l4_len
);
3162 pseudo_hdr_csum
= packet_csum_pseudoheader6(nh
);
3165 if ((!(flow
->nw_frag
& FLOW_NW_FRAG_ANY
)
3166 || !(flow
->nw_frag
& FLOW_NW_FRAG_LATER
))
3167 && flow
->nw_proto
== IPPROTO_UDP
) {
3168 struct udp_header
*udp
= dp_packet_l4(p
);
3170 udp
->udp_len
= htons(l4_len
+ extra_size
);
3172 flow_compose_l4_csum(p
, flow
, pseudo_hdr_csum
);
3176 /* Puts into 'p' a packet that flow_extract() would parse as having the given
3179 * (This is useful only for testing, obviously, and the packet isn't really
3180 * valid. Lots of fields are just zeroed.)
3182 * For packets whose protocols can encapsulate arbitrary L7 payloads, 'l7' and
3183 * 'l7_len' determine that payload:
3185 * - If 'l7_len' is zero, no payload is included.
3187 * - If 'l7_len' is nonzero and 'l7' is null, an arbitrary payload 'l7_len'
3188 * bytes long is included.
3190 * - If 'l7_len' is nonzero and 'l7' is nonnull, the payload is copied
3193 flow_compose(struct dp_packet
*p
, const struct flow
*flow
,
3194 const void *l7
, size_t l7_len
)
3196 /* Add code to this function (or its callees) for emitting new fields or
3197 * protocols. (This isn't essential, so it can be skipped for initial
3199 BUILD_ASSERT_DECL(FLOW_WC_SEQ
== 42);
3201 uint32_t pseudo_hdr_csum
;
3204 /* eth_compose() sets l3 pointer and makes sure it is 32-bit aligned. */
3205 eth_compose(p
, flow
->dl_dst
, flow
->dl_src
, ntohs(flow
->dl_type
), 0);
3206 if (flow
->dl_type
== htons(FLOW_DL_TYPE_NONE
)) {
3207 struct eth_header
*eth
= dp_packet_eth(p
);
3208 eth
->eth_type
= htons(dp_packet_size(p
));
3212 for (int encaps
= FLOW_MAX_VLAN_HEADERS
- 1; encaps
>= 0; encaps
--) {
3213 if (flow
->vlans
[encaps
].tci
& htons(VLAN_CFI
)) {
3214 eth_push_vlan(p
, flow
->vlans
[encaps
].tpid
,
3215 flow
->vlans
[encaps
].tci
);
3219 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
3220 struct ip_header
*ip
;
3222 ip
= dp_packet_put_zeros(p
, sizeof *ip
);
3223 ip
->ip_ihl_ver
= IP_IHL_VER(5, 4);
3224 ip
->ip_tos
= flow
->nw_tos
;
3225 ip
->ip_ttl
= flow
->nw_ttl
;
3226 ip
->ip_proto
= flow
->nw_proto
;
3227 put_16aligned_be32(&ip
->ip_src
, flow
->nw_src
);
3228 put_16aligned_be32(&ip
->ip_dst
, flow
->nw_dst
);
3230 if (flow
->nw_frag
& FLOW_NW_FRAG_ANY
) {
3231 ip
->ip_frag_off
|= htons(IP_MORE_FRAGMENTS
);
3232 if (flow
->nw_frag
& FLOW_NW_FRAG_LATER
) {
3233 ip
->ip_frag_off
|= htons(100);
3237 dp_packet_set_l4(p
, dp_packet_tail(p
));
3239 l4_len
= flow_compose_l4(p
, flow
, l7
, l7_len
);
3241 ip
= dp_packet_l3(p
);
3242 ip
->ip_tot_len
= htons(p
->l4_ofs
- p
->l3_ofs
+ l4_len
);
3243 /* Checksum has already been zeroed by put_zeros call. */
3244 ip
->ip_csum
= csum(ip
, sizeof *ip
);
3246 pseudo_hdr_csum
= packet_csum_pseudoheader(ip
);
3247 flow_compose_l4_csum(p
, flow
, pseudo_hdr_csum
);
3248 } else if (flow
->dl_type
== htons(ETH_TYPE_IPV6
)) {
3249 struct ovs_16aligned_ip6_hdr
*nh
;
3251 nh
= dp_packet_put_zeros(p
, sizeof *nh
);
3252 put_16aligned_be32(&nh
->ip6_flow
, htonl(6 << 28) |
3253 htonl(flow
->nw_tos
<< 20) | flow
->ipv6_label
);
3254 nh
->ip6_hlim
= flow
->nw_ttl
;
3255 nh
->ip6_nxt
= flow
->nw_proto
;
3257 memcpy(&nh
->ip6_src
, &flow
->ipv6_src
, sizeof(nh
->ip6_src
));
3258 memcpy(&nh
->ip6_dst
, &flow
->ipv6_dst
, sizeof(nh
->ip6_dst
));
3260 dp_packet_set_l4(p
, dp_packet_tail(p
));
3262 l4_len
= flow_compose_l4(p
, flow
, l7
, l7_len
);
3264 nh
= dp_packet_l3(p
);
3265 nh
->ip6_plen
= htons(l4_len
);
3267 pseudo_hdr_csum
= packet_csum_pseudoheader6(nh
);
3268 flow_compose_l4_csum(p
, flow
, pseudo_hdr_csum
);
3269 } else if (flow
->dl_type
== htons(ETH_TYPE_ARP
) ||
3270 flow
->dl_type
== htons(ETH_TYPE_RARP
)) {
3271 struct arp_eth_header
*arp
;
3273 arp
= dp_packet_put_zeros(p
, sizeof *arp
);
3274 dp_packet_set_l3(p
, arp
);
3275 arp
->ar_hrd
= htons(1);
3276 arp
->ar_pro
= htons(ETH_TYPE_IP
);
3277 arp
->ar_hln
= ETH_ADDR_LEN
;
3279 arp
->ar_op
= htons(flow
->nw_proto
);
3281 if (flow
->nw_proto
== ARP_OP_REQUEST
||
3282 flow
->nw_proto
== ARP_OP_REPLY
) {
3283 put_16aligned_be32(&arp
->ar_spa
, flow
->nw_src
);
3284 put_16aligned_be32(&arp
->ar_tpa
, flow
->nw_dst
);
3285 arp
->ar_sha
= flow
->arp_sha
;
3286 arp
->ar_tha
= flow
->arp_tha
;
3290 if (eth_type_mpls(flow
->dl_type
)) {
3293 p
->l2_5_ofs
= p
->l3_ofs
;
3294 for (n
= 1; n
< FLOW_MAX_MPLS_LABELS
; n
++) {
3295 if (flow
->mpls_lse
[n
- 1] & htonl(MPLS_BOS_MASK
)) {
3300 push_mpls(p
, flow
->dl_type
, flow
->mpls_lse
[--n
]);
3305 /* Compressed flow. */
3307 /* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
3308 * the caller. The caller must have already computed 'dst->map' properly to
3309 * indicate the significant uint64_t elements of 'src'.
3311 * Normally the significant elements are the ones that are non-zero. However,
3312 * when a miniflow is initialized from a (mini)mask, the values can be zeroes,
3313 * so that the flow and mask always have the same maps. */
3315 miniflow_init(struct miniflow
*dst
, const struct flow
*src
)
3317 uint64_t *dst_u64
= miniflow_values(dst
);
3320 FLOWMAP_FOR_EACH_INDEX(idx
, dst
->map
) {
3321 *dst_u64
++ = flow_u64_value(src
, idx
);
3325 /* Initialize the maps of 'flow' from 'src'. */
3327 miniflow_map_init(struct miniflow
*flow
, const struct flow
*src
)
3329 /* Initialize map, counting the number of nonzero elements. */
3330 flowmap_init(&flow
->map
);
3331 for (size_t i
= 0; i
< FLOW_U64S
; i
++) {
3332 if (flow_u64_value(src
, i
)) {
3333 flowmap_set(&flow
->map
, i
, 1);
3338 /* Allocates 'n' count of miniflows, consecutive in memory, initializing the
3339 * map of each from 'src'.
3340 * Returns the size of the miniflow data. */
3342 miniflow_alloc(struct miniflow
*dsts
[], size_t n
, const struct miniflow
*src
)
3344 size_t n_values
= miniflow_n_values(src
);
3345 size_t data_size
= MINIFLOW_VALUES_SIZE(n_values
);
3346 struct miniflow
*dst
= xmalloc(n
* (sizeof *src
+ data_size
));
3349 COVERAGE_INC(miniflow_malloc
);
3351 for (i
= 0; i
< n
; i
++) {
3352 *dst
= *src
; /* Copy maps. */
3354 dst
+= 1; /* Just past the maps. */
3355 dst
= (struct miniflow
*)((uint64_t *)dst
+ n_values
); /* Skip data. */
3360 /* Returns a miniflow copy of 'src'. The caller must eventually free() the
3361 * returned miniflow. */
3363 miniflow_create(const struct flow
*src
)
3365 struct miniflow tmp
;
3366 struct miniflow
*dst
;
3368 miniflow_map_init(&tmp
, src
);
3370 miniflow_alloc(&dst
, 1, &tmp
);
3371 miniflow_init(dst
, src
);
3375 /* Initializes 'dst' as a copy of 'src'. The caller must have allocated
3376 * 'dst' to have inline space for 'n_values' data in 'src'. */
3378 miniflow_clone(struct miniflow
*dst
, const struct miniflow
*src
,
3381 *dst
= *src
; /* Copy maps. */
3382 memcpy(miniflow_values(dst
), miniflow_get_values(src
),
3383 MINIFLOW_VALUES_SIZE(n_values
));
3386 /* Initializes 'dst' as a copy of 'src'. */
3388 miniflow_expand(const struct miniflow
*src
, struct flow
*dst
)
3390 memset(dst
, 0, sizeof *dst
);
3391 flow_union_with_miniflow(dst
, src
);
3394 /* Returns true if 'a' and 'b' are equal miniflows, false otherwise. */
3396 miniflow_equal(const struct miniflow
*a
, const struct miniflow
*b
)
3398 const uint64_t *ap
= miniflow_get_values(a
);
3399 const uint64_t *bp
= miniflow_get_values(b
);
3401 /* This is mostly called after a matching hash, so it is highly likely that
3402 * the maps are equal as well. */
3403 if (OVS_LIKELY(flowmap_equal(a
->map
, b
->map
))) {
3404 return !memcmp(ap
, bp
, miniflow_n_values(a
) * sizeof *ap
);
3408 FLOWMAP_FOR_EACH_INDEX (idx
, flowmap_or(a
->map
, b
->map
)) {
3409 if ((flowmap_is_set(&a
->map
, idx
) ? *ap
++ : 0)
3410 != (flowmap_is_set(&b
->map
, idx
) ? *bp
++ : 0)) {
3419 /* Returns false if 'a' and 'b' differ at the places where there are 1-bits
3420 * in 'mask', true otherwise. */
3422 miniflow_equal_in_minimask(const struct miniflow
*a
, const struct miniflow
*b
,
3423 const struct minimask
*mask
)
3425 const uint64_t *p
= miniflow_get_values(&mask
->masks
);
3428 FLOWMAP_FOR_EACH_INDEX(idx
, mask
->masks
.map
) {
3429 if ((miniflow_get(a
, idx
) ^ miniflow_get(b
, idx
)) & *p
++) {
3437 /* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
3438 * in 'mask', false if they differ. */
3440 miniflow_equal_flow_in_minimask(const struct miniflow
*a
, const struct flow
*b
,
3441 const struct minimask
*mask
)
3443 const uint64_t *p
= miniflow_get_values(&mask
->masks
);
3446 FLOWMAP_FOR_EACH_INDEX(idx
, mask
->masks
.map
) {
3447 if ((miniflow_get(a
, idx
) ^ flow_u64_value(b
, idx
)) & *p
++) {
3457 minimask_init(struct minimask
*mask
, const struct flow_wildcards
*wc
)
3459 miniflow_init(&mask
->masks
, &wc
->masks
);
3462 /* Returns a minimask copy of 'wc'. The caller must eventually free the
3463 * returned minimask with free(). */
3465 minimask_create(const struct flow_wildcards
*wc
)
3467 return (struct minimask
*)miniflow_create(&wc
->masks
);
3470 /* Initializes 'dst_' as the bit-wise "and" of 'a_' and 'b_'.
3472 * The caller must provide room for FLOW_U64S "uint64_t"s in 'storage', which
3473 * must follow '*dst_' in memory, for use by 'dst_'. The caller must *not*
3474 * free 'dst_' free(). */
3476 minimask_combine(struct minimask
*dst_
,
3477 const struct minimask
*a_
, const struct minimask
*b_
,
3478 uint64_t storage
[FLOW_U64S
])
3480 struct miniflow
*dst
= &dst_
->masks
;
3481 uint64_t *dst_values
= storage
;
3482 const struct miniflow
*a
= &a_
->masks
;
3483 const struct miniflow
*b
= &b_
->masks
;
3486 flowmap_init(&dst
->map
);
3488 FLOWMAP_FOR_EACH_INDEX(idx
, flowmap_and(a
->map
, b
->map
)) {
3489 /* Both 'a' and 'b' have non-zero data at 'idx'. */
3490 uint64_t mask
= *miniflow_get__(a
, idx
) & *miniflow_get__(b
, idx
);
3493 flowmap_set(&dst
->map
, idx
, 1);
3494 *dst_values
++ = mask
;
3499 /* Initializes 'wc' as a copy of 'mask'. */
3501 minimask_expand(const struct minimask
*mask
, struct flow_wildcards
*wc
)
3503 miniflow_expand(&mask
->masks
, &wc
->masks
);
3506 /* Returns true if 'a' and 'b' are the same flow mask, false otherwise.
3507 * Minimasks may not have zero data values, so for the minimasks to be the
3508 * same, they need to have the same map and the same data values. */
3510 minimask_equal(const struct minimask
*a
, const struct minimask
*b
)
3512 /* At first glance, it might seem that this can be reasonably optimized
3513 * into a single memcmp() for the total size of the region. Such an
3514 * optimization will work OK with most implementations of memcmp() that
3515 * proceed from the start of the regions to be compared to the end in
3516 * reasonably sized chunks. However, memcmp() is not required to be
3517 * implemented that way, and an implementation that, for example, compares
3518 * all of the bytes in both regions without early exit when it finds a
3519 * difference, or one that compares, say, 64 bytes at a time, could access
3520 * an unmapped region of memory if minimasks 'a' and 'b' have different
3521 * lengths. By first checking that the maps are the same with the first
3522 * memcmp(), we verify that 'a' and 'b' have the same length and therefore
3523 * ensure that the second memcmp() is safe. */
3524 return (!memcmp(a
, b
, sizeof *a
)
3525 && !memcmp(a
+ 1, b
+ 1,
3526 MINIFLOW_VALUES_SIZE(miniflow_n_values(&a
->masks
))));
3529 /* Returns true if at least one bit matched by 'b' is wildcarded by 'a',
3530 * false otherwise. */
3532 minimask_has_extra(const struct minimask
*a
, const struct minimask
*b
)
3534 const uint64_t *bp
= miniflow_get_values(&b
->masks
);
3537 FLOWMAP_FOR_EACH_INDEX(idx
, b
->masks
.map
) {
3538 uint64_t b_u64
= *bp
++;
3540 /* 'b_u64' is non-zero, check if the data in 'a' is either zero
3541 * or misses some of the bits in 'b_u64'. */
3542 if (!MINIFLOW_IN_MAP(&a
->masks
, idx
)
3543 || ((*miniflow_get__(&a
->masks
, idx
) & b_u64
) != b_u64
)) {
3544 return true; /* 'a' wildcards some bits 'b' doesn't. */
3552 flow_limit_vlans(int vlan_limit
)
3554 if (vlan_limit
<= 0) {
3555 flow_vlan_limit
= FLOW_MAX_VLAN_HEADERS
;
3557 flow_vlan_limit
= MIN(vlan_limit
, FLOW_MAX_VLAN_HEADERS
);
3562 flow_get_tunnel_netdev(struct flow_tnl
*tunnel
)
3564 char iface
[IFNAMSIZ
];
3565 struct in6_addr ip6
;
3568 if (tunnel
->ip_src
) {
3569 in6_addr_set_mapped_ipv4(&ip6
, tunnel
->ip_src
);
3570 } else if (ipv6_addr_is_set(&tunnel
->ipv6_src
)) {
3571 ip6
= tunnel
->ipv6_src
;
3576 if (!ovs_router_lookup(0, &ip6
, iface
, NULL
, &gw
)) {
3580 return netdev_from_name(iface
);