]> git.proxmox.com Git - ovs.git/blame - lib/flow.c
nsh: rework NSH netlink keys and actions
[ovs.git] / lib / flow.c
CommitLineData
064af421 1/*
fd6cd1bf 2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2017 Nicira, Inc.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16#include <config.h>
17#include <sys/types.h>
18#include "flow.h"
d31f1109 19#include <errno.h>
064af421 20#include <inttypes.h>
5cb7a798 21#include <limits.h>
064af421 22#include <netinet/in.h>
d31f1109
JP
23#include <netinet/icmp6.h>
24#include <netinet/ip6.h>
5cb7a798 25#include <stdint.h>
064af421
BP
26#include <stdlib.h>
27#include <string.h>
10a24935 28#include "byte-order.h"
0deec6d2 29#include "colors.h"
064af421 30#include "coverage.h"
dc5a7ce7 31#include "csum.h"
3e8a2ad1 32#include "openvswitch/dynamic-string.h"
064af421 33#include "hash.h"
c49d1dd1 34#include "jhash.h"
e29747e4 35#include "openvswitch/match.h"
cf62fa4c 36#include "dp-packet.h"
064af421 37#include "openflow/openflow.h"
064af421 38#include "packets.h"
b5e7e61a 39#include "odp-util.h"
94639963 40#include "random.h"
176aaa65 41#include "unaligned.h"
ee89ea7b 42#include "util.h"
3d2fbd70 43#include "openvswitch/nsh.h"
064af421 44
d76f09ea 45COVERAGE_DEFINE(flow_extract);
5cb7a798 46COVERAGE_DEFINE(miniflow_malloc);
d76f09ea 47
d70e8c28
JR
48/* U64 indices for segmented flow classification. */
49const uint8_t flow_segment_u64s[4] = {
50 FLOW_SEGMENT_1_ENDS_AT / sizeof(uint64_t),
51 FLOW_SEGMENT_2_ENDS_AT / sizeof(uint64_t),
52 FLOW_SEGMENT_3_ENDS_AT / sizeof(uint64_t),
53 FLOW_U64S
476f36e8
JR
54};
55
f0fb825a
EG
56int flow_vlan_limit = FLOW_MAX_VLAN_HEADERS;
57
268eca11
BP
58/* Asserts that field 'f1' follows immediately after 'f0' in struct flow,
59 * without any intervening padding. */
60#define ASSERT_SEQUENTIAL(f0, f1) \
61 BUILD_ASSERT_DECL(offsetof(struct flow, f0) \
62 + MEMBER_SIZEOF(struct flow, f0) \
63 == offsetof(struct flow, f1))
64
65/* Asserts that fields 'f0' and 'f1' are in the same 32-bit aligned word within
66 * struct flow. */
67#define ASSERT_SAME_WORD(f0, f1) \
68 BUILD_ASSERT_DECL(offsetof(struct flow, f0) / 4 \
69 == offsetof(struct flow, f1) / 4)
70
71/* Asserts that 'f0' and 'f1' are both sequential and within the same 32-bit
72 * aligned word in struct flow. */
73#define ASSERT_SEQUENTIAL_SAME_WORD(f0, f1) \
74 ASSERT_SEQUENTIAL(f0, f1); \
75 ASSERT_SAME_WORD(f0, f1)
76
419681da
JR
77/* miniflow_extract() assumes the following to be true to optimize the
78 * extraction process. */
268eca11
BP
79ASSERT_SEQUENTIAL_SAME_WORD(nw_frag, nw_tos);
80ASSERT_SEQUENTIAL_SAME_WORD(nw_tos, nw_ttl);
81ASSERT_SEQUENTIAL_SAME_WORD(nw_ttl, nw_proto);
419681da 82
d70e8c28
JR
83/* TCP flags in the middle of a BE64, zeroes in the other half. */
84BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) % 8 == 4);
85
419681da
JR
86#if WORDS_BIGENDIAN
87#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl) \
88 << 16)
89#else
90#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl))
91#endif
92
268eca11 93ASSERT_SEQUENTIAL_SAME_WORD(tp_src, tp_dst);
419681da
JR
94
/* Removes 'size' bytes from the head end of '*datap', of size '*sizep', which
 * must contain at least 'size' bytes of data.  Returns the first byte of data
 * removed. */
static inline const void *
data_pull(const void **datap, size_t *sizep, size_t size)
{
    const char *head = *datap;

    *sizep -= size;
    *datap = head + size;
    return head;
}
106
419681da
JR
/* If '*datap' has at least 'size' bytes of data, removes that many bytes from
 * the head end of '*datap' and returns the first byte removed.  Otherwise,
 * returns a null pointer without modifying '*datap'. */
static inline const void *
data_try_pull(const void **datap, size_t *sizep, size_t size)
{
    if (OVS_LIKELY(*sizep >= size)) {
        return data_pull(datap, sizep, size);
    }
    return NULL;
}
115
419681da
JR
116/* Context for pushing data to a miniflow. */
117struct mf_ctx {
5fcff47b 118 struct flowmap map;
d70e8c28
JR
119 uint64_t *data;
120 uint64_t * const end;
419681da 121};
064af421 122
419681da
JR
123/* miniflow_push_* macros allow filling in a miniflow data values in order.
124 * Assertions are needed only when the layout of the struct flow is modified.
125 * 'ofs' is a compile-time constant, which allows most of the code be optimized
694ffecc 126 * away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are
419681da
JR
127 * defined as macros. */
128
3d2fbd70 129#if (FLOW_WC_SEQ != 40)
419681da 130#define MINIFLOW_ASSERT(X) ovs_assert(X)
dce96af8
DDP
131BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
132 "assertions enabled. Consider updating FLOW_WC_SEQ after "
133 "testing")
419681da
JR
134#else
135#define MINIFLOW_ASSERT(X)
136#endif
137
5fcff47b
JR
138/* True if 'IDX' and higher bits are not set. */
139#define ASSERT_FLOWMAP_NOT_SET(FM, IDX) \
c2581ccf 140{ \
5fcff47b 141 MINIFLOW_ASSERT(!((FM)->bits[(IDX) / MAP_T_BITS] & \
e807a1a6 142 (MAP_MAX << ((IDX) % MAP_T_BITS)))); \
5fcff47b
JR
143 for (size_t i = (IDX) / MAP_T_BITS + 1; i < FLOWMAP_UNITS; i++) { \
144 MINIFLOW_ASSERT(!(FM)->bits[i]); \
c2581ccf
JR
145 } \
146}
361d808d 147
5fcff47b
JR
148#define miniflow_set_map(MF, OFS) \
149 { \
150 ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS)); \
151 flowmap_set(&MF.map, (OFS), 1); \
c2581ccf 152}
361d808d 153
e807a1a6
SH
154#define miniflow_assert_in_map(MF, OFS) \
155 MINIFLOW_ASSERT(flowmap_is_set(&MF.map, (OFS))); \
5fcff47b
JR
156 ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS) + 1)
157
158#define miniflow_push_uint64_(MF, OFS, VALUE) \
159{ \
160 MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 8 == 0); \
161 *MF.data++ = VALUE; \
162 miniflow_set_map(MF, OFS / 8); \
d31f1109
JP
163}
164
5fcff47b 165#define miniflow_push_be64_(MF, OFS, VALUE) \
d70e8c28 166 miniflow_push_uint64_(MF, OFS, (OVS_FORCE uint64_t)(VALUE))
419681da 167
5fcff47b
JR
168#define miniflow_push_uint32_(MF, OFS, VALUE) \
169 { \
170 MINIFLOW_ASSERT(MF.data < MF.end); \
171 \
172 if ((OFS) % 8 == 0) { \
173 miniflow_set_map(MF, OFS / 8); \
174 *(uint32_t *)MF.data = VALUE; \
175 } else if ((OFS) % 8 == 4) { \
176 miniflow_assert_in_map(MF, OFS / 8); \
177 *((uint32_t *)MF.data + 1) = VALUE; \
178 MF.data++; \
179 } \
d70e8c28
JR
180}
181
182#define miniflow_push_be32_(MF, OFS, VALUE) \
183 miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE))
184
5fcff47b
JR
185#define miniflow_push_uint16_(MF, OFS, VALUE) \
186{ \
187 MINIFLOW_ASSERT(MF.data < MF.end); \
188 \
189 if ((OFS) % 8 == 0) { \
190 miniflow_set_map(MF, OFS / 8); \
191 *(uint16_t *)MF.data = VALUE; \
192 } else if ((OFS) % 8 == 2) { \
193 miniflow_assert_in_map(MF, OFS / 8); \
194 *((uint16_t *)MF.data + 1) = VALUE; \
195 } else if ((OFS) % 8 == 4) { \
196 miniflow_assert_in_map(MF, OFS / 8); \
197 *((uint16_t *)MF.data + 2) = VALUE; \
198 } else if ((OFS) % 8 == 6) { \
199 miniflow_assert_in_map(MF, OFS / 8); \
200 *((uint16_t *)MF.data + 3) = VALUE; \
201 MF.data++; \
202 } \
203}
204
1dcf9ac7
SH
205#define miniflow_push_uint8_(MF, OFS, VALUE) \
206{ \
207 MINIFLOW_ASSERT(MF.data < MF.end); \
208 \
209 if ((OFS) % 8 == 0) { \
210 miniflow_set_map(MF, OFS / 8); \
211 *(uint8_t *)MF.data = VALUE; \
212 } else if ((OFS) % 8 == 7) { \
213 miniflow_assert_in_map(MF, OFS / 8); \
214 *((uint8_t *)MF.data + 7) = VALUE; \
215 MF.data++; \
216 } else { \
217 miniflow_assert_in_map(MF, OFS / 8); \
218 *((uint8_t *)MF.data + ((OFS) % 8)) = VALUE; \
219 } \
220}
221
5fcff47b
JR
222#define miniflow_pad_to_64_(MF, OFS) \
223{ \
224 MINIFLOW_ASSERT((OFS) % 8 != 0); \
225 miniflow_assert_in_map(MF, OFS / 8); \
226 \
227 memset((uint8_t *)MF.data + (OFS) % 8, 0, 8 - (OFS) % 8); \
228 MF.data++; \
d70e8c28
JR
229}
230
1589ee5a
SH
231#define miniflow_pad_from_64_(MF, OFS) \
232{ \
233 MINIFLOW_ASSERT(MF.data < MF.end); \
234 \
235 MINIFLOW_ASSERT((OFS) % 8 != 0); \
236 miniflow_set_map(MF, OFS / 8); \
237 \
238 memset((uint8_t *)MF.data, 0, (OFS) % 8); \
239}
240
d70e8c28 241#define miniflow_push_be16_(MF, OFS, VALUE) \
419681da
JR
242 miniflow_push_uint16_(MF, OFS, (OVS_FORCE uint16_t)VALUE);
243
1dcf9ac7
SH
244#define miniflow_push_be8_(MF, OFS, VALUE) \
245 miniflow_push_uint8_(MF, OFS, (OVS_FORCE uint8_t)VALUE);
246
5fcff47b
JR
247#define miniflow_set_maps(MF, OFS, N_WORDS) \
248{ \
249 size_t ofs = (OFS); \
250 size_t n_words = (N_WORDS); \
251 \
252 MINIFLOW_ASSERT(n_words && MF.data + n_words <= MF.end); \
253 ASSERT_FLOWMAP_NOT_SET(&MF.map, ofs); \
254 flowmap_set(&MF.map, ofs, n_words); \
361d808d
JR
255}
256
419681da
JR
257/* Data at 'valuep' may be unaligned. */
258#define miniflow_push_words_(MF, OFS, VALUEP, N_WORDS) \
259{ \
361d808d
JR
260 MINIFLOW_ASSERT((OFS) % 8 == 0); \
261 miniflow_set_maps(MF, (OFS) / 8, (N_WORDS)); \
262 memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof *MF.data); \
263 MF.data += (N_WORDS); \
064af421
BP
264}
265
d70e8c28
JR
266/* Push 32-bit words padded to 64-bits. */
267#define miniflow_push_words_32_(MF, OFS, VALUEP, N_WORDS) \
268{ \
361d808d 269 miniflow_set_maps(MF, (OFS) / 8, DIV_ROUND_UP(N_WORDS, 2)); \
d70e8c28
JR
270 memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof(uint32_t)); \
271 MF.data += DIV_ROUND_UP(N_WORDS, 2); \
d70e8c28
JR
272 if ((N_WORDS) & 1) { \
273 *((uint32_t *)MF.data - 1) = 0; \
274 } \
275}
50f06e16 276
d70e8c28
JR
277/* Data at 'valuep' may be unaligned. */
278/* MACs start 64-aligned, and must be followed by other data or padding. */
279#define miniflow_push_macs_(MF, OFS, VALUEP) \
280{ \
361d808d 281 miniflow_set_maps(MF, (OFS) / 8, 2); \
d70e8c28
JR
282 memcpy(MF.data, (VALUEP), 2 * ETH_ADDR_LEN); \
283 MF.data += 1; /* First word only. */ \
d70e8c28 284}
50f06e16 285
d70e8c28
JR
286#define miniflow_push_uint32(MF, FIELD, VALUE) \
287 miniflow_push_uint32_(MF, offsetof(struct flow, FIELD), VALUE)
50f06e16 288
d70e8c28
JR
289#define miniflow_push_be32(MF, FIELD, VALUE) \
290 miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE)
50f06e16 291
d70e8c28 292#define miniflow_push_uint16(MF, FIELD, VALUE) \
419681da 293 miniflow_push_uint16_(MF, offsetof(struct flow, FIELD), VALUE)
9e69bc5f 294
d70e8c28 295#define miniflow_push_be16(MF, FIELD, VALUE) \
419681da 296 miniflow_push_be16_(MF, offsetof(struct flow, FIELD), VALUE)
9e69bc5f 297
1dcf9ac7
SH
298#define miniflow_push_uint8(MF, FIELD, VALUE) \
299 miniflow_push_uint8_(MF, offsetof(struct flow, FIELD), VALUE)
300
d70e8c28 301#define miniflow_pad_to_64(MF, FIELD) \
06f41fc4 302 miniflow_pad_to_64_(MF, OFFSETOFEND(struct flow, FIELD))
d70e8c28 303
1589ee5a
SH
304#define miniflow_pad_from_64(MF, FIELD) \
305 miniflow_pad_from_64_(MF, offsetof(struct flow, FIELD))
306
419681da
JR
307#define miniflow_push_words(MF, FIELD, VALUEP, N_WORDS) \
308 miniflow_push_words_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)
064af421 309
d70e8c28
JR
310#define miniflow_push_words_32(MF, FIELD, VALUEP, N_WORDS) \
311 miniflow_push_words_32_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)
312
313#define miniflow_push_macs(MF, FIELD, VALUEP) \
314 miniflow_push_macs_(MF, offsetof(struct flow, FIELD), VALUEP)
315
daf4d3c1
JR
316/* Return the pointer to the miniflow data when called BEFORE the corresponding
317 * push. */
318#define miniflow_pointer(MF, FIELD) \
319 (void *)((uint8_t *)MF.data + ((offsetof(struct flow, FIELD)) % 8))
320
419681da
JR
321/* Pulls the MPLS headers at '*datap' and returns the count of them. */
322static inline int
4c0e587c 323parse_mpls(const void **datap, size_t *sizep)
d31f1109 324{
419681da
JR
325 const struct mpls_hdr *mh;
326 int count = 0;
d31f1109 327
419681da
JR
328 while ((mh = data_try_pull(datap, sizep, sizeof *mh))) {
329 count++;
330 if (mh->mpls_lse.lo & htons(1 << MPLS_BOS_SHIFT)) {
d31f1109
JP
331 break;
332 }
419681da 333 }
ba8561c6 334 return MIN(count, FLOW_MAX_MPLS_LABELS);
419681da 335}
d31f1109 336
f0fb825a
EG
337/* passed vlan_hdrs arg must be at least size FLOW_MAX_VLAN_HEADERS. */
338static inline ALWAYS_INLINE size_t
339parse_vlan(const void **datap, size_t *sizep, union flow_vlan_hdr *vlan_hdrs)
419681da 340{
f0fb825a 341 const ovs_be16 *eth_type;
d31f1109 342
f0fb825a 343 memset(vlan_hdrs, 0, sizeof(union flow_vlan_hdr) * FLOW_MAX_VLAN_HEADERS);
419681da 344 data_pull(datap, sizep, ETH_ADDR_LEN * 2);
d31f1109 345
f0fb825a
EG
346 eth_type = *datap;
347
348 size_t n;
349 for (n = 0; eth_type_vlan(*eth_type) && n < flow_vlan_limit; n++) {
350 if (OVS_UNLIKELY(*sizep < sizeof(ovs_be32) + sizeof(ovs_be16))) {
351 break;
d31f1109 352 }
f0fb825a
EG
353
354 const ovs_16aligned_be32 *qp = data_pull(datap, sizep, sizeof *qp);
355 vlan_hdrs[n].qtag = get_16aligned_be32(qp);
356 vlan_hdrs[n].tci |= htons(VLAN_CFI);
357 eth_type = *datap;
d31f1109 358 }
f0fb825a 359 return n;
d31f1109
JP
360}
361
206b60d4 362static inline ALWAYS_INLINE ovs_be16
4c0e587c 363parse_ethertype(const void **datap, size_t *sizep)
88366484 364{
419681da
JR
365 const struct llc_snap_header *llc;
366 ovs_be16 proto;
5a51b2cd 367
419681da
JR
368 proto = *(ovs_be16 *) data_pull(datap, sizep, sizeof proto);
369 if (OVS_LIKELY(ntohs(proto) >= ETH_TYPE_MIN)) {
370 return proto;
88366484 371 }
5a51b2cd 372
419681da
JR
373 if (OVS_UNLIKELY(*sizep < sizeof *llc)) {
374 return htons(FLOW_DL_TYPE_NONE);
88366484 375 }
5a51b2cd 376
419681da
JR
377 llc = *datap;
378 if (OVS_UNLIKELY(llc->llc.llc_dsap != LLC_DSAP_SNAP
379 || llc->llc.llc_ssap != LLC_SSAP_SNAP
380 || llc->llc.llc_cntl != LLC_CNTL_SNAP
381 || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET,
382 sizeof llc->snap.snap_org))) {
383 return htons(FLOW_DL_TYPE_NONE);
c6bcb685 384 }
c6bcb685 385
419681da 386 data_pull(datap, sizep, sizeof *llc);
685a51a5 387
419681da
JR
388 if (OVS_LIKELY(ntohs(llc->snap.snap_type) >= ETH_TYPE_MIN)) {
389 return llc->snap.snap_type;
685a51a5
JP
390 }
391
419681da
JR
392 return htons(FLOW_DL_TYPE_NONE);
393}
685a51a5 394
daf4d3c1
JR
395/* Returns 'true' if the packet is an ND packet. In that case the '*nd_target'
396 * and 'arp_buf[]' are filled in. If the packet is not an ND pacet, 'false' is
397 * returned and no values are filled in on '*nd_target' or 'arp_buf[]'. */
398static inline bool
4c0e587c 399parse_icmpv6(const void **datap, size_t *sizep, const struct icmp6_hdr *icmp,
419681da 400 const struct in6_addr **nd_target,
74ff3298 401 struct eth_addr arp_buf[2])
419681da 402{
daf4d3c1
JR
403 if (icmp->icmp6_code != 0 ||
404 (icmp->icmp6_type != ND_NEIGHBOR_SOLICIT &&
405 icmp->icmp6_type != ND_NEIGHBOR_ADVERT)) {
406 return false;
407 }
685a51a5 408
daf4d3c1
JR
409 arp_buf[0] = eth_addr_zero;
410 arp_buf[1] = eth_addr_zero;
411 *nd_target = data_try_pull(datap, sizep, sizeof **nd_target);
412 if (OVS_UNLIKELY(!*nd_target)) {
413 return true;
414 }
685a51a5 415
daf4d3c1
JR
416 while (*sizep >= 8) {
417 /* The minimum size of an option is 8 bytes, which also is
418 * the size of Ethernet link-layer options. */
86d46f3c
ZKL
419 const struct ovs_nd_lla_opt *lla_opt = *datap;
420 int opt_len = lla_opt->len * ND_LLA_OPT_LEN;
88366484 421
daf4d3c1
JR
422 if (!opt_len || opt_len > *sizep) {
423 return true;
424 }
685a51a5 425
daf4d3c1
JR
426 /* Store the link layer address if the appropriate option is
427 * provided. It is considered an error if the same link
428 * layer option is specified twice. */
86d46f3c 429 if (lla_opt->type == ND_OPT_SOURCE_LINKADDR && opt_len == 8) {
daf4d3c1 430 if (OVS_LIKELY(eth_addr_is_zero(arp_buf[0]))) {
86d46f3c 431 arp_buf[0] = lla_opt->mac;
daf4d3c1
JR
432 } else {
433 goto invalid;
685a51a5 434 }
86d46f3c 435 } else if (lla_opt->type == ND_OPT_TARGET_LINKADDR && opt_len == 8) {
daf4d3c1 436 if (OVS_LIKELY(eth_addr_is_zero(arp_buf[1]))) {
86d46f3c 437 arp_buf[1] = lla_opt->mac;
daf4d3c1
JR
438 } else {
439 goto invalid;
685a51a5 440 }
685a51a5 441 }
685a51a5 442
daf4d3c1
JR
443 if (OVS_UNLIKELY(!data_try_pull(datap, sizep, opt_len))) {
444 return true;
445 }
446 }
447 return true;
685a51a5
JP
448
449invalid:
cc5dba2f 450 *nd_target = NULL;
74ff3298
JR
451 arp_buf[0] = eth_addr_zero;
452 arp_buf[1] = eth_addr_zero;
daf4d3c1 453 return true;
685a51a5
JP
454}
455
94a81e40
DDP
456static inline bool
457parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
458 uint8_t *nw_frag)
459{
460 while (1) {
461 if (OVS_LIKELY((*nw_proto != IPPROTO_HOPOPTS)
462 && (*nw_proto != IPPROTO_ROUTING)
463 && (*nw_proto != IPPROTO_DSTOPTS)
464 && (*nw_proto != IPPROTO_AH)
465 && (*nw_proto != IPPROTO_FRAGMENT))) {
466 /* It's either a terminal header (e.g., TCP, UDP) or one we
467 * don't understand. In either case, we're done with the
468 * packet, so use it to fill in 'nw_proto'. */
469 return true;
470 }
471
472 /* We only verify that at least 8 bytes of the next header are
473 * available, but many of these headers are longer. Ensure that
474 * accesses within the extension header are within those first 8
475 * bytes. All extension headers are required to be at least 8
476 * bytes. */
477 if (OVS_UNLIKELY(*sizep < 8)) {
478 return false;
479 }
480
481 if ((*nw_proto == IPPROTO_HOPOPTS)
482 || (*nw_proto == IPPROTO_ROUTING)
483 || (*nw_proto == IPPROTO_DSTOPTS)) {
484 /* These headers, while different, have the fields we care
485 * about in the same location and with the same
486 * interpretation. */
487 const struct ip6_ext *ext_hdr = *datap;
488 *nw_proto = ext_hdr->ip6e_nxt;
489 if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
490 (ext_hdr->ip6e_len + 1) * 8))) {
491 return false;
492 }
493 } else if (*nw_proto == IPPROTO_AH) {
494 /* A standard AH definition isn't available, but the fields
495 * we care about are in the same location as the generic
496 * option header--only the header length is calculated
497 * differently. */
498 const struct ip6_ext *ext_hdr = *datap;
499 *nw_proto = ext_hdr->ip6e_nxt;
500 if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
501 (ext_hdr->ip6e_len + 2) * 4))) {
502 return false;
503 }
504 } else if (*nw_proto == IPPROTO_FRAGMENT) {
505 const struct ovs_16aligned_ip6_frag *frag_hdr = *datap;
506
507 *nw_proto = frag_hdr->ip6f_nxt;
508 if (!data_try_pull(datap, sizep, sizeof *frag_hdr)) {
509 return false;
510 }
511
512 /* We only process the first fragment. */
513 if (frag_hdr->ip6f_offlg != htons(0)) {
514 *nw_frag = FLOW_NW_FRAG_ANY;
515 if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
516 *nw_frag |= FLOW_NW_FRAG_LATER;
517 *nw_proto = IPPROTO_FRAGMENT;
518 return true;
519 }
520 }
521 }
522 }
523}
524
/* Public wrapper around parse_ipv6_ext_hdrs__(); see that function for the
 * contract. */
bool
parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                    uint8_t *nw_frag)
{
    return parse_ipv6_ext_hdrs__(datap, sizep, nw_proto, nw_frag);
}
531
3d2fbd70
JS
532bool
533parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key)
534{
535 const struct nsh_hdr *nsh = (const struct nsh_hdr *) *datap;
3d2fbd70
JS
536 uint8_t version, length, flags;
537 uint32_t path_hdr;
538
539 /* Check if it is long enough for NSH header, doesn't support
540 * MD type 2 yet
541 */
7edef47b 542 if (OVS_UNLIKELY(*sizep < NSH_BASE_HDR_LEN)) {
3d2fbd70
JS
543 return false;
544 }
545
f59cb331
YY
546 version = nsh_get_ver(nsh);
547 flags = nsh_get_flags(nsh);
548 length = nsh_hdr_len(nsh);
3d2fbd70 549
f59cb331 550 if (OVS_UNLIKELY(length > *sizep || version != 0)) {
3d2fbd70
JS
551 return false;
552 }
553
3d2fbd70
JS
554 key->flags = flags;
555 key->mdtype = nsh->md_type;
556 key->np = nsh->next_proto;
557
558 path_hdr = ntohl(get_16aligned_be32(&nsh->path_hdr));
559 key->si = (path_hdr & NSH_SI_MASK) >> NSH_SI_SHIFT;
560 key->spi = htonl((path_hdr & NSH_SPI_MASK) >> NSH_SPI_SHIFT);
561
562 switch (key->mdtype) {
563 case NSH_M_TYPE1:
7edef47b
JS
564 if (length != NSH_M_TYPE1_LEN) {
565 return false;
566 }
3d2fbd70 567 for (size_t i = 0; i < 4; i++) {
f59cb331 568 key->context[i] = get_16aligned_be32(&nsh->md1.context[i]);
3d2fbd70
JS
569 }
570 break;
571 case NSH_M_TYPE2:
f59cb331
YY
572 /* Don't support MD type 2 metedata parsing yet */
573 if (length < NSH_BASE_HDR_LEN) {
574 return false;
575 }
576
577 memset(key->context, 0, sizeof(key->context));
578 break;
3d2fbd70 579 default:
7edef47b
JS
580 /* We don't parse other context headers yet. */
581 break;
3d2fbd70
JS
582 }
583
584 data_pull(datap, sizep, length);
585
586 return true;
587}
588
2482b0b0
JS
589/* Initializes 'flow' members from 'packet' and 'md', taking the packet type
590 * into account.
deedf7e7 591 *
2482b0b0 592 * Initializes the layer offsets as follows:
ca78c6b6 593 *
2482b0b0
JS
594 * - packet->l2_5_ofs to the
595 * * the start of the MPLS shim header. Can be zero, if the
596 * packet is of type (OFPHTN_ETHERTYPE, ETH_TYPE_MPLS).
597 * * UINT16_MAX when there is no MPLS shim header.
ca78c6b6 598 *
2482b0b0
JS
599 * - packet->l3_ofs is set to
600 * * zero if the packet_type is in name space OFPHTN_ETHERTYPE
601 * and there is no MPLS shim header.
602 * * just past the Ethernet header, or just past the vlan_header if
603 * one is present, to the first byte of the payload of the
604 * Ethernet frame if the packet type is Ethernet and there is
605 * no MPLS shim header.
606 * * just past the MPLS label stack to the first byte of the MPLS
607 * payload if there is at least one MPLS shim header.
608 * * UINT16_MAX if the packet type is Ethernet and the frame is
609 * too short to contain an Ethernet header.
ca78c6b6 610 *
2482b0b0
JS
611 * - packet->l4_ofs is set to just past the IPv4 or IPv6 header, if one is
612 * present and the packet has at least the content used for the fields
613 * of interest for the flow, otherwise UINT16_MAX.
ca78c6b6 614 */
7257b535 615void
cf62fa4c 616flow_extract(struct dp_packet *packet, struct flow *flow)
064af421 617{
27bbe15d
JR
618 struct {
619 struct miniflow mf;
d70e8c28 620 uint64_t buf[FLOW_U64S];
27bbe15d 621 } m;
064af421
BP
622
623 COVERAGE_INC(flow_extract);
624
cf62fa4c 625 miniflow_extract(packet, &m.mf);
27bbe15d 626 miniflow_expand(&m.mf, flow);
419681da 627}
296e07ac 628
27bbe15d 629/* Caller is responsible for initializing 'dst' with enough storage for
d70e8c28 630 * FLOW_U64S * 8 bytes. */
419681da 631void
cf62fa4c 632miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
419681da 633{
cf62fa4c 634 const struct pkt_metadata *md = &packet->md;
4c0e587c 635 const void *data = dp_packet_data(packet);
cf62fa4c 636 size_t size = dp_packet_size(packet);
2482b0b0 637 ovs_be32 packet_type = packet->packet_type;
09b0fa9c 638 uint64_t *values = miniflow_values(dst);
5fcff47b
JR
639 struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
640 values + FLOW_U64S };
2482b0b0
JS
641 const char *frame;
642 ovs_be16 dl_type = OVS_BE16_MAX;
419681da 643 uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
daf4d3c1
JR
644 uint8_t *ct_nw_proto_p = NULL;
645 ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;
419681da
JR
646
647 /* Metadata. */
ffe4c74f 648 if (flow_tnl_dst_is_set(&md->tunnel)) {
cf62fa4c 649 miniflow_push_words(mf, tunnel, &md->tunnel,
9ad11dbe
JG
650 offsetof(struct flow_tnl, metadata) /
651 sizeof(uint64_t));
6728d578
JG
652
653 if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
654 if (md->tunnel.metadata.present.map) {
655 miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
656 sizeof md->tunnel.metadata /
657 sizeof(uint64_t));
658 }
659 } else {
660 if (md->tunnel.metadata.present.len) {
661 miniflow_push_words(mf, tunnel.metadata.present,
662 &md->tunnel.metadata.present, 1);
663 miniflow_push_words(mf, tunnel.metadata.opts.gnv,
664 md->tunnel.metadata.opts.gnv,
665 DIV_ROUND_UP(md->tunnel.metadata.present.len,
666 sizeof(uint64_t)));
667 }
9ad11dbe 668 }
cf62fa4c
PS
669 }
670 if (md->skb_priority || md->pkt_mark) {
671 miniflow_push_uint32(mf, skb_priority, md->skb_priority);
672 miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);
673 }
674 miniflow_push_uint32(mf, dp_hash, md->dp_hash);
675 miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
6cf5c521 676 if (md->ct_state) {
cf62fa4c 677 miniflow_push_uint32(mf, recirc_id, md->recirc_id);
2a28ccc8 678 miniflow_push_uint8(mf, ct_state, md->ct_state);
daf4d3c1
JR
679 ct_nw_proto_p = miniflow_pointer(mf, ct_nw_proto);
680 miniflow_push_uint8(mf, ct_nw_proto, 0);
07659514 681 miniflow_push_uint16(mf, ct_zone, md->ct_zone);
6cf5c521
DB
682 } else if (md->recirc_id) {
683 miniflow_push_uint32(mf, recirc_id, md->recirc_id);
684 miniflow_pad_to_64(mf, recirc_id);
296e07ac 685 }
064af421 686
8e53fe8c
JS
687 if (md->ct_state) {
688 miniflow_push_uint32(mf, ct_mark, md->ct_mark);
2482b0b0 689 miniflow_push_be32(mf, packet_type, packet_type);
9daf2348 690
2ff8484b 691 if (!ovs_u128_is_zero(md->ct_label)) {
9daf2348
JS
692 miniflow_push_words(mf, ct_label, &md->ct_label,
693 sizeof md->ct_label / sizeof(uint64_t));
694 }
2482b0b0
JS
695 } else {
696 miniflow_pad_from_64(mf, packet_type);
697 miniflow_push_be32(mf, packet_type, packet_type);
8e53fe8c
JS
698 }
699
419681da 700 /* Initialize packet's layer pointer and offsets. */
2482b0b0 701 frame = data;
82eb5b0a 702 dp_packet_reset_offsets(packet);
064af421 703
2482b0b0
JS
704 if (packet_type == htonl(PT_ETH)) {
705 /* Must have full Ethernet header to proceed. */
706 if (OVS_UNLIKELY(size < sizeof(struct eth_header))) {
707 goto out;
708 } else {
709 /* Link layer. */
710 ASSERT_SEQUENTIAL(dl_dst, dl_src);
711 miniflow_push_macs(mf, dl_dst, data);
712
713 /* VLAN */
714 union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];
715 size_t num_vlans = parse_vlan(&data, &size, vlans);
716
717 dl_type = parse_ethertype(&data, &size);
718 miniflow_push_be16(mf, dl_type, dl_type);
719 miniflow_pad_to_64(mf, dl_type);
720 if (num_vlans > 0) {
721 miniflow_push_words_32(mf, vlans, vlans, num_vlans);
722 }
f0fb825a 723
2482b0b0
JS
724 }
725 } else {
726 /* Take dl_type from packet_type. */
727 dl_type = pt_ns_type_be(packet_type);
728 miniflow_pad_from_64(mf, dl_type);
419681da 729 miniflow_push_be16(mf, dl_type, dl_type);
2482b0b0 730 /* Do not push vlan_tci, pad instead */
f0fb825a 731 miniflow_pad_to_64(mf, dl_type);
50f06e16 732 }
50f06e16 733
419681da
JR
734 /* Parse mpls. */
735 if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
736 int count;
737 const void *mpls = data;
738
2482b0b0 739 packet->l2_5_ofs = (char *)data - frame;
419681da 740 count = parse_mpls(&data, &size);
d70e8c28 741 miniflow_push_words_32(mf, mpls_lse, mpls, count);
b02475c5
SH
742 }
743
ad128cc1 744 /* Network layer. */
2482b0b0 745 packet->l3_ofs = (char *)data - frame;
419681da
JR
746
747 nw_frag = 0;
748 if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
749 const struct ip_header *nh = data;
750 int ip_len;
fa8d9001 751 uint16_t tot_len;
419681da
JR
752
753 if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
754 goto out;
755 }
756 ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
757
758 if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {
759 goto out;
760 }
fa8d9001
JR
761 if (OVS_UNLIKELY(size < ip_len)) {
762 goto out;
763 }
764 tot_len = ntohs(nh->ip_tot_len);
5b2cdc3f 765 if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len)) {
fa8d9001
JR
766 goto out;
767 }
768 if (OVS_UNLIKELY(size - tot_len > UINT8_MAX)) {
769 goto out;
770 }
cf62fa4c 771 dp_packet_set_l2_pad_size(packet, size - tot_len);
fa8d9001 772 size = tot_len; /* Never pull padding. */
419681da
JR
773
774 /* Push both source and destination address at once. */
d70e8c28 775 miniflow_push_words(mf, nw_src, &nh->ip_src, 1);
daf4d3c1
JR
776 if (ct_nw_proto_p && !md->ct_orig_tuple_ipv6) {
777 *ct_nw_proto_p = md->ct_orig_tuple.ipv4.ipv4_proto;
778 if (*ct_nw_proto_p) {
779 miniflow_push_words(mf, ct_nw_src,
780 &md->ct_orig_tuple.ipv4.ipv4_src, 1);
781 ct_tp_src = md->ct_orig_tuple.ipv4.src_port;
782 ct_tp_dst = md->ct_orig_tuple.ipv4.dst_port;
783 }
784 }
d70e8c28
JR
785
786 miniflow_push_be32(mf, ipv6_label, 0); /* Padding for IPv4. */
419681da
JR
787
788 nw_tos = nh->ip_tos;
789 nw_ttl = nh->ip_ttl;
790 nw_proto = nh->ip_proto;
791 if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
792 nw_frag = FLOW_NW_FRAG_ANY;
793 if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
794 nw_frag |= FLOW_NW_FRAG_LATER;
795 }
796 }
419681da 797 data_pull(&data, &size, ip_len);
419681da
JR
798 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
799 const struct ovs_16aligned_ip6_hdr *nh;
800 ovs_be32 tc_flow;
fa8d9001 801 uint16_t plen;
419681da
JR
802
803 if (OVS_UNLIKELY(size < sizeof *nh)) {
804 goto out;
805 }
806 nh = data_pull(&data, &size, sizeof *nh);
807
fa8d9001
JR
808 plen = ntohs(nh->ip6_plen);
809 if (OVS_UNLIKELY(plen > size)) {
810 goto out;
811 }
812 /* Jumbo Payload option not supported yet. */
813 if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {
814 goto out;
815 }
cf62fa4c 816 dp_packet_set_l2_pad_size(packet, size - plen);
fa8d9001
JR
817 size = plen; /* Never pull padding. */
818
419681da 819 miniflow_push_words(mf, ipv6_src, &nh->ip6_src,
d70e8c28 820 sizeof nh->ip6_src / 8);
419681da 821 miniflow_push_words(mf, ipv6_dst, &nh->ip6_dst,
d70e8c28 822 sizeof nh->ip6_dst / 8);
daf4d3c1
JR
823 if (ct_nw_proto_p && md->ct_orig_tuple_ipv6) {
824 *ct_nw_proto_p = md->ct_orig_tuple.ipv6.ipv6_proto;
825 if (*ct_nw_proto_p) {
826 miniflow_push_words(mf, ct_ipv6_src,
827 &md->ct_orig_tuple.ipv6.ipv6_src,
828 2 *
829 sizeof md->ct_orig_tuple.ipv6.ipv6_src / 8);
830 ct_tp_src = md->ct_orig_tuple.ipv6.src_port;
831 ct_tp_dst = md->ct_orig_tuple.ipv6.dst_port;
832 }
833 }
419681da
JR
834
835 tc_flow = get_16aligned_be32(&nh->ip6_flow);
836 {
837 ovs_be32 label = tc_flow & htonl(IPV6_LABEL_MASK);
d70e8c28 838 miniflow_push_be32(mf, ipv6_label, label);
419681da
JR
839 }
840
841 nw_tos = ntohl(tc_flow) >> 20;
842 nw_ttl = nh->ip6_hlim;
843 nw_proto = nh->ip6_nxt;
844
94a81e40
DDP
845 if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
846 goto out;
50f06e16 847 }
419681da
JR
848 } else {
849 if (dl_type == htons(ETH_TYPE_ARP) ||
850 dl_type == htons(ETH_TYPE_RARP)) {
74ff3298 851 struct eth_addr arp_buf[2];
419681da
JR
852 const struct arp_eth_header *arp = (const struct arp_eth_header *)
853 data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);
854
855 if (OVS_LIKELY(arp) && OVS_LIKELY(arp->ar_hrd == htons(1))
856 && OVS_LIKELY(arp->ar_pro == htons(ETH_TYPE_IP))
857 && OVS_LIKELY(arp->ar_hln == ETH_ADDR_LEN)
858 && OVS_LIKELY(arp->ar_pln == 4)) {
d70e8c28
JR
859 miniflow_push_be32(mf, nw_src,
860 get_16aligned_be32(&arp->ar_spa));
861 miniflow_push_be32(mf, nw_dst,
862 get_16aligned_be32(&arp->ar_tpa));
419681da
JR
863
864 /* We only match on the lower 8 bits of the opcode. */
865 if (OVS_LIKELY(ntohs(arp->ar_op) <= 0xff)) {
d70e8c28 866 miniflow_push_be32(mf, ipv6_label, 0); /* Pad with ARP. */
419681da
JR
867 miniflow_push_be32(mf, nw_frag, htonl(ntohs(arp->ar_op)));
868 }
d31f1109 869
419681da 870 /* Must be adjacent. */
268eca11 871 ASSERT_SEQUENTIAL(arp_sha, arp_tha);
419681da 872
74ff3298
JR
873 arp_buf[0] = arp->ar_sha;
874 arp_buf[1] = arp->ar_tha;
d70e8c28 875 miniflow_push_macs(mf, arp_sha, arp_buf);
06f41fc4 876 miniflow_pad_to_64(mf, arp_tha);
419681da 877 }
3d2fbd70
JS
878 } else if (dl_type == htons(ETH_TYPE_NSH)) {
879 struct flow_nsh nsh;
880
881 if (OVS_LIKELY(parse_nsh(&data, &size, &nsh))) {
f59cb331
YY
882 miniflow_push_words(mf, nsh, &nsh,
883 sizeof(struct flow_nsh) /
884 sizeof(uint64_t));
3d2fbd70 885 }
d31f1109 886 }
419681da
JR
887 goto out;
888 }
889
2482b0b0 890 packet->l4_ofs = (char *)data - frame;
419681da 891 miniflow_push_be32(mf, nw_frag,
a13784ba 892 bytes_to_be32(nw_frag, nw_tos, nw_ttl, nw_proto));
419681da
JR
893
894 if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
895 if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
896 if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
897 const struct tcp_header *tcp = data;
898
74ff3298 899 miniflow_push_be32(mf, arp_tha.ea[2], 0);
419681da
JR
900 miniflow_push_be32(mf, tcp_flags,
901 TCP_FLAGS_BE32(tcp->tcp_ctl));
115f2481
JR
902 miniflow_push_be16(mf, tp_src, tcp->tcp_src);
903 miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
daf4d3c1
JR
904 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
905 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
419681da
JR
906 }
907 } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
908 if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
909 const struct udp_header *udp = data;
910
115f2481
JR
911 miniflow_push_be16(mf, tp_src, udp->udp_src);
912 miniflow_push_be16(mf, tp_dst, udp->udp_dst);
daf4d3c1
JR
913 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
914 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
064af421 915 }
419681da
JR
916 } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
917 if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
918 const struct sctp_header *sctp = data;
a26ef517 919
115f2481
JR
920 miniflow_push_be16(mf, tp_src, sctp->sctp_src);
921 miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
daf4d3c1
JR
922 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
923 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
419681da
JR
924 }
925 } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
926 if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
927 const struct icmp_header *icmp = data;
928
929 miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
930 miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
daf4d3c1
JR
931 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
932 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
419681da 933 }
0e612675
FL
934 } else if (OVS_LIKELY(nw_proto == IPPROTO_IGMP)) {
935 if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
936 const struct igmp_header *igmp = data;
937
938 miniflow_push_be16(mf, tp_src, htons(igmp->igmp_type));
939 miniflow_push_be16(mf, tp_dst, htons(igmp->igmp_code));
daf4d3c1
JR
940 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
941 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
0e612675
FL
942 miniflow_push_be32(mf, igmp_group_ip4,
943 get_16aligned_be32(&igmp->group));
daf4d3c1 944 miniflow_pad_to_64(mf, igmp_group_ip4);
0e612675 945 }
419681da
JR
946 } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMPV6)) {
947 if (OVS_LIKELY(size >= sizeof(struct icmp6_hdr))) {
daf4d3c1
JR
948 const struct in6_addr *nd_target;
949 struct eth_addr arp_buf[2];
419681da
JR
950 const struct icmp6_hdr *icmp = data_pull(&data, &size,
951 sizeof *icmp);
daf4d3c1
JR
952 if (parse_icmpv6(&data, &size, icmp, &nd_target, arp_buf)) {
953 if (nd_target) {
954 miniflow_push_words(mf, nd_target, nd_target,
955 sizeof *nd_target / sizeof(uint64_t));
956 }
957 miniflow_push_macs(mf, arp_sha, arp_buf);
958 miniflow_pad_to_64(mf, arp_tha);
959 miniflow_push_be16(mf, tp_src, htons(icmp->icmp6_type));
960 miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code));
961 miniflow_pad_to_64(mf, tp_dst);
962 } else {
963 /* ICMPv6 but not ND. */
964 miniflow_push_be16(mf, tp_src, htons(icmp->icmp6_type));
965 miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code));
966 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
967 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
419681da
JR
968 }
969 }
064af421
BP
970 }
971 }
419681da 972 out:
5fcff47b 973 dst->map = mf.map;
064af421
BP
974}
975
206b60d4
DDP
976ovs_be16
977parse_dl_type(const struct eth_header *data_, size_t size)
978{
979 const void *data = data_;
f0fb825a 980 union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];
206b60d4 981
f0fb825a 982 parse_vlan(&data, &size, vlans);
206b60d4
DDP
983
984 return parse_ethertype(&data, &size);
985}
986
993410fb
BP
987/* For every bit of a field that is wildcarded in 'wildcards', sets the
988 * corresponding bit in 'flow' to zero. */
989void
990flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
991{
d70e8c28
JR
992 uint64_t *flow_u64 = (uint64_t *) flow;
993 const uint64_t *wc_u64 = (const uint64_t *) &wildcards->masks;
659c2346 994 size_t i;
993410fb 995
d70e8c28
JR
996 for (i = 0; i < FLOW_U64S; i++) {
997 flow_u64[i] &= wc_u64[i];
26720e24 998 }
993410fb
BP
999}
1000
d8d9c698
EJ
1001void
1002flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc)
1003{
1004 if (flow->nw_proto != IPPROTO_ICMP) {
1005 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
1006 memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
1007 } else {
1008 wc->masks.tp_src = htons(0xff);
1009 wc->masks.tp_dst = htons(0xff);
1010 }
1011}
1012
/* Initializes 'flow_metadata' with the metadata found in 'flow'.
 *
 * Only non-default (nonzero) fields are added to the match, mirroring the
 * OpenFlow convention that all-zero pipeline fields are omitted from
 * packet-in encodings. */
void
flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
{
    int i;

    /* Reminder to update this function whenever struct flow changes. */
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 40);

    match_init_catchall(flow_metadata);
    /* Tunnel metadata: id, public flags, outer v4/v6 endpoints, GBP. */
    if (flow->tunnel.tun_id != htonll(0)) {
        match_set_tun_id(flow_metadata, flow->tunnel.tun_id);
    }
    if (flow->tunnel.flags & FLOW_TNL_PUB_F_MASK) {
        match_set_tun_flags(flow_metadata,
                            flow->tunnel.flags & FLOW_TNL_PUB_F_MASK);
    }
    if (flow->tunnel.ip_src) {
        match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
    }
    if (flow->tunnel.ip_dst) {
        match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
    }
    if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
        match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
    }
    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
    }
    if (flow->tunnel.gbp_id != htons(0)) {
        match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
    }
    if (flow->tunnel.gbp_flags) {
        match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags);
    }
    tun_metadata_get_fmd(&flow->tunnel, flow_metadata);
    if (flow->metadata != htonll(0)) {
        match_set_metadata(flow_metadata, flow->metadata);
    }

    for (i = 0; i < FLOW_N_REGS; i++) {
        if (flow->regs[i]) {
            match_set_reg(flow_metadata, i, flow->regs[i]);
        }
    }

    if (flow->pkt_mark != 0) {
        match_set_pkt_mark(flow_metadata, flow->pkt_mark);
    }

    /* in_port is always matched, even when zero. */
    match_set_in_port(flow_metadata, flow->in_port.ofp_port);
    if (flow->packet_type != htonl(PT_ETH)) {
        match_set_packet_type(flow_metadata, flow->packet_type);
    }

    if (flow->ct_state != 0) {
        match_set_ct_state(flow_metadata, flow->ct_state);
        /* Match dl_type since it is required for the later interpretation of
         * the conntrack metadata. */
        match_set_dl_type(flow_metadata, flow->dl_type);
        /* Original-direction conntrack tuple is only meaningful when the
         * connection is valid and a protocol was recorded. */
        if (is_ct_valid(flow, NULL, NULL) && flow->ct_nw_proto != 0) {
            if (flow->dl_type == htons(ETH_TYPE_IP)) {
                match_set_ct_nw_src(flow_metadata, flow->ct_nw_src);
                match_set_ct_nw_dst(flow_metadata, flow->ct_nw_dst);
                match_set_ct_nw_proto(flow_metadata, flow->ct_nw_proto);
                match_set_ct_tp_src(flow_metadata, flow->ct_tp_src);
                match_set_ct_tp_dst(flow_metadata, flow->ct_tp_dst);
            } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
                match_set_ct_ipv6_src(flow_metadata, &flow->ct_ipv6_src);
                match_set_ct_ipv6_dst(flow_metadata, &flow->ct_ipv6_dst);
                match_set_ct_nw_proto(flow_metadata, flow->ct_nw_proto);
                match_set_ct_tp_src(flow_metadata, flow->ct_tp_src);
                match_set_ct_tp_dst(flow_metadata, flow->ct_tp_dst);
            }
        }
    }
    if (flow->ct_zone != 0) {
        match_set_ct_zone(flow_metadata, flow->ct_zone);
    }
    if (flow->ct_mark != 0) {
        match_set_ct_mark(flow_metadata, flow->ct_mark);
    }
    if (!ovs_u128_is_zero(flow->ct_label)) {
        match_set_ct_label(flow_metadata, flow->ct_label);
    }
}
1098
/* Returns the symbolic name ("new", "est", ...) of the single conntrack
 * state bit 'state', or NULL if 'state' is not exactly one known CS_* bit.
 * The case labels are generated from the CS_STATES x-macro list. */
const char *
ct_state_to_string(uint32_t state)
{
    switch (state) {
#define CS_STATE(ENUM, INDEX, NAME) case CS_##ENUM: return NAME;
    CS_STATES
#undef CS_STATE
    default:
        return NULL;
    }
}
1110
/* Returns the bit value of the conntrack state flag named 's' ("new", "est",
 * ...), or 0 if 's' names no known flag.  The string comparisons are
 * generated from the CS_STATES x-macro list. */
uint32_t
ct_state_from_string(const char *s)
{
#define CS_STATE(ENUM, INDEX, NAME) \
    if (!strcmp(s, NAME)) { \
        return CS_##ENUM; \
    }
    CS_STATES
#undef CS_STATE
    return 0;
}
1122
b4293a33
YHW
1123/* Parses conntrack state from 'state_str'. If it is parsed successfully,
1124 * stores the parsed ct_state in 'ct_state', and returns true. Otherwise,
1125 * returns false, and reports error message in 'ds'. */
1126bool
1127parse_ct_state(const char *state_str, uint32_t default_state,
1128 uint32_t *ct_state, struct ds *ds)
1129{
1130 uint32_t state = default_state;
1131 char *state_s = xstrdup(state_str);
1132 char *save_ptr = NULL;
1133
1134 for (char *cs = strtok_r(state_s, ", ", &save_ptr); cs;
1135 cs = strtok_r(NULL, ", ", &save_ptr)) {
1136 uint32_t bit = ct_state_from_string(cs);
1137 if (!bit) {
1138 ds_put_format(ds, "%s: unknown connection tracking state flag",
1139 cs);
1140 return false;
1141 }
1142 state |= bit;
1143 }
1144
1145 *ct_state = state;
1146 free(state_s);
1147
1148 return true;
1149}
1150
1151/* Checks the given conntrack state 'state' according to the constraints
1152 * listed in ovs-fields (7). Returns true if it is valid. Otherwise, returns
1153 * false, and reports error in 'ds'. */
1154bool
1155validate_ct_state(uint32_t state, struct ds *ds)
1156{
1157 bool valid_ct_state = true;
1158 struct ds d_str = DS_EMPTY_INITIALIZER;
1159
1160 format_flags(&d_str, ct_state_to_string, state, '|');
1161
1162 if (state && !(state & CS_TRACKED)) {
1163 ds_put_format(ds, "%s: invalid connection state: "
1164 "If \"trk\" is unset, no other flags are set\n",
1165 ds_cstr(&d_str));
1166 valid_ct_state = false;
1167 }
1168 if (state & CS_INVALID && state & ~(CS_TRACKED | CS_INVALID)) {
1169 ds_put_format(ds, "%s: invalid connection state: "
1170 "when \"inv\" is set, only \"trk\" may also be set\n",
1171 ds_cstr(&d_str));
1172 valid_ct_state = false;
1173 }
1174 if (state & CS_NEW && state & CS_ESTABLISHED) {
1175 ds_put_format(ds, "%s: invalid connection state: "
1176 "\"new\" and \"est\" are mutually exclusive\n",
1177 ds_cstr(&d_str));
1178 valid_ct_state = false;
1179 }
1180 if (state & CS_NEW && state & CS_REPLY_DIR) {
1181 ds_put_format(ds, "%s: invalid connection state: "
1182 "\"new\" and \"rpy\" are mutually exclusive\n",
1183 ds_cstr(&d_str));
1184 valid_ct_state = false;
1185 }
1186
1187 ds_destroy(&d_str);
1188 return valid_ct_state;
1189}
1190
6846e91e
BP
1191/* Clears the fields in 'flow' associated with connection tracking. */
1192void
1193flow_clear_conntrack(struct flow *flow)
1194{
1195 flow->ct_state = 0;
1196 flow->ct_zone = 0;
1197 flow->ct_mark = 0;
1198 flow->ct_label = OVS_U128_ZERO;
1199
1200 flow->ct_nw_proto = 0;
1201 flow->ct_tp_src = 0;
1202 flow->ct_tp_dst = 0;
1203 if (flow->dl_type == htons(ETH_TYPE_IP)) {
1204 flow->ct_nw_src = 0;
1205 flow->ct_nw_dst = 0;
1206 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
1207 memset(&flow->ct_ipv6_src, 0, sizeof flow->ct_ipv6_src);
1208 memset(&flow->ct_ipv6_dst, 0, sizeof flow->ct_ipv6_dst);
1209 }
1210}
1211
064af421 1212char *
50f96b10
BP
1213flow_to_string(const struct flow *flow,
1214 const struct ofputil_port_map *port_map)
064af421
BP
1215{
1216 struct ds ds = DS_EMPTY_INITIALIZER;
50f96b10 1217 flow_format(&ds, flow, port_map);
064af421
BP
1218 return ds_cstr(&ds);
1219}
1220
4fe3445a
PS
1221const char *
1222flow_tun_flag_to_string(uint32_t flags)
1223{
1224 switch (flags) {
1225 case FLOW_TNL_F_DONT_FRAGMENT:
1226 return "df";
1227 case FLOW_TNL_F_CSUM:
1228 return "csum";
1229 case FLOW_TNL_F_KEY:
1230 return "key";
94872594
JG
1231 case FLOW_TNL_F_OAM:
1232 return "oam";
4fe3445a
PS
1233 default:
1234 return NULL;
1235 }
1236}
1237
/* Appends to 'ds' the names of the bits set in 'flags', separated by 'del',
 * looking names up with 'bit_to_string'.  Bits with no name are collected and
 * appended once as a hex value.  A zero 'flags' prints as "0". */
void
format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
             uint32_t flags, char del)
{
    uint32_t unnamed = 0;

    if (!flags) {
        ds_put_char(ds, '0');
        return;
    }

    for (; flags; flags &= flags - 1) {   /* Clear rightmost 1-bit each pass. */
        uint32_t bit = rightmost_1bit(flags);
        const char *name = bit_to_string(bit);

        if (name) {
            ds_put_format(ds, "%s%c", name, del);
        } else {
            unnamed |= bit;
        }
    }

    if (unnamed) {
        ds_put_format(ds, "0x%"PRIx32"%c", unnamed, del);
    }
    ds_chomp(ds, del);
}
1267
61bf6666
JR
1268void
1269format_flags_masked(struct ds *ds, const char *name,
1270 const char *(*bit_to_string)(uint32_t), uint32_t flags,
8e4c1621 1271 uint32_t mask, uint32_t max_mask)
61bf6666
JR
1272{
1273 if (name) {
0deec6d2 1274 ds_put_format(ds, "%s%s=%s", colors.param, name, colors.end);
61bf6666 1275 }
8e4c1621
JG
1276
1277 if (mask == max_mask) {
1278 format_flags(ds, bit_to_string, flags, '|');
1279 return;
1280 }
1281
1282 if (!mask) {
1283 ds_put_cstr(ds, "0/0");
1284 return;
1285 }
1286
61bf6666
JR
1287 while (mask) {
1288 uint32_t bit = rightmost_1bit(mask);
1289 const char *s = bit_to_string(bit);
1290
1291 ds_put_format(ds, "%s%s", (flags & bit) ? "+" : "-",
1292 s ? s : "[Unknown]");
1293 mask &= ~bit;
1294 }
1295}
1296
/* Appends 'value'/'mask' to 's': "*" for an empty mask, the value alone for a
 * full mask, otherwise "value/mask".  Values above 9 print in hex. */
static void
put_u16_masked(struct ds *s, uint16_t value, uint16_t mask)
{
    if (!mask) {
        ds_put_char(s, '*');
        return;
    }

    if (value > 9) {
        ds_put_format(s, "0x%"PRIx16, value);
    } else {
        ds_put_format(s, "%"PRIu16, value);
    }

    if (mask != UINT16_MAX) {
        ds_put_format(s, "/0x%"PRIx16, mask);
    }
}
1314
1315void
1316format_packet_type_masked(struct ds *s, ovs_be32 value, ovs_be32 mask)
1317{
1318 if (value == htonl(PT_ETH) && mask == OVS_BE32_MAX) {
1319 ds_put_cstr(s, "eth");
1320 } else {
1321 ds_put_cstr(s, "packet_type=(");
1322 put_u16_masked(s, pt_ns(value), pt_ns(mask));
1323 ds_put_char(s, ',');
1324 put_u16_masked(s, pt_ns_type(value), pt_ns_type(mask));
1325 ds_put_char(s, ')');
1326 }
1327}
1328
/* Scans a string 's' of flags to determine their numerical value and
 * returns the number of characters parsed using 'bit_to_string' to
 * lookup flag names.  Scanning continues until the character 'end' is
 * reached.
 *
 * In the event of a failure, a negative error code will be returned.  In
 * addition, if 'res_string' is non-NULL then a descriptive string will
 * be returned incorporating the identifying string 'field_name'.  This
 * error string must be freed by the caller.
 *
 * Upon success, the flag values will be stored in 'res_flags' and
 * optionally 'res_mask', if it is non-NULL (if it is NULL then any masks
 * present in the original string will be considered an error).  The
 * caller may restrict the acceptable set of values through the mask
 * 'allowed'. */
int
parse_flags(const char *s, const char *(*bit_to_string)(uint32_t),
            char end, const char *field_name, char **res_string,
            uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask)
{
    uint32_t result = 0;
    int n;

    /* Parse masked flags in numeric format? */
    if (res_mask && ovs_scan(s, "%"SCNi32"/%"SCNi32"%n",
                             res_flags, res_mask, &n) && n > 0) {
        if (*res_flags & ~allowed || *res_mask & ~allowed) {
            goto unknown;
        }
        return n;
    }

    n = 0;

    /* A leading '+' or '-' selects the symbolic masked syntax, which is only
     * accepted when the caller can receive a mask. */
    if (res_mask && (*s == '+' || *s == '-')) {
        uint32_t flags = 0, mask = 0;

        /* Parse masked flags. */
        while (s[0] != end) {
            bool set;
            uint32_t bit;
            size_t len;

            if (s[0] == '+') {
                set = true;
            } else if (s[0] == '-') {
                set = false;
            } else {
                if (res_string) {
                    *res_string = xasprintf("%s: %s must be preceded by '+' "
                                            "(for SET) or '-' (NOT SET)", s,
                                            field_name);
                }
                return -EINVAL;
            }
            s++;
            n++;

            /* Try every single-bit value until one's name matches the text at
             * 's' and is properly terminated.  'len' survives the loop for
             * the advance below; 'bit' == 0 signals "no name matched". */
            for (bit = 1; bit; bit <<= 1) {
                const char *fname = bit_to_string(bit);

                if (!fname) {
                    continue;
                }

                len = strlen(fname);
                if (strncmp(s, fname, len) ||
                    (s[len] != '+' && s[len] != '-' && s[len] != end)) {
                    continue;
                }

                if (mask & bit) {
                    /* bit already set. */
                    if (res_string) {
                        *res_string = xasprintf("%s: Each %s flag can be "
                                                "specified only once", s,
                                                field_name);
                    }
                    return -EINVAL;
                }
                if (!(bit & allowed)) {
                    goto unknown;
                }
                if (set) {
                    flags |= bit;
                }
                mask |= bit;
                break;
            }

            if (!bit) {
                goto unknown;
            }
            s += len;
            n += len;
        }

        *res_flags = flags;
        *res_mask = mask;
        return n;
    }

    /* Parse unmasked flags.  If a flag is present, it is set, otherwise
     * it is not set. */
    while (s[n] != end) {
        unsigned long long int flags;
        uint32_t bit;
        int n0;

        /* Numeric values may be mixed with names, '|'-separated. */
        if (ovs_scan(&s[n], "%lli%n", &flags, &n0)) {
            if (flags & ~allowed) {
                goto unknown;
            }
            n += n0 + (s[n + n0] == '|');
            result |= flags;
            continue;
        }

        for (bit = 1; bit; bit <<= 1) {
            const char *name = bit_to_string(bit);
            size_t len;

            if (!name) {
                continue;
            }

            len = strlen(name);
            if (!strncmp(s + n, name, len) &&
                (s[n + len] == '|' || s[n + len] == end)) {
                if (!(bit & allowed)) {
                    goto unknown;
                }
                result |= bit;
                n += len + (s[n + len] == '|');
                break;
            }
        }

        if (!bit) {
            goto unknown;
        }
    }

    *res_flags = result;
    if (res_mask) {
        *res_mask = UINT32_MAX;
    }
    if (res_string) {
        *res_string = NULL;
    }
    return n;

unknown:
    if (res_string) {
        *res_string = xasprintf("%s: unknown %s flag(s)", s, field_name);
    }
    return -EINVAL;
}
1487
/* Appends a human-readable representation of 'flow' to 'ds', formatting port
 * numbers with 'port_map' when nonnull. */
void
flow_format(struct ds *ds,
            const struct flow *flow, const struct ofputil_port_map *port_map)
{
    struct match match;
    struct flow_wildcards *wc = &match.wc;

    match_wc_init(&match, flow);

    /* As this function is most often used for formatting a packet in a
     * packet-in message, skip formatting the packet context fields that are
     * all-zeroes to make the print-out easier on the eyes.  This means that a
     * missing context field implies a zero value for that field.  This is
     * similar to OpenFlow encoding of these fields, as the specification
     * states that all-zeroes context fields should not be encoded in the
     * packet-in messages. */
    if (!flow->in_port.ofp_port) {
        WC_UNMASK_FIELD(wc, in_port);
    }
    if (!flow->skb_priority) {
        WC_UNMASK_FIELD(wc, skb_priority);
    }
    if (!flow->pkt_mark) {
        WC_UNMASK_FIELD(wc, pkt_mark);
    }
    if (!flow->recirc_id) {
        WC_UNMASK_FIELD(wc, recirc_id);
    }
    if (!flow->dp_hash) {
        WC_UNMASK_FIELD(wc, dp_hash);
    }
    if (!flow->ct_state) {
        WC_UNMASK_FIELD(wc, ct_state);
    }
    if (!flow->ct_zone) {
        WC_UNMASK_FIELD(wc, ct_zone);
    }
    if (!flow->ct_mark) {
        WC_UNMASK_FIELD(wc, ct_mark);
    }
    if (ovs_u128_is_zero(flow->ct_label)) {
        WC_UNMASK_FIELD(wc, ct_label);
    }
    /* Hide the original-direction conntrack tuple when the connection is not
     * valid or has no recorded protocol; which half is hidden depends on the
     * address family. */
    if (!is_ct_valid(flow, &match.wc, NULL) || !flow->ct_nw_proto) {
        WC_UNMASK_FIELD(wc, ct_nw_proto);
        WC_UNMASK_FIELD(wc, ct_tp_src);
        WC_UNMASK_FIELD(wc, ct_tp_dst);
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
            WC_UNMASK_FIELD(wc, ct_nw_src);
            WC_UNMASK_FIELD(wc, ct_nw_dst);
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
            WC_UNMASK_FIELD(wc, ct_ipv6_src);
            WC_UNMASK_FIELD(wc, ct_ipv6_dst);
        }
    }
    for (int i = 0; i < FLOW_N_REGS; i++) {
        if (!flow->regs[i]) {
            WC_UNMASK_FIELD(wc, regs[i]);
        }
    }
    if (!flow->metadata) {
        WC_UNMASK_FIELD(wc, metadata);
    }

    match_format(&match, port_map, ds, OFP_DEFAULT_PRIORITY);
}
1554
/* Writes a human-readable representation of 'flow' to 'stream'. */
void
flow_print(FILE *stream,
           const struct flow *flow, const struct ofputil_port_map *port_map)
{
    char *str = flow_to_string(flow, port_map);

    fputs(str, stream);
    free(str);
}
54363004
BP
1563\f
1564/* flow_wildcards functions. */
1565
/* Initializes 'wc' as a set of wildcards that matches every packet. */
void
flow_wildcards_init_catchall(struct flow_wildcards *wc)
{
    /* An all-zero mask wildcards every bit of every field. */
    memset(&wc->masks, 0, sizeof wc->masks);
}
1572
78c9486d
JR
1573/* Converts a flow into flow wildcards. It sets the wildcard masks based on
1574 * the packet headers extracted to 'flow'. It will not set the mask for fields
1575 * that do not make sense for the packet type. OpenFlow-only metadata is
1576 * wildcarded, but other metadata is unconditionally exact-matched. */
f0fb825a
EG
1577void
1578flow_wildcards_init_for_packet(struct flow_wildcards *wc,
1579 const struct flow *flow)
78c9486d 1580{
cb1145d1
ZB
1581 ovs_be16 dl_type = OVS_BE16_MAX;
1582
78c9486d
JR
1583 memset(&wc->masks, 0x0, sizeof wc->masks);
1584
0de8783a 1585 /* Update this function whenever struct flow changes. */
3d2fbd70 1586 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 40);
0de8783a 1587
ffe4c74f 1588 if (flow_tnl_dst_is_set(&flow->tunnel)) {
78c9486d
JR
1589 if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
1590 WC_MASK_FIELD(wc, tunnel.tun_id);
1591 }
1592 WC_MASK_FIELD(wc, tunnel.ip_src);
1593 WC_MASK_FIELD(wc, tunnel.ip_dst);
ffe4c74f
JB
1594 WC_MASK_FIELD(wc, tunnel.ipv6_src);
1595 WC_MASK_FIELD(wc, tunnel.ipv6_dst);
78c9486d
JR
1596 WC_MASK_FIELD(wc, tunnel.flags);
1597 WC_MASK_FIELD(wc, tunnel.ip_tos);
1598 WC_MASK_FIELD(wc, tunnel.ip_ttl);
1599 WC_MASK_FIELD(wc, tunnel.tp_src);
1600 WC_MASK_FIELD(wc, tunnel.tp_dst);
ac6073e3
MC
1601 WC_MASK_FIELD(wc, tunnel.gbp_id);
1602 WC_MASK_FIELD(wc, tunnel.gbp_flags);
9558d2a5 1603
6728d578
JG
1604 if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
1605 if (flow->tunnel.metadata.present.map) {
1606 wc->masks.tunnel.metadata.present.map =
1607 flow->tunnel.metadata.present.map;
1608 WC_MASK_FIELD(wc, tunnel.metadata.opts.u8);
8d8ab6c2 1609 WC_MASK_FIELD(wc, tunnel.metadata.tab);
6728d578
JG
1610 }
1611 } else {
1612 WC_MASK_FIELD(wc, tunnel.metadata.present.len);
1613 memset(wc->masks.tunnel.metadata.opts.gnv, 0xff,
1614 flow->tunnel.metadata.present.len);
9558d2a5 1615 }
78c9486d
JR
1616 } else if (flow->tunnel.tun_id) {
1617 WC_MASK_FIELD(wc, tunnel.tun_id);
1618 }
1619
18080541 1620 /* metadata, regs, and conj_id wildcarded. */
78c9486d
JR
1621
1622 WC_MASK_FIELD(wc, skb_priority);
1623 WC_MASK_FIELD(wc, pkt_mark);
07659514
JS
1624 WC_MASK_FIELD(wc, ct_state);
1625 WC_MASK_FIELD(wc, ct_zone);
8e53fe8c 1626 WC_MASK_FIELD(wc, ct_mark);
9daf2348 1627 WC_MASK_FIELD(wc, ct_label);
78c9486d
JR
1628 WC_MASK_FIELD(wc, recirc_id);
1629 WC_MASK_FIELD(wc, dp_hash);
1630 WC_MASK_FIELD(wc, in_port);
1631
c61f3870
BP
1632 /* actset_output wildcarded. */
1633
3d4b2e6e 1634 WC_MASK_FIELD(wc, packet_type);
cb1145d1
ZB
1635 if (flow->packet_type == htonl(PT_ETH)) {
1636 WC_MASK_FIELD(wc, dl_dst);
1637 WC_MASK_FIELD(wc, dl_src);
1638 WC_MASK_FIELD(wc, dl_type);
1639 /* No need to set mask of inner VLANs that don't exist. */
1640 for (int i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
1641 /* Always show the first zero VLAN. */
1642 WC_MASK_FIELD(wc, vlans[i]);
1643 if (flow->vlans[i].tci == htons(0)) {
1644 break;
1645 }
f0fb825a 1646 }
cb1145d1
ZB
1647 dl_type = flow->dl_type;
1648 } else {
1649 dl_type = pt_ns_type_be(flow->packet_type);
f0fb825a 1650 }
78c9486d 1651
cb1145d1 1652 if (dl_type == htons(ETH_TYPE_IP)) {
78c9486d
JR
1653 WC_MASK_FIELD(wc, nw_src);
1654 WC_MASK_FIELD(wc, nw_dst);
daf4d3c1
JR
1655 WC_MASK_FIELD(wc, ct_nw_src);
1656 WC_MASK_FIELD(wc, ct_nw_dst);
cb1145d1 1657 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
78c9486d
JR
1658 WC_MASK_FIELD(wc, ipv6_src);
1659 WC_MASK_FIELD(wc, ipv6_dst);
1660 WC_MASK_FIELD(wc, ipv6_label);
daf4d3c1
JR
1661 if (is_nd(flow, wc)) {
1662 WC_MASK_FIELD(wc, arp_sha);
1663 WC_MASK_FIELD(wc, arp_tha);
1664 WC_MASK_FIELD(wc, nd_target);
1665 } else {
1666 WC_MASK_FIELD(wc, ct_ipv6_src);
1667 WC_MASK_FIELD(wc, ct_ipv6_dst);
1668 }
cb1145d1
ZB
1669 } else if (dl_type == htons(ETH_TYPE_ARP) ||
1670 dl_type == htons(ETH_TYPE_RARP)) {
78c9486d
JR
1671 WC_MASK_FIELD(wc, nw_src);
1672 WC_MASK_FIELD(wc, nw_dst);
1673 WC_MASK_FIELD(wc, nw_proto);
1674 WC_MASK_FIELD(wc, arp_sha);
1675 WC_MASK_FIELD(wc, arp_tha);
1676 return;
cb1145d1 1677 } else if (eth_type_mpls(dl_type)) {
78c9486d
JR
1678 for (int i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
1679 WC_MASK_FIELD(wc, mpls_lse[i]);
1680 if (flow->mpls_lse[i] & htonl(MPLS_BOS_MASK)) {
1681 break;
1682 }
1683 }
1684 return;
3d2fbd70
JS
1685 } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
1686 WC_MASK_FIELD(wc, nsh.flags);
1687 WC_MASK_FIELD(wc, nsh.mdtype);
1688 WC_MASK_FIELD(wc, nsh.np);
1689 WC_MASK_FIELD(wc, nsh.spi);
1690 WC_MASK_FIELD(wc, nsh.si);
f59cb331 1691 WC_MASK_FIELD(wc, nsh.context);
78c9486d
JR
1692 } else {
1693 return; /* Unknown ethertype. */
1694 }
1695
1696 /* IPv4 or IPv6. */
1697 WC_MASK_FIELD(wc, nw_frag);
1698 WC_MASK_FIELD(wc, nw_tos);
1699 WC_MASK_FIELD(wc, nw_ttl);
1700 WC_MASK_FIELD(wc, nw_proto);
daf4d3c1
JR
1701 WC_MASK_FIELD(wc, ct_nw_proto);
1702 WC_MASK_FIELD(wc, ct_tp_src);
1703 WC_MASK_FIELD(wc, ct_tp_dst);
78c9486d
JR
1704
1705 /* No transport layer header in later fragments. */
1706 if (!(flow->nw_frag & FLOW_NW_FRAG_LATER) &&
1707 (flow->nw_proto == IPPROTO_ICMP ||
1708 flow->nw_proto == IPPROTO_ICMPV6 ||
1709 flow->nw_proto == IPPROTO_TCP ||
1710 flow->nw_proto == IPPROTO_UDP ||
1711 flow->nw_proto == IPPROTO_SCTP ||
1712 flow->nw_proto == IPPROTO_IGMP)) {
1713 WC_MASK_FIELD(wc, tp_src);
1714 WC_MASK_FIELD(wc, tp_dst);
1715
1716 if (flow->nw_proto == IPPROTO_TCP) {
1717 WC_MASK_FIELD(wc, tcp_flags);
78c9486d
JR
1718 } else if (flow->nw_proto == IPPROTO_IGMP) {
1719 WC_MASK_FIELD(wc, igmp_group_ip4);
1720 }
1721 }
1722}
1723
/* Return a map of possible fields for a packet of the same type as 'flow'.
 * Including extra bits in the returned mask is not wrong, it is just less
 * optimal.
 *
 * This is a less precise version of flow_wildcards_init_for_packet() above. */
void
flow_wc_map(const struct flow *flow, struct flowmap *map)
{
    /* Update this function whenever struct flow changes. */
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 40);

    flowmap_init(map);

    if (flow_tnl_dst_is_set(&flow->tunnel)) {
        /* All tunnel fields up to (but excluding) the metadata blob. */
        FLOWMAP_SET__(map, tunnel, offsetof(struct flow_tnl, metadata));
        if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
            if (flow->tunnel.metadata.present.map) {
                FLOWMAP_SET(map, tunnel.metadata);
            }
        } else {
            /* Raw (udpif) Geneve options: map only the bytes present. */
            FLOWMAP_SET(map, tunnel.metadata.present.len);
            FLOWMAP_SET__(map, tunnel.metadata.opts.gnv,
                          flow->tunnel.metadata.present.len);
        }
    }

    /* Metadata fields that can appear on packet input. */
    FLOWMAP_SET(map, skb_priority);
    FLOWMAP_SET(map, pkt_mark);
    FLOWMAP_SET(map, recirc_id);
    FLOWMAP_SET(map, dp_hash);
    FLOWMAP_SET(map, in_port);
    FLOWMAP_SET(map, dl_dst);
    FLOWMAP_SET(map, dl_src);
    FLOWMAP_SET(map, dl_type);
    FLOWMAP_SET(map, vlans);
    FLOWMAP_SET(map, ct_state);
    FLOWMAP_SET(map, ct_zone);
    FLOWMAP_SET(map, ct_mark);
    FLOWMAP_SET(map, ct_label);
    FLOWMAP_SET(map, packet_type);

    /* Ethertype-dependent fields. */
    if (OVS_LIKELY(flow->dl_type == htons(ETH_TYPE_IP))) {
        FLOWMAP_SET(map, nw_src);
        FLOWMAP_SET(map, nw_dst);
        FLOWMAP_SET(map, nw_proto);
        FLOWMAP_SET(map, nw_frag);
        FLOWMAP_SET(map, nw_tos);
        FLOWMAP_SET(map, nw_ttl);
        FLOWMAP_SET(map, tp_src);
        FLOWMAP_SET(map, tp_dst);
        FLOWMAP_SET(map, ct_nw_proto);
        FLOWMAP_SET(map, ct_nw_src);
        FLOWMAP_SET(map, ct_nw_dst);
        FLOWMAP_SET(map, ct_tp_src);
        FLOWMAP_SET(map, ct_tp_dst);

        /* igmp_group_ip4 and tcp_flags overlap, so only one can apply. */
        if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_IGMP)) {
            FLOWMAP_SET(map, igmp_group_ip4);
        } else {
            FLOWMAP_SET(map, tcp_flags);
        }
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        FLOWMAP_SET(map, ipv6_src);
        FLOWMAP_SET(map, ipv6_dst);
        FLOWMAP_SET(map, ipv6_label);
        FLOWMAP_SET(map, nw_proto);
        FLOWMAP_SET(map, nw_frag);
        FLOWMAP_SET(map, nw_tos);
        FLOWMAP_SET(map, nw_ttl);
        FLOWMAP_SET(map, tp_src);
        FLOWMAP_SET(map, tp_dst);

        /* Neighbor Discovery reuses the fields the conntrack tuple would
         * otherwise occupy. */
        if (OVS_UNLIKELY(is_nd(flow, NULL))) {
            FLOWMAP_SET(map, nd_target);
            FLOWMAP_SET(map, arp_sha);
            FLOWMAP_SET(map, arp_tha);
        } else {
            FLOWMAP_SET(map, ct_nw_proto);
            FLOWMAP_SET(map, ct_ipv6_src);
            FLOWMAP_SET(map, ct_ipv6_dst);
            FLOWMAP_SET(map, ct_tp_src);
            FLOWMAP_SET(map, ct_tp_dst);
            FLOWMAP_SET(map, tcp_flags);
        }
    } else if (eth_type_mpls(flow->dl_type)) {
        FLOWMAP_SET(map, mpls_lse);
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
               flow->dl_type == htons(ETH_TYPE_RARP)) {
        FLOWMAP_SET(map, nw_src);
        FLOWMAP_SET(map, nw_dst);
        FLOWMAP_SET(map, nw_proto);
        FLOWMAP_SET(map, arp_sha);
        FLOWMAP_SET(map, arp_tha);
    } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
        FLOWMAP_SET(map, nsh.flags);
        FLOWMAP_SET(map, nsh.mdtype);
        FLOWMAP_SET(map, nsh.np);
        FLOWMAP_SET(map, nsh.spi);
        FLOWMAP_SET(map, nsh.si);
        FLOWMAP_SET(map, nsh.context);
    }
}
1828
c11c6faa
AZ
1829/* Clear the metadata and register wildcard masks. They are not packet
1830 * header fields. */
1831void
1832flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
1833{
0de8783a 1834 /* Update this function whenever struct flow changes. */
3d2fbd70 1835 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 40);
0de8783a 1836
c11c6faa
AZ
1837 memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
1838 memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
c61f3870 1839 wc->masks.actset_output = 0;
18080541 1840 wc->masks.conj_id = 0;
c11c6faa
AZ
1841}
1842
ecf1e7ac
BP
1843/* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or
1844 * fields. */
1845bool
1846flow_wildcards_is_catchall(const struct flow_wildcards *wc)
1847{
d70e8c28 1848 const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
659c2346 1849 size_t i;
ecf1e7ac 1850
d70e8c28
JR
1851 for (i = 0; i < FLOW_U64S; i++) {
1852 if (wc_u64[i]) {
ecf1e7ac
BP
1853 return false;
1854 }
1855 }
ecf1e7ac
BP
1856 return true;
1857}
1858
368eefac
EJ
1859/* Sets 'dst' as the bitwise AND of wildcards in 'src1' and 'src2'.
1860 * That is, a bit or a field is wildcarded in 'dst' if it is wildcarded
1861 * in 'src1' or 'src2' or both. */
b5d97350 1862void
368eefac
EJ
1863flow_wildcards_and(struct flow_wildcards *dst,
1864 const struct flow_wildcards *src1,
1865 const struct flow_wildcards *src2)
b5d97350 1866{
d70e8c28
JR
1867 uint64_t *dst_u64 = (uint64_t *) &dst->masks;
1868 const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
1869 const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
659c2346 1870 size_t i;
a79c50f3 1871
d70e8c28
JR
1872 for (i = 0; i < FLOW_U64S; i++) {
1873 dst_u64[i] = src1_u64[i] & src2_u64[i];
26720e24 1874 }
b5d97350
BP
1875}
1876
368eefac
EJ
1877/* Sets 'dst' as the bitwise OR of wildcards in 'src1' and 'src2'. That
1878 * is, a bit or a field is wildcarded in 'dst' if it is neither
1879 * wildcarded in 'src1' nor 'src2'. */
1880void
1881flow_wildcards_or(struct flow_wildcards *dst,
1882 const struct flow_wildcards *src1,
1883 const struct flow_wildcards *src2)
1884{
d70e8c28
JR
1885 uint64_t *dst_u64 = (uint64_t *) &dst->masks;
1886 const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
1887 const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
368eefac
EJ
1888 size_t i;
1889
d70e8c28
JR
1890 for (i = 0; i < FLOW_U64S; i++) {
1891 dst_u64[i] = src1_u64[i] | src2_u64[i];
368eefac
EJ
1892 }
1893}
1894
b5d97350
BP
1895/* Returns a hash of the wildcards in 'wc'. */
1896uint32_t
1006cda6 1897flow_wildcards_hash(const struct flow_wildcards *wc, uint32_t basis)
b5d97350 1898{
ac31c5af 1899 return flow_hash(&wc->masks, basis);
b5d97350
BP
1900}
1901
1902/* Returns true if 'a' and 'b' represent the same wildcards, false if they are
1903 * different. */
1904bool
1905flow_wildcards_equal(const struct flow_wildcards *a,
1906 const struct flow_wildcards *b)
1907{
659c2346 1908 return flow_equal(&a->masks, &b->masks);
b5d97350
BP
1909}
1910
1911/* Returns true if at least one bit or field is wildcarded in 'a' but not in
1912 * 'b', false otherwise. */
1913bool
1914flow_wildcards_has_extra(const struct flow_wildcards *a,
1915 const struct flow_wildcards *b)
1916{
d70e8c28
JR
1917 const uint64_t *a_u64 = (const uint64_t *) &a->masks;
1918 const uint64_t *b_u64 = (const uint64_t *) &b->masks;
659c2346 1919 size_t i;
a79c50f3 1920
d70e8c28
JR
1921 for (i = 0; i < FLOW_U64S; i++) {
1922 if ((a_u64[i] & b_u64[i]) != b_u64[i]) {
b6c9e612
BP
1923 return true;
1924 }
1925 }
659c2346
BP
1926 return false;
1927}
b6c9e612 1928
659c2346
BP
1929/* Returns true if 'a' and 'b' are equal, except that 0-bits (wildcarded bits)
1930 * in 'wc' do not need to be equal in 'a' and 'b'. */
1931bool
1932flow_equal_except(const struct flow *a, const struct flow *b,
1933 const struct flow_wildcards *wc)
1934{
d70e8c28
JR
1935 const uint64_t *a_u64 = (const uint64_t *) a;
1936 const uint64_t *b_u64 = (const uint64_t *) b;
1937 const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
659c2346 1938 size_t i;
d31f1109 1939
d70e8c28
JR
1940 for (i = 0; i < FLOW_U64S; i++) {
1941 if ((a_u64[i] ^ b_u64[i]) & wc_u64[i]) {
659c2346
BP
1942 return false;
1943 }
47284b1f 1944 }
659c2346 1945 return true;
b5d97350
BP
1946}
1947
b6c9e612
BP
1948/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
1949 * (A 0-bit indicates a wildcard bit.) */
1950void
1951flow_wildcards_set_reg_mask(struct flow_wildcards *wc, int idx, uint32_t mask)
1952{
26720e24 1953 wc->masks.regs[idx] = mask;
b6c9e612 1954}
ff55ea1f 1955
79fe0f46
BP
1956/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
1957 * (A 0-bit indicates a wildcard bit.) */
1958void
1959flow_wildcards_set_xreg_mask(struct flow_wildcards *wc, int idx, uint64_t mask)
1960{
1961 flow_set_xreg(&wc->masks, idx, mask);
1962}
1963
b23ada8e
JP
1964/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
1965 * (A 0-bit indicates a wildcard bit.) */
1966void
1967flow_wildcards_set_xxreg_mask(struct flow_wildcards *wc, int idx,
1968 ovs_u128 mask)
1969{
1970 flow_set_xxreg(&wc->masks, idx, mask);
1971}
1972
28a560d9
JR
1973/* Calculates the 5-tuple hash from the given miniflow.
1974 * This returns the same value as flow_hash_5tuple for the corresponding
1975 * flow. */
4f150744
JR
1976uint32_t
1977miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
1978{
3d2fbd70 1979 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 40);
28a560d9 1980 uint32_t hash = basis;
4f150744 1981
28a560d9
JR
1982 if (flow) {
1983 ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
362ad4ba 1984 uint8_t nw_proto;
28a560d9 1985
28a560d9 1986 if (dl_type == htons(ETH_TYPE_IPV6)) {
5fcff47b 1987 struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
d70e8c28 1988 uint64_t value;
4f150744 1989
5fcff47b
JR
1990 FLOWMAP_SET(&map, ipv6_src);
1991 FLOWMAP_SET(&map, ipv6_dst);
1992
1993 MINIFLOW_FOR_EACH_IN_FLOWMAP(value, flow, map) {
d70e8c28 1994 hash = hash_add64(hash, value);
28a560d9 1995 }
362ad4ba
DDP
1996 } else if (dl_type == htons(ETH_TYPE_IP)
1997 || dl_type == htons(ETH_TYPE_ARP)) {
d70e8c28
JR
1998 hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_src));
1999 hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_dst));
362ad4ba
DDP
2000 } else {
2001 goto out;
28a560d9 2002 }
362ad4ba
DDP
2003
2004 nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
2005 hash = hash_add(hash, nw_proto);
2006 if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
2007 && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
2008 && nw_proto != IPPROTO_ICMPV6) {
2009 goto out;
2010 }
2011
d70e8c28
JR
2012 /* Add both ports at once. */
2013 hash = hash_add(hash, MINIFLOW_GET_U32(flow, tp_src));
28a560d9 2014 }
362ad4ba
DDP
2015out:
2016 return hash_finish(hash, 42);
4f150744
JR
2017}
2018
268eca11
BP
2019ASSERT_SEQUENTIAL_SAME_WORD(tp_src, tp_dst);
2020ASSERT_SEQUENTIAL(ipv6_src, ipv6_dst);
4f150744 2021
63be20be
AW
2022/* Calculates the 5-tuple hash from the given flow. */
2023uint32_t
2024flow_hash_5tuple(const struct flow *flow, uint32_t basis)
2025{
3d2fbd70 2026 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 40);
28a560d9 2027 uint32_t hash = basis;
63be20be 2028
28a560d9 2029 if (flow) {
28a560d9
JR
2030
2031 if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
d70e8c28
JR
2032 const uint64_t *flow_u64 = (const uint64_t *)flow;
2033 int ofs = offsetof(struct flow, ipv6_src) / 8;
2034 int end = ofs + 2 * sizeof flow->ipv6_src / 8;
63be20be 2035
d70e8c28
JR
2036 for (;ofs < end; ofs++) {
2037 hash = hash_add64(hash, flow_u64[ofs]);
28a560d9 2038 }
362ad4ba
DDP
2039 } else if (flow->dl_type == htons(ETH_TYPE_IP)
2040 || flow->dl_type == htons(ETH_TYPE_ARP)) {
33c6a1b9
JR
2041 hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_src);
2042 hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_dst);
362ad4ba
DDP
2043 } else {
2044 goto out;
28a560d9 2045 }
362ad4ba
DDP
2046
2047 hash = hash_add(hash, flow->nw_proto);
2048 if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP
2049 && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP
2050 && flow->nw_proto != IPPROTO_ICMPV6) {
2051 goto out;
2052 }
2053
d70e8c28
JR
2054 /* Add both ports at once. */
2055 hash = hash_add(hash,
2056 ((const uint32_t *)flow)[offsetof(struct flow, tp_src)
2057 / sizeof(uint32_t)]);
28a560d9 2058 }
362ad4ba
DDP
2059out:
2060 return hash_finish(hash, 42); /* Arbitrary number. */
63be20be
AW
2061}
2062
ff55ea1f
EJ
2063/* Hashes 'flow' based on its L2 through L4 protocol information. */
2064uint32_t
2065flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis)
2066{
2067 struct {
d31f1109
JP
2068 union {
2069 ovs_be32 ipv4_addr;
2070 struct in6_addr ipv6_addr;
2071 };
ff55ea1f
EJ
2072 ovs_be16 eth_type;
2073 ovs_be16 vlan_tci;
5b909cbb 2074 ovs_be16 tp_port;
74ff3298 2075 struct eth_addr eth_addr;
ff55ea1f
EJ
2076 uint8_t ip_proto;
2077 } fields;
2078
2079 int i;
2080
2081 memset(&fields, 0, sizeof fields);
74ff3298
JR
2082 for (i = 0; i < ARRAY_SIZE(fields.eth_addr.be16); i++) {
2083 fields.eth_addr.be16[i] = flow->dl_src.be16[i] ^ flow->dl_dst.be16[i];
ff55ea1f 2084 }
f0fb825a
EG
2085 for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2086 fields.vlan_tci ^= flow->vlans[i].tci & htons(VLAN_VID_MASK);
2087 }
ff55ea1f 2088 fields.eth_type = flow->dl_type;
3e3eda95
EJ
2089
2090 /* UDP source and destination port are not taken into account because they
2091 * will not necessarily be symmetric in a bidirectional flow. */
ff55ea1f 2092 if (fields.eth_type == htons(ETH_TYPE_IP)) {
d31f1109
JP
2093 fields.ipv4_addr = flow->nw_src ^ flow->nw_dst;
2094 fields.ip_proto = flow->nw_proto;
c6bcb685 2095 if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
5b909cbb 2096 fields.tp_port = flow->tp_src ^ flow->tp_dst;
d31f1109
JP
2097 }
2098 } else if (fields.eth_type == htons(ETH_TYPE_IPV6)) {
2099 const uint8_t *a = &flow->ipv6_src.s6_addr[0];
2100 const uint8_t *b = &flow->ipv6_dst.s6_addr[0];
2101 uint8_t *ipv6_addr = &fields.ipv6_addr.s6_addr[0];
2102
2103 for (i=0; i<16; i++) {
2104 ipv6_addr[i] = a[i] ^ b[i];
2105 }
ff55ea1f 2106 fields.ip_proto = flow->nw_proto;
c6bcb685 2107 if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
5b909cbb 2108 fields.tp_port = flow->tp_src ^ flow->tp_dst;
ff55ea1f 2109 }
ff55ea1f 2110 }
c49d1dd1 2111 return jhash_bytes(&fields, sizeof fields, basis);
ff55ea1f 2112}
520e9a2a 2113
4249b547
JB
2114/* Hashes 'flow' based on its L3 through L4 protocol information */
2115uint32_t
2116flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
2117 bool inc_udp_ports)
2118{
2119 uint32_t hash = basis;
2120
2121 /* UDP source and destination port are also taken into account. */
2122 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2123 hash = hash_add(hash,
2124 (OVS_FORCE uint32_t) (flow->nw_src ^ flow->nw_dst));
2125 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2126 /* IPv6 addresses are 64-bit aligned inside struct flow. */
2127 const uint64_t *a = ALIGNED_CAST(uint64_t *, flow->ipv6_src.s6_addr);
2128 const uint64_t *b = ALIGNED_CAST(uint64_t *, flow->ipv6_dst.s6_addr);
2129
caaabd19 2130 for (int i = 0; i < sizeof flow->ipv6_src / sizeof *a; i++) {
4249b547
JB
2131 hash = hash_add64(hash, a[i] ^ b[i]);
2132 }
2133 } else {
2134 /* Cannot hash non-IP flows */
2135 return 0;
2136 }
2137
2138 hash = hash_add(hash, flow->nw_proto);
2139 if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
2140 (inc_udp_ports && flow->nw_proto == IPPROTO_UDP)) {
2141 hash = hash_add(hash,
2142 (OVS_FORCE uint16_t) (flow->tp_src ^ flow->tp_dst));
2143 }
2144
2145 return hash_finish(hash, basis);
2146}
2147
94639963
JR
2148/* Initialize a flow with random fields that matter for nx_hash_fields. */
2149void
2150flow_random_hash_fields(struct flow *flow)
2151{
2152 uint16_t rnd = random_uint16();
f0fb825a 2153 int i;
94639963
JR
2154
2155 /* Initialize to all zeros. */
2156 memset(flow, 0, sizeof *flow);
2157
74ff3298
JR
2158 eth_addr_random(&flow->dl_src);
2159 eth_addr_random(&flow->dl_dst);
94639963 2160
f0fb825a
EG
2161 for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2162 uint16_t vlan = random_uint16() & VLAN_VID_MASK;
2163 flow->vlans[i].tpid = htons(ETH_TYPE_VLAN_8021Q);
2164 flow->vlans[i].tci = htons(vlan | VLAN_CFI);
2165 }
94639963
JR
2166
2167 /* Make most of the random flows IPv4, some IPv6, and rest random. */
2168 flow->dl_type = rnd < 0x8000 ? htons(ETH_TYPE_IP) :
2169 rnd < 0xc000 ? htons(ETH_TYPE_IPV6) : (OVS_FORCE ovs_be16)rnd;
2170
2171 if (dl_type_is_ip_any(flow->dl_type)) {
2172 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2173 flow->nw_src = (OVS_FORCE ovs_be32)random_uint32();
2174 flow->nw_dst = (OVS_FORCE ovs_be32)random_uint32();
2175 } else {
2176 random_bytes(&flow->ipv6_src, sizeof flow->ipv6_src);
2177 random_bytes(&flow->ipv6_dst, sizeof flow->ipv6_dst);
2178 }
2179 /* Make most of IP flows TCP, some UDP or SCTP, and rest random. */
2180 rnd = random_uint16();
2181 flow->nw_proto = rnd < 0x8000 ? IPPROTO_TCP :
2182 rnd < 0xc000 ? IPPROTO_UDP :
2183 rnd < 0xd000 ? IPPROTO_SCTP : (uint8_t)rnd;
2184 if (flow->nw_proto == IPPROTO_TCP ||
2185 flow->nw_proto == IPPROTO_UDP ||
2186 flow->nw_proto == IPPROTO_SCTP) {
2187 flow->tp_src = (OVS_FORCE ovs_be16)random_uint16();
2188 flow->tp_dst = (OVS_FORCE ovs_be16)random_uint16();
2189 }
2190 }
2191}
2192
bcd2633a
JP
2193/* Masks the fields in 'wc' that are used by the flow hash 'fields'. */
2194void
6cdd5145
JP
2195flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,
2196 enum nx_hash_fields fields)
bcd2633a 2197{
f0fb825a 2198 int i;
bcd2633a
JP
2199 switch (fields) {
2200 case NX_HASH_FIELDS_ETH_SRC:
2201 memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2202 break;
2203
2204 case NX_HASH_FIELDS_SYMMETRIC_L4:
2205 memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2206 memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
6cdd5145
JP
2207 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2208 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2209 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
7f8a65ca 2210 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
6cdd5145
JP
2211 memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2212 memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2213 }
2214 if (is_ip_any(flow)) {
2215 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
d8d9c698 2216 flow_unwildcard_tp_ports(flow, wc);
6cdd5145 2217 }
f0fb825a
EG
2218 for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2219 wc->masks.vlans[i].tci |= htons(VLAN_VID_MASK | VLAN_CFI);
2220 }
bcd2633a
JP
2221 break;
2222
4249b547
JB
2223 case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
2224 if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP) {
2225 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2226 memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
2227 }
73c7216a 2228 /* fall through */
4249b547
JB
2229 case NX_HASH_FIELDS_SYMMETRIC_L3L4:
2230 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2231 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2232 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2233 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2234 memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2235 memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2236 } else {
2237 break; /* non-IP flow */
2238 }
2239
2240 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
2241 if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP) {
2242 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2243 memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
2244 }
2245 break;
2246
417cfdb6 2247 case NX_HASH_FIELDS_NW_SRC:
2248 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2249 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2250 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2251 memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2252 }
2253 break;
2254
2255 case NX_HASH_FIELDS_NW_DST:
2256 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2257 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2258 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2259 memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2260 }
2261 break;
2262
bcd2633a 2263 default:
428b2edd 2264 OVS_NOT_REACHED();
bcd2633a
JP
2265 }
2266}
2267
520e9a2a
EJ
2268/* Hashes the portions of 'flow' designated by 'fields'. */
2269uint32_t
2270flow_hash_fields(const struct flow *flow, enum nx_hash_fields fields,
2271 uint16_t basis)
2272{
2273 switch (fields) {
2274
2275 case NX_HASH_FIELDS_ETH_SRC:
74ff3298 2276 return jhash_bytes(&flow->dl_src, sizeof flow->dl_src, basis);
520e9a2a
EJ
2277
2278 case NX_HASH_FIELDS_SYMMETRIC_L4:
2279 return flow_hash_symmetric_l4(flow, basis);
4249b547
JB
2280
2281 case NX_HASH_FIELDS_SYMMETRIC_L3L4:
2282 return flow_hash_symmetric_l3l4(flow, basis, false);
2283
2284 case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
2285 return flow_hash_symmetric_l3l4(flow, basis, true);
2286
417cfdb6 2287 case NX_HASH_FIELDS_NW_SRC:
2288 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2289 return jhash_bytes(&flow->nw_src, sizeof flow->nw_src, basis);
2290 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2291 return jhash_bytes(&flow->ipv6_src, sizeof flow->ipv6_src, basis);
2292 } else {
2293 return basis;
2294 }
2295
2296 case NX_HASH_FIELDS_NW_DST:
2297 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2298 return jhash_bytes(&flow->nw_dst, sizeof flow->nw_dst, basis);
2299 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2300 return jhash_bytes(&flow->ipv6_dst, sizeof flow->ipv6_dst, basis);
2301 } else {
2302 return basis;
2303 }
2304
520e9a2a
EJ
2305 }
2306
428b2edd 2307 OVS_NOT_REACHED();
520e9a2a
EJ
2308}
2309
2310/* Returns a string representation of 'fields'. */
2311const char *
2312flow_hash_fields_to_str(enum nx_hash_fields fields)
2313{
2314 switch (fields) {
2315 case NX_HASH_FIELDS_ETH_SRC: return "eth_src";
2316 case NX_HASH_FIELDS_SYMMETRIC_L4: return "symmetric_l4";
4249b547
JB
2317 case NX_HASH_FIELDS_SYMMETRIC_L3L4: return "symmetric_l3l4";
2318 case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP: return "symmetric_l3l4+udp";
417cfdb6 2319 case NX_HASH_FIELDS_NW_SRC: return "nw_src";
2320 case NX_HASH_FIELDS_NW_DST: return "nw_dst";
520e9a2a
EJ
2321 default: return "<unknown>";
2322 }
2323}
2324
2325/* Returns true if the value of 'fields' is supported. Otherwise false. */
2326bool
2327flow_hash_fields_valid(enum nx_hash_fields fields)
2328{
2329 return fields == NX_HASH_FIELDS_ETH_SRC
4249b547
JB
2330 || fields == NX_HASH_FIELDS_SYMMETRIC_L4
2331 || fields == NX_HASH_FIELDS_SYMMETRIC_L3L4
417cfdb6 2332 || fields == NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP
2333 || fields == NX_HASH_FIELDS_NW_SRC
2334 || fields == NX_HASH_FIELDS_NW_DST;
520e9a2a 2335}
8b3b8dd1 2336
368eefac
EJ
2337/* Returns a hash value for the bits of 'flow' that are active based on
2338 * 'wc', given 'basis'. */
2339uint32_t
2340flow_hash_in_wildcards(const struct flow *flow,
2341 const struct flow_wildcards *wc, uint32_t basis)
2342{
d70e8c28
JR
2343 const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
2344 const uint64_t *flow_u64 = (const uint64_t *) flow;
368eefac
EJ
2345 uint32_t hash;
2346 size_t i;
2347
2348 hash = basis;
d70e8c28
JR
2349 for (i = 0; i < FLOW_U64S; i++) {
2350 hash = hash_add64(hash, flow_u64[i] & wc_u64[i]);
368eefac 2351 }
d70e8c28 2352 return hash_finish(hash, 8 * FLOW_U64S);
368eefac
EJ
2353}
2354
3719455c
BP
2355/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2356 * OpenFlow 1.0 "dl_vlan" value:
2357 *
f0fb825a 2358 * - If it is in the range 0...4095, 'flow->vlans[0].tci' is set to match
3719455c
BP
2359 * that VLAN. Any existing PCP match is unchanged (it becomes 0 if
2360 * 'flow' previously matched packets without a VLAN header).
2361 *
2362 * - If it is OFP_VLAN_NONE, 'flow->vlan_tci' is set to match a packet
2363 * without a VLAN tag.
2364 *
2365 * - Other values of 'vid' should not be used. */
2366void
fb0451d9 2367flow_set_dl_vlan(struct flow *flow, ovs_be16 vid)
3719455c 2368{
0c436519 2369 if (vid == htons(OFP10_VLAN_NONE)) {
f0fb825a 2370 flow->vlans[0].tci = htons(0);
3719455c
BP
2371 } else {
2372 vid &= htons(VLAN_VID_MASK);
f0fb825a
EG
2373 flow->vlans[0].tci &= ~htons(VLAN_VID_MASK);
2374 flow->vlans[0].tci |= htons(VLAN_CFI) | vid;
2375 }
2376}
2377
2378/* Sets the VLAN header TPID, which must be either ETH_TYPE_VLAN_8021Q or
2379 * ETH_TYPE_VLAN_8021AD. */
2380void
2381flow_fix_vlan_tpid(struct flow *flow)
2382{
2383 if (flow->vlans[0].tpid == htons(0) && flow->vlans[0].tci != 0) {
2384 flow->vlans[0].tpid = htons(ETH_TYPE_VLAN_8021Q);
3719455c
BP
2385 }
2386}
2387
cc34bc8c
BP
2388/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2389 * OpenFlow 1.2 "vlan_vid" value, that is, the low 13 bits of 'vlan_tci' (VID
2390 * plus CFI). */
2391void
2392flow_set_vlan_vid(struct flow *flow, ovs_be16 vid)
2393{
2394 ovs_be16 mask = htons(VLAN_VID_MASK | VLAN_CFI);
f0fb825a
EG
2395 flow->vlans[0].tci &= ~mask;
2396 flow->vlans[0].tci |= vid & mask;
cc34bc8c
BP
2397}
2398
3719455c
BP
2399/* Sets the VLAN PCP that 'flow' matches to 'pcp', which should be in the
2400 * range 0...7.
2401 *
2402 * This function has no effect on the VLAN ID that 'flow' matches.
2403 *
2404 * After calling this function, 'flow' will not match packets without a VLAN
2405 * header. */
2406void
2407flow_set_vlan_pcp(struct flow *flow, uint8_t pcp)
2408{
2409 pcp &= 0x07;
f0fb825a
EG
2410 flow->vlans[0].tci &= ~htons(VLAN_PCP_MASK);
2411 flow->vlans[0].tci |= htons((pcp << VLAN_PCP_SHIFT) | VLAN_CFI);
2412}
2413
2414/* Counts the number of VLAN headers. */
2415int
2416flow_count_vlan_headers(const struct flow *flow)
2417{
2418 int i;
2419
2420 for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2421 if (!(flow->vlans[i].tci & htons(VLAN_CFI))) {
2422 break;
2423 }
2424 }
2425 return i;
2426}
2427
2428/* Given '*p_an' and '*p_bn' pointing to one past the last VLAN header of
2429 * 'a' and 'b' respectively, skip common VLANs so that they point to the
2430 * first different VLAN counting from bottom. */
2431void
2432flow_skip_common_vlan_headers(const struct flow *a, int *p_an,
2433 const struct flow *b, int *p_bn)
2434{
2435 int an = *p_an, bn = *p_bn;
2436
2437 for (an--, bn--; an >= 0 && bn >= 0; an--, bn--) {
2438 if (a->vlans[an].qtag != b->vlans[bn].qtag) {
2439 break;
2440 }
2441 }
2442 *p_an = an;
2443 *p_bn = bn;
2444}
2445
2446void
2447flow_pop_vlan(struct flow *flow, struct flow_wildcards *wc)
2448{
2449 int n = flow_count_vlan_headers(flow);
68c744fd
BP
2450 if (n > 1) {
2451 if (wc) {
2452 memset(&wc->masks.vlans[1], 0xff,
2453 sizeof(union flow_vlan_hdr) * (n - 1));
2454 }
2455 memmove(&flow->vlans[0], &flow->vlans[1],
2456 sizeof(union flow_vlan_hdr) * (n - 1));
f0fb825a 2457 }
68c744fd
BP
2458 if (n > 0) {
2459 memset(&flow->vlans[n - 1], 0, sizeof(union flow_vlan_hdr));
f0fb825a 2460 }
f0fb825a
EG
2461}
2462
2463void
2464flow_push_vlan_uninit(struct flow *flow, struct flow_wildcards *wc)
2465{
2466 if (wc) {
2467 int n = flow_count_vlan_headers(flow);
6b6b508b
DB
2468 if (n) {
2469 memset(wc->masks.vlans, 0xff, sizeof(union flow_vlan_hdr) * n);
2470 }
f0fb825a
EG
2471 }
2472 memmove(&flow->vlans[1], &flow->vlans[0],
2473 sizeof(union flow_vlan_hdr) * (FLOW_MAX_VLAN_HEADERS - 1));
2474 memset(&flow->vlans[0], 0, sizeof(union flow_vlan_hdr));
3719455c
BP
2475}
2476
8bfd0fda
BP
2477/* Returns the number of MPLS LSEs present in 'flow'
2478 *
2479 * Returns 0 if the 'dl_type' of 'flow' is not an MPLS ethernet type.
2480 * Otherwise traverses 'flow''s MPLS label stack stopping at the
2481 * first entry that has the BoS bit set. If no such entry exists then
2482 * the maximum number of LSEs that can be stored in 'flow' is returned.
2483 */
2484int
2485flow_count_mpls_labels(const struct flow *flow, struct flow_wildcards *wc)
2486{
22d38fca 2487 /* dl_type is always masked. */
8bfd0fda
BP
2488 if (eth_type_mpls(flow->dl_type)) {
2489 int i;
5af43325 2490 int cnt;
8bfd0fda 2491
5af43325
PS
2492 cnt = 0;
2493 for (i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
8bfd0fda
BP
2494 if (wc) {
2495 wc->masks.mpls_lse[i] |= htonl(MPLS_BOS_MASK);
2496 }
2497 if (flow->mpls_lse[i] & htonl(MPLS_BOS_MASK)) {
2498 return i + 1;
2499 }
5af43325
PS
2500 if (flow->mpls_lse[i]) {
2501 cnt++;
2502 }
8bfd0fda 2503 }
5af43325 2504 return cnt;
8bfd0fda
BP
2505 } else {
2506 return 0;
2507 }
2508}
2509
2510/* Returns the number consecutive of MPLS LSEs, starting at the
2511 * innermost LSE, that are common in 'a' and 'b'.
2512 *
2513 * 'an' must be flow_count_mpls_labels(a).
2514 * 'bn' must be flow_count_mpls_labels(b).
2515 */
2516int
2517flow_count_common_mpls_labels(const struct flow *a, int an,
2518 const struct flow *b, int bn,
2519 struct flow_wildcards *wc)
2520{
2521 int min_n = MIN(an, bn);
2522 if (min_n == 0) {
2523 return 0;
2524 } else {
2525 int common_n = 0;
2526 int a_last = an - 1;
2527 int b_last = bn - 1;
2528 int i;
2529
2530 for (i = 0; i < min_n; i++) {
2531 if (wc) {
2532 wc->masks.mpls_lse[a_last - i] = OVS_BE32_MAX;
2533 wc->masks.mpls_lse[b_last - i] = OVS_BE32_MAX;
2534 }
2535 if (a->mpls_lse[a_last - i] != b->mpls_lse[b_last - i]) {
2536 break;
2537 } else {
2538 common_n++;
2539 }
2540 }
2541
2542 return common_n;
2543 }
2544}
2545
2546/* Adds a new outermost MPLS label to 'flow' and changes 'flow''s Ethernet type
2547 * to 'mpls_eth_type', which must be an MPLS Ethertype.
2548 *
2549 * If the new label is the first MPLS label in 'flow', it is generated as;
2550 *
2551 * - label: 2, if 'flow' is IPv6, otherwise 0.
2552 *
2553 * - TTL: IPv4 or IPv6 TTL, if present and nonzero, otherwise 64.
2554 *
2555 * - TC: IPv4 or IPv6 TOS, if present, otherwise 0.
2556 *
2557 * - BoS: 1.
2558 *
22d38fca 2559 * If the new label is the second or later label MPLS label in 'flow', it is
8bfd0fda
BP
2560 * generated as;
2561 *
368fb7e6 2562 * - label: Copied from outer label.
8bfd0fda
BP
2563 *
2564 * - TTL: Copied from outer label.
2565 *
2566 * - TC: Copied from outer label.
2567 *
2568 * - BoS: 0.
2569 *
2570 * 'n' must be flow_count_mpls_labels(flow). 'n' must be less than
2571 * FLOW_MAX_MPLS_LABELS (because otherwise flow->mpls_lse[] would overflow).
2572 */
2573void
2574flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
742c0ac3 2575 struct flow_wildcards *wc, bool clear_flow_L3)
8bfd0fda
BP
2576{
2577 ovs_assert(eth_type_mpls(mpls_eth_type));
2578 ovs_assert(n < FLOW_MAX_MPLS_LABELS);
2579
8bfd0fda
BP
2580 if (n) {
2581 int i;
2582
22d38fca
JR
2583 if (wc) {
2584 memset(&wc->masks.mpls_lse, 0xff, sizeof *wc->masks.mpls_lse * n);
2585 }
8bfd0fda
BP
2586 for (i = n; i >= 1; i--) {
2587 flow->mpls_lse[i] = flow->mpls_lse[i - 1];
2588 }
22d38fca 2589 flow->mpls_lse[0] = (flow->mpls_lse[1] & htonl(~MPLS_BOS_MASK));
8bfd0fda
BP
2590 } else {
2591 int label = 0; /* IPv4 Explicit Null. */
2592 int tc = 0;
2593 int ttl = 64;
2594
2595 if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2596 label = 2;
2597 }
2598
2599 if (is_ip_any(flow)) {
2600 tc = (flow->nw_tos & IP_DSCP_MASK) >> 2;
22d38fca
JR
2601 if (wc) {
2602 wc->masks.nw_tos |= IP_DSCP_MASK;
2603 wc->masks.nw_ttl = 0xff;
2604 }
8bfd0fda
BP
2605
2606 if (flow->nw_ttl) {
2607 ttl = flow->nw_ttl;
2608 }
8bfd0fda
BP
2609 }
2610
2611 flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label));
2612
742c0ac3
JR
2613 if (clear_flow_L3) {
2614 /* Clear all L3 and L4 fields and dp_hash. */
3d2fbd70 2615 BUILD_ASSERT(FLOW_WC_SEQ == 40);
742c0ac3
JR
2616 memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
2617 sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
2618 flow->dp_hash = 0;
2619 }
8bfd0fda
BP
2620 }
2621 flow->dl_type = mpls_eth_type;
2622}
2623
2624/* Tries to remove the outermost MPLS label from 'flow'. Returns true if
2625 * successful, false otherwise. On success, sets 'flow''s Ethernet type to
2626 * 'eth_type'.
2627 *
2628 * 'n' must be flow_count_mpls_labels(flow). */
2629bool
2630flow_pop_mpls(struct flow *flow, int n, ovs_be16 eth_type,
2631 struct flow_wildcards *wc)
2632{
2633 int i;
2634
2635 if (n == 0) {
2636 /* Nothing to pop. */
2637 return false;
22d38fca
JR
2638 } else if (n == FLOW_MAX_MPLS_LABELS) {
2639 if (wc) {
2640 wc->masks.mpls_lse[n - 1] |= htonl(MPLS_BOS_MASK);
2641 }
2642 if (!(flow->mpls_lse[n - 1] & htonl(MPLS_BOS_MASK))) {
2643 /* Can't pop because don't know what to fill in mpls_lse[n - 1]. */
2644 return false;
2645 }
8bfd0fda
BP
2646 }
2647
22d38fca
JR
2648 if (wc) {
2649 memset(&wc->masks.mpls_lse[1], 0xff,
2650 sizeof *wc->masks.mpls_lse * (n - 1));
2651 }
8bfd0fda
BP
2652 for (i = 1; i < n; i++) {
2653 flow->mpls_lse[i - 1] = flow->mpls_lse[i];
2654 }
2655 flow->mpls_lse[n - 1] = 0;
2656 flow->dl_type = eth_type;
2657 return true;
2658}
2659
b02475c5
SH
2660/* Sets the MPLS Label that 'flow' matches to 'label', which is interpreted
2661 * as an OpenFlow 1.1 "mpls_label" value. */
2662void
8bfd0fda 2663flow_set_mpls_label(struct flow *flow, int idx, ovs_be32 label)
b02475c5 2664{
8bfd0fda 2665 set_mpls_lse_label(&flow->mpls_lse[idx], label);
b02475c5
SH
2666}
2667
b676167a
SH
2668/* Sets the MPLS TTL that 'flow' matches to 'ttl', which should be in the
2669 * range 0...255. */
2670void
8bfd0fda 2671flow_set_mpls_ttl(struct flow *flow, int idx, uint8_t ttl)
b676167a 2672{
8bfd0fda 2673 set_mpls_lse_ttl(&flow->mpls_lse[idx], ttl);
b676167a
SH
2674}
2675
b02475c5
SH
2676/* Sets the MPLS TC that 'flow' matches to 'tc', which should be in the
2677 * range 0...7. */
2678void
8bfd0fda 2679flow_set_mpls_tc(struct flow *flow, int idx, uint8_t tc)
b02475c5 2680{
8bfd0fda 2681 set_mpls_lse_tc(&flow->mpls_lse[idx], tc);
b02475c5
SH
2682}
2683
2684/* Sets the MPLS BOS bit that 'flow' matches to which should be 0 or 1. */
2685void
8bfd0fda 2686flow_set_mpls_bos(struct flow *flow, int idx, uint8_t bos)
b02475c5 2687{
8bfd0fda 2688 set_mpls_lse_bos(&flow->mpls_lse[idx], bos);
b02475c5
SH
2689}
2690
8bfd0fda
BP
2691/* Sets the entire MPLS LSE. */
2692void
2693flow_set_mpls_lse(struct flow *flow, int idx, ovs_be32 lse)
2694{
2695 flow->mpls_lse[idx] = lse;
2696}
52105b67 2697
437d0d22 2698static size_t
cf62fa4c 2699flow_compose_l4(struct dp_packet *p, const struct flow *flow)
52105b67 2700{
437d0d22
JR
2701 size_t l4_len = 0;
2702
52105b67
JR
2703 if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
2704 || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
2705 if (flow->nw_proto == IPPROTO_TCP) {
2706 struct tcp_header *tcp;
2707
437d0d22 2708 l4_len = sizeof *tcp;
cf62fa4c 2709 tcp = dp_packet_put_zeros(p, l4_len);
52105b67
JR
2710 tcp->tcp_src = flow->tp_src;
2711 tcp->tcp_dst = flow->tp_dst;
2712 tcp->tcp_ctl = TCP_CTL(ntohs(flow->tcp_flags), 5);
52105b67
JR
2713 } else if (flow->nw_proto == IPPROTO_UDP) {
2714 struct udp_header *udp;
2715
437d0d22 2716 l4_len = sizeof *udp;
cf62fa4c 2717 udp = dp_packet_put_zeros(p, l4_len);
52105b67
JR
2718 udp->udp_src = flow->tp_src;
2719 udp->udp_dst = flow->tp_dst;
e839d01e 2720 udp->udp_len = htons(l4_len);
52105b67
JR
2721 } else if (flow->nw_proto == IPPROTO_SCTP) {
2722 struct sctp_header *sctp;
2723
437d0d22 2724 l4_len = sizeof *sctp;
cf62fa4c 2725 sctp = dp_packet_put_zeros(p, l4_len);
52105b67
JR
2726 sctp->sctp_src = flow->tp_src;
2727 sctp->sctp_dst = flow->tp_dst;
52105b67
JR
2728 } else if (flow->nw_proto == IPPROTO_ICMP) {
2729 struct icmp_header *icmp;
2730
437d0d22 2731 l4_len = sizeof *icmp;
cf62fa4c 2732 icmp = dp_packet_put_zeros(p, l4_len);
52105b67
JR
2733 icmp->icmp_type = ntohs(flow->tp_src);
2734 icmp->icmp_code = ntohs(flow->tp_dst);
0e612675
FL
2735 } else if (flow->nw_proto == IPPROTO_IGMP) {
2736 struct igmp_header *igmp;
2737
2738 l4_len = sizeof *igmp;
cf62fa4c 2739 igmp = dp_packet_put_zeros(p, l4_len);
0e612675
FL
2740 igmp->igmp_type = ntohs(flow->tp_src);
2741 igmp->igmp_code = ntohs(flow->tp_dst);
2742 put_16aligned_be32(&igmp->group, flow->igmp_group_ip4);
52105b67
JR
2743 } else if (flow->nw_proto == IPPROTO_ICMPV6) {
2744 struct icmp6_hdr *icmp;
2745
437d0d22 2746 l4_len = sizeof *icmp;
cf62fa4c 2747 icmp = dp_packet_put_zeros(p, l4_len);
52105b67
JR
2748 icmp->icmp6_type = ntohs(flow->tp_src);
2749 icmp->icmp6_code = ntohs(flow->tp_dst);
2750
2751 if (icmp->icmp6_code == 0 &&
2752 (icmp->icmp6_type == ND_NEIGHBOR_SOLICIT ||
2753 icmp->icmp6_type == ND_NEIGHBOR_ADVERT)) {
2754 struct in6_addr *nd_target;
86d46f3c 2755 struct ovs_nd_lla_opt *lla_opt;
52105b67 2756
437d0d22 2757 l4_len += sizeof *nd_target;
cf62fa4c 2758 nd_target = dp_packet_put_zeros(p, sizeof *nd_target);
52105b67
JR
2759 *nd_target = flow->nd_target;
2760
2761 if (!eth_addr_is_zero(flow->arp_sha)) {
437d0d22 2762 l4_len += 8;
86d46f3c
ZKL
2763 lla_opt = dp_packet_put_zeros(p, 8);
2764 lla_opt->len = 1;
2765 lla_opt->type = ND_OPT_SOURCE_LINKADDR;
2766 lla_opt->mac = flow->arp_sha;
52105b67
JR
2767 }
2768 if (!eth_addr_is_zero(flow->arp_tha)) {
437d0d22 2769 l4_len += 8;
86d46f3c
ZKL
2770 lla_opt = dp_packet_put_zeros(p, 8);
2771 lla_opt->len = 1;
2772 lla_opt->type = ND_OPT_TARGET_LINKADDR;
2773 lla_opt->mac = flow->arp_tha;
52105b67
JR
2774 }
2775 }
52105b67
JR
2776 }
2777 }
437d0d22 2778 return l4_len;
52105b67
JR
2779}
2780
e839d01e
DDP
2781static void
2782flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
2783 uint32_t pseudo_hdr_csum)
2784{
2785 size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
2786
2787 if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
2788 || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
2789 if (flow->nw_proto == IPPROTO_TCP) {
2790 struct tcp_header *tcp = dp_packet_l4(p);
2791
3476ce3a 2792 tcp->tcp_csum = 0;
e839d01e
DDP
2793 tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
2794 tcp, l4_len));
2795 } else if (flow->nw_proto == IPPROTO_UDP) {
2796 struct udp_header *udp = dp_packet_l4(p);
2797
3476ce3a 2798 udp->udp_csum = 0;
e839d01e
DDP
2799 udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
2800 udp, l4_len));
2801 } else if (flow->nw_proto == IPPROTO_ICMP) {
2802 struct icmp_header *icmp = dp_packet_l4(p);
2803
3476ce3a 2804 icmp->icmp_csum = 0;
e839d01e
DDP
2805 icmp->icmp_csum = csum(icmp, l4_len);
2806 } else if (flow->nw_proto == IPPROTO_IGMP) {
2807 struct igmp_header *igmp = dp_packet_l4(p);
2808
3476ce3a 2809 igmp->igmp_csum = 0;
e839d01e
DDP
2810 igmp->igmp_csum = csum(igmp, l4_len);
2811 } else if (flow->nw_proto == IPPROTO_ICMPV6) {
2812 struct icmp6_hdr *icmp = dp_packet_l4(p);
2813
3476ce3a 2814 icmp->icmp6_cksum = 0;
e839d01e
DDP
2815 icmp->icmp6_cksum = (OVS_FORCE uint16_t)
2816 csum_finish(csum_continue(pseudo_hdr_csum, icmp, l4_len));
2817 }
2818 }
2819}
2820
bc0f5176
AZ
2821/* Increase the size of packet composed by 'flow_compose_minimal'
2822 * up to 'size' bytes. Fixes all the required packet headers like
2823 * ip/udp lengths and l3/l4 checksums.
2824 *
2825 * 'size' needs to be larger then the current packet size. */
2826static void
2827packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
3476ce3a
IM
2828{
2829 size_t extra_size;
2830
bc0f5176 2831 ovs_assert(size > dp_packet_size(p));
3476ce3a
IM
2832
2833 extra_size = size - dp_packet_size(p);
2834 dp_packet_put_zeros(p, extra_size);
2835
2836 if (flow->dl_type == htons(FLOW_DL_TYPE_NONE)) {
2837 struct eth_header *eth = dp_packet_eth(p);
2838
2839 eth->eth_type = htons(dp_packet_size(p));
3476ce3a
IM
2840 } else if (dl_type_is_ip_any(flow->dl_type)) {
2841 uint32_t pseudo_hdr_csum;
2842 size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
2843
2844 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2845 struct ip_header *ip = dp_packet_l3(p);
2846
2847 ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
2848 ip->ip_csum = 0;
2849 ip->ip_csum = csum(ip, sizeof *ip);
2850
2851 pseudo_hdr_csum = packet_csum_pseudoheader(ip);
2852 } else { /* ETH_TYPE_IPV6 */
2853 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
2854
2855 nh->ip6_plen = htons(l4_len);
2856 pseudo_hdr_csum = packet_csum_pseudoheader6(nh);
2857 }
2858
2859 if ((!(flow->nw_frag & FLOW_NW_FRAG_ANY)
2860 || !(flow->nw_frag & FLOW_NW_FRAG_LATER))
2861 && flow->nw_proto == IPPROTO_UDP) {
2862 struct udp_header *udp = dp_packet_l4(p);
2863
2864 udp->udp_len = htons(l4_len + extra_size);
2865 }
2866 flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
2867 }
2868}
2869
/* Puts into 'p' a packet that flow_extract() would parse as having the given
 * 'flow'.
 *
 * (This is useful only for testing, obviously, and the packet isn't really
 * valid.  Lots of fields are just zeroed.)
 *
 * The created packet has minimal packet size, just big enough to hold
 * the packet header fields. */
static void
flow_compose_minimal(struct dp_packet *p, const struct flow *flow)
{
    uint32_t pseudo_hdr_csum;
    size_t l4_len;

    /* eth_compose() sets l3 pointer and makes sure it is 32-bit aligned. */
    eth_compose(p, flow->dl_dst, flow->dl_src, ntohs(flow->dl_type), 0);
    if (flow->dl_type == htons(FLOW_DL_TYPE_NONE)) {
        struct eth_header *eth = dp_packet_eth(p);
        /* 802.2 frame: the type field holds the frame length, not an
         * Ethertype, and there are no further headers to compose. */
        eth->eth_type = htons(dp_packet_size(p));
        return;
    }

    /* Push VLAN tags starting from the innermost one, so that the outermost
     * tag (vlans[0]) ends up first in the packet. */
    for (int encaps = FLOW_MAX_VLAN_HEADERS - 1; encaps >= 0; encaps--) {
        if (flow->vlans[encaps].tci & htons(VLAN_CFI)) {
            eth_push_vlan(p, flow->vlans[encaps].tpid,
                          flow->vlans[encaps].tci);
        }
    }

    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        struct ip_header *ip;

        ip = dp_packet_put_zeros(p, sizeof *ip);
        ip->ip_ihl_ver = IP_IHL_VER(5, 4);
        ip->ip_tos = flow->nw_tos;
        ip->ip_ttl = flow->nw_ttl;
        ip->ip_proto = flow->nw_proto;
        put_16aligned_be32(&ip->ip_src, flow->nw_src);
        put_16aligned_be32(&ip->ip_dst, flow->nw_dst);

        if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
            ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
            if (flow->nw_frag & FLOW_NW_FRAG_LATER) {
                /* Any nonzero fragment offset marks a "later" fragment;
                 * the particular value 100 is arbitrary. */
                ip->ip_frag_off |= htons(100);
            }
        }

        dp_packet_set_l4(p, dp_packet_tail(p));

        l4_len = flow_compose_l4(p, flow);

        /* Re-fetch the l3 pointer: flow_compose_l4() appends data, which
         * may reallocate the packet buffer and invalidate 'ip'. */
        ip = dp_packet_l3(p);
        ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
        /* Checksum has already been zeroed by put_zeros call. */
        ip->ip_csum = csum(ip, sizeof *ip);

        pseudo_hdr_csum = packet_csum_pseudoheader(ip);
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        struct ovs_16aligned_ip6_hdr *nh;

        nh = dp_packet_put_zeros(p, sizeof *nh);
        /* Version (6), traffic class and flow label share one 32-bit word. */
        put_16aligned_be32(&nh->ip6_flow, htonl(6 << 28) |
                           htonl(flow->nw_tos << 20) | flow->ipv6_label);
        nh->ip6_hlim = flow->nw_ttl;
        nh->ip6_nxt = flow->nw_proto;

        memcpy(&nh->ip6_src, &flow->ipv6_src, sizeof(nh->ip6_src));
        memcpy(&nh->ip6_dst, &flow->ipv6_dst, sizeof(nh->ip6_dst));

        dp_packet_set_l4(p, dp_packet_tail(p));

        l4_len = flow_compose_l4(p, flow);

        /* Re-fetch the l3 pointer: appending L4 data may have moved it. */
        nh = dp_packet_l3(p);
        nh->ip6_plen = htons(l4_len);

        pseudo_hdr_csum = packet_csum_pseudoheader6(nh);
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
               flow->dl_type == htons(ETH_TYPE_RARP)) {
        struct arp_eth_header *arp;

        arp = dp_packet_put_zeros(p, sizeof *arp);
        dp_packet_set_l3(p, arp);
        arp->ar_hrd = htons(1);         /* Hardware type: Ethernet. */
        arp->ar_pro = htons(ETH_TYPE_IP);
        arp->ar_hln = ETH_ADDR_LEN;
        arp->ar_pln = 4;                /* IPv4 address length. */
        arp->ar_op = htons(flow->nw_proto);

        /* Addresses are filled in only for the opcodes that carry them. */
        if (flow->nw_proto == ARP_OP_REQUEST ||
            flow->nw_proto == ARP_OP_REPLY) {
            put_16aligned_be32(&arp->ar_spa, flow->nw_src);
            put_16aligned_be32(&arp->ar_tpa, flow->nw_dst);
            arp->ar_sha = flow->arp_sha;
            arp->ar_tha = flow->arp_tha;
        }
    }

    if (eth_type_mpls(flow->dl_type)) {
        int n;

        p->l2_5_ofs = p->l3_ofs;
        /* Count labels up to and including the bottom-of-stack label. */
        for (n = 1; n < FLOW_MAX_MPLS_LABELS; n++) {
            if (flow->mpls_lse[n - 1] & htonl(MPLS_BOS_MASK)) {
                break;
            }
        }
        /* Push in reverse so that mpls_lse[0] ends up outermost. */
        while (n > 0) {
            push_mpls(p, flow->dl_type, flow->mpls_lse[--n]);
        }
    }
}
/* Puts into 'p' an Ethernet frame of size 'size' that flow_extract() would
 * parse as having the given 'flow'.
 *
 * When 'size' is zero, 'p' is a minimal-size packet that is only big enough
 * to contain all the packet headers.
 *
 * When 'size' is larger than the minimal packet size, the packet is
 * expanded to 'size' bytes with the payload set to zero.
 *
 * Returns 'true' if the packet was successfully created, 'false' otherwise.
 * Note that when 'size' is zero this function always returns true. */
bool
flow_compose(struct dp_packet *p, const struct flow *flow, size_t size)
{
    flow_compose_minimal(p, flow);

    size_t min_size = dp_packet_size(p);

    if (!size || size == min_size) {
        return true;
    }
    if (size < min_size) {
        /* The requested size cannot even hold the headers. */
        return false;
    }
    packet_expand(p, flow, size);
    return true;
}
5cb7a798
BP
3011\f
3012/* Compressed flow. */
3013
df40c152 3014/* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
5fcff47b
JR
3015 * the caller. The caller must have already computed 'dst->map' properly to
3016 * indicate the significant uint64_t elements of 'src'.
13751fd8
JR
3017 *
3018 * Normally the significant elements are the ones that are non-zero. However,
3019 * when a miniflow is initialized from a (mini)mask, the values can be zeroes,
ceb3bd67
JR
3020 * so that the flow and mask always have the same maps. */
3021void
3022miniflow_init(struct miniflow *dst, const struct flow *src)
df40c152 3023{
09b0fa9c 3024 uint64_t *dst_u64 = miniflow_values(dst);
361d808d 3025 size_t idx;
df40c152 3026
5fcff47b
JR
3027 FLOWMAP_FOR_EACH_INDEX(idx, dst->map) {
3028 *dst_u64++ = flow_u64_value(src, idx);
df40c152
BP
3029 }
3030}
3031
361d808d 3032/* Initialize the maps of 'flow' from 'src'. */
ceb3bd67
JR
3033void
3034miniflow_map_init(struct miniflow *flow, const struct flow *src)
5cb7a798 3035{
ceb3bd67 3036 /* Initialize map, counting the number of nonzero elements. */
5fcff47b
JR
3037 flowmap_init(&flow->map);
3038 for (size_t i = 0; i < FLOW_U64S; i++) {
3039 if (flow_u64_value(src, i)) {
3040 flowmap_set(&flow->map, i, 1);
5cb7a798
BP
3041 }
3042 }
ceb3bd67 3043}
5cb7a798 3044
ceb3bd67
JR
3045/* Allocates 'n' count of miniflows, consecutive in memory, initializing the
3046 * map of each from 'src'.
3047 * Returns the size of the miniflow data. */
3048size_t
3049miniflow_alloc(struct miniflow *dsts[], size_t n, const struct miniflow *src)
3050{
361d808d
JR
3051 size_t n_values = miniflow_n_values(src);
3052 size_t data_size = MINIFLOW_VALUES_SIZE(n_values);
3053 struct miniflow *dst = xmalloc(n * (sizeof *src + data_size));
5fcff47b 3054 size_t i;
ceb3bd67
JR
3055
3056 COVERAGE_INC(miniflow_malloc);
3057
3058 for (i = 0; i < n; i++) {
361d808d 3059 *dst = *src; /* Copy maps. */
ceb3bd67 3060 dsts[i] = dst;
361d808d
JR
3061 dst += 1; /* Just past the maps. */
3062 dst = (struct miniflow *)((uint64_t *)dst + n_values); /* Skip data. */
ceb3bd67
JR
3063 }
3064 return data_size;
df40c152 3065}
5cb7a798 3066
ceb3bd67
JR
3067/* Returns a miniflow copy of 'src'. The caller must eventually free() the
3068 * returned miniflow. */
8fd47924 3069struct miniflow *
ceb3bd67 3070miniflow_create(const struct flow *src)
df40c152 3071{
ceb3bd67
JR
3072 struct miniflow tmp;
3073 struct miniflow *dst;
3074
3075 miniflow_map_init(&tmp, src);
3076
3077 miniflow_alloc(&dst, 1, &tmp);
3078 miniflow_init(dst, src);
3079 return dst;
5cb7a798
BP
3080}
3081
/* Initializes 'dst' as a copy of 'src'.  The caller must have allocated
 * 'dst' to have inline space for 'n_values' data in 'src'.
 *
 * Copies the maps by struct assignment, then the inline data that
 * immediately follows them. */
void
miniflow_clone(struct miniflow *dst, const struct miniflow *src,
               size_t n_values)
{
    *dst = *src; /* Copy maps. */
    memcpy(miniflow_values(dst), miniflow_get_values(src),
           MINIFLOW_VALUES_SIZE(n_values));
}
3092
/* Initializes 'dst' as a copy of 'src'.
 *
 * Zeroes the whole flow first; flow_union_with_miniflow() then OR-in only
 * the elements present in 'src''s map, which reproduces 'src' exactly. */
void
miniflow_expand(const struct miniflow *src, struct flow *dst)
{
    memset(dst, 0, sizeof *dst);
    flow_union_with_miniflow(dst, src);
}
3100
8fd47924 3101/* Returns true if 'a' and 'b' are equal miniflows, false otherwise. */
5cb7a798
BP
3102bool
3103miniflow_equal(const struct miniflow *a, const struct miniflow *b)
3104{
09b0fa9c
JR
3105 const uint64_t *ap = miniflow_get_values(a);
3106 const uint64_t *bp = miniflow_get_values(b);
5cb7a798 3107
5fcff47b
JR
3108 /* This is mostly called after a matching hash, so it is highly likely that
3109 * the maps are equal as well. */
3110 if (OVS_LIKELY(flowmap_equal(a->map, b->map))) {
361d808d 3111 return !memcmp(ap, bp, miniflow_n_values(a) * sizeof *ap);
080e28d0 3112 } else {
5fcff47b 3113 size_t idx;
df40c152 3114
5fcff47b
JR
3115 FLOWMAP_FOR_EACH_INDEX (idx, flowmap_or(a->map, b->map)) {
3116 if ((flowmap_is_set(&a->map, idx) ? *ap++ : 0)
3117 != (flowmap_is_set(&b->map, idx) ? *bp++ : 0)) {
080e28d0 3118 return false;
df40c152 3119 }
5cb7a798
BP
3120 }
3121 }
3122
df40c152 3123 return true;
5cb7a798
BP
3124}
3125
de4ad4a2
JR
3126/* Returns false if 'a' and 'b' differ at the places where there are 1-bits
3127 * in 'mask', true otherwise. */
5cb7a798
BP
3128bool
3129miniflow_equal_in_minimask(const struct miniflow *a, const struct miniflow *b,
3130 const struct minimask *mask)
3131{
09b0fa9c 3132 const uint64_t *p = miniflow_get_values(&mask->masks);
361d808d 3133 size_t idx;
5cb7a798 3134
5fcff47b 3135 FLOWMAP_FOR_EACH_INDEX(idx, mask->masks.map) {
1cea007c 3136 if ((miniflow_get(a, idx) ^ miniflow_get(b, idx)) & *p++) {
080e28d0 3137 return false;
5cb7a798
BP
3138 }
3139 }
3140
3141 return true;
3142}
3143
3144/* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
3145 * in 'mask', false if they differ. */
3146bool
3147miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
3148 const struct minimask *mask)
3149{
09b0fa9c 3150 const uint64_t *p = miniflow_get_values(&mask->masks);
361d808d 3151 size_t idx;
5cb7a798 3152
5fcff47b
JR
3153 FLOWMAP_FOR_EACH_INDEX(idx, mask->masks.map) {
3154 if ((miniflow_get(a, idx) ^ flow_u64_value(b, idx)) & *p++) {
080e28d0 3155 return false;
5cb7a798
BP
3156 }
3157 }
3158
3159 return true;
3160}
3161
5cb7a798 3162\f
/* Completes an initialization of 'mask' as a minimask copy of 'wc' begun by
 * the caller.  As with miniflow_init(), the caller must have already set up
 * mask->masks.map to indicate the significant elements of 'wc->masks'. */
void
minimask_init(struct minimask *mask, const struct flow_wildcards *wc)
{
    miniflow_init(&mask->masks, &wc->masks);
}
3168
/* Returns a minimask copy of 'wc'.  The caller must eventually free the
 * returned minimask with free().
 *
 * A minimask is layout-compatible with a miniflow, so the result of
 * miniflow_create() can simply be cast. */
struct minimask *
minimask_create(const struct flow_wildcards *wc)
{
    return (struct minimask *)miniflow_create(&wc->masks);
}
3176
3177/* Initializes 'dst_' as the bit-wise "and" of 'a_' and 'b_'.
3178 *
8fd47924
JR
3179 * The caller must provide room for FLOW_U64S "uint64_t"s in 'storage', which
3180 * must follow '*dst_' in memory, for use by 'dst_'. The caller must *not*
3181 * free 'dst_' free(). */
5cb7a798
BP
3182void
3183minimask_combine(struct minimask *dst_,
3184 const struct minimask *a_, const struct minimask *b_,
d70e8c28 3185 uint64_t storage[FLOW_U64S])
5cb7a798
BP
3186{
3187 struct miniflow *dst = &dst_->masks;
d70e8c28 3188 uint64_t *dst_values = storage;
5cb7a798
BP
3189 const struct miniflow *a = &a_->masks;
3190 const struct miniflow *b = &b_->masks;
361d808d 3191 size_t idx;
5cb7a798 3192
5fcff47b 3193 flowmap_init(&dst->map);
080e28d0 3194
5fcff47b 3195 FLOWMAP_FOR_EACH_INDEX(idx, flowmap_and(a->map, b->map)) {
361d808d 3196 /* Both 'a' and 'b' have non-zero data at 'idx'. */
5fcff47b 3197 uint64_t mask = *miniflow_get__(a, idx) & *miniflow_get__(b, idx);
361d808d
JR
3198
3199 if (mask) {
5fcff47b 3200 flowmap_set(&dst->map, idx, 1);
1cea007c 3201 *dst_values++ = mask;
5cb7a798
BP
3202 }
3203 }
3204}
3205
/* Initializes 'wc' as a copy of 'mask', expanding the compressed mask data
 * back into a full flow_wildcards structure. */
void
minimask_expand(const struct minimask *mask, struct flow_wildcards *wc)
{
    miniflow_expand(&mask->masks, &wc->masks);
}
3212
/* Returns true if 'a' and 'b' are the same flow mask, false otherwise.
 * Minimasks may not have zero data values, so for the minimasks to be the
 * same, they need to have the same map and the same data values.
 *
 * The inline data immediately follows the maps in memory, so one memcmp
 * over the struct plus 'a''s data length compares both map and values
 * (equal maps imply equal data lengths). */
bool
minimask_equal(const struct minimask *a, const struct minimask *b)
{
    return !memcmp(a, b, sizeof *a
                   + MINIFLOW_VALUES_SIZE(miniflow_n_values(&a->masks)));
}
3222
d4570fd8 3223/* Returns true if at least one bit matched by 'b' is wildcarded by 'a',
5cb7a798
BP
3224 * false otherwise. */
3225bool
d4570fd8 3226minimask_has_extra(const struct minimask *a, const struct minimask *b)
5cb7a798 3227{
09b0fa9c 3228 const uint64_t *bp = miniflow_get_values(&b->masks);
361d808d 3229 size_t idx;
5cb7a798 3230
5fcff47b 3231 FLOWMAP_FOR_EACH_INDEX(idx, b->masks.map) {
d70e8c28 3232 uint64_t b_u64 = *bp++;
5cb7a798 3233
d70e8c28
JR
3234 /* 'b_u64' is non-zero, check if the data in 'a' is either zero
3235 * or misses some of the bits in 'b_u64'. */
5fcff47b
JR
3236 if (!MINIFLOW_IN_MAP(&a->masks, idx)
3237 || ((*miniflow_get__(&a->masks, idx) & b_u64) != b_u64)) {
f4d335e9 3238 return true; /* 'a' wildcards some bits 'b' doesn't. */
5cb7a798
BP
3239 }
3240 }
3241
3242 return false;
3243}
f0fb825a
EG
3244
3245void
3246flow_limit_vlans(int vlan_limit)
3247{
3248 if (vlan_limit <= 0) {
3249 flow_vlan_limit = FLOW_MAX_VLAN_HEADERS;
3250 } else {
3251 flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS);
3252 }
3253}