]>
git.proxmox.com Git - ovs.git/blob - lib/flow.c
2 * Copyright (c) 2008, 2009, 2010 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <sys/types.h>
20 #include <netinet/in.h>
23 #include "byte-order.h"
25 #include "dynamic-string.h"
29 #include "openflow/openflow.h"
30 #include "openvswitch/datapath-protocol.h"
32 #include "unaligned.h"
35 VLOG_DEFINE_THIS_MODULE(flow
);
37 static struct arp_eth_header
*
38 pull_arp(struct ofpbuf
*packet
)
40 return ofpbuf_try_pull(packet
, ARP_ETH_HEADER_LEN
);
43 static struct ip_header
*
44 pull_ip(struct ofpbuf
*packet
)
46 if (packet
->size
>= IP_HEADER_LEN
) {
47 struct ip_header
*ip
= packet
->data
;
48 int ip_len
= IP_IHL(ip
->ip_ihl_ver
) * 4;
49 if (ip_len
>= IP_HEADER_LEN
&& packet
->size
>= ip_len
) {
50 return ofpbuf_pull(packet
, ip_len
);
56 static struct tcp_header
*
57 pull_tcp(struct ofpbuf
*packet
)
59 if (packet
->size
>= TCP_HEADER_LEN
) {
60 struct tcp_header
*tcp
= packet
->data
;
61 int tcp_len
= TCP_OFFSET(tcp
->tcp_ctl
) * 4;
62 if (tcp_len
>= TCP_HEADER_LEN
&& packet
->size
>= tcp_len
) {
63 return ofpbuf_pull(packet
, tcp_len
);
69 static struct udp_header
*
70 pull_udp(struct ofpbuf
*packet
)
72 return ofpbuf_try_pull(packet
, UDP_HEADER_LEN
);
75 static struct icmp_header
*
76 pull_icmp(struct ofpbuf
*packet
)
78 return ofpbuf_try_pull(packet
, ICMP_HEADER_LEN
);
82 parse_vlan(struct ofpbuf
*b
, struct flow
*flow
)
85 ovs_be16 eth_type
; /* ETH_TYPE_VLAN */
89 if (b
->size
>= sizeof(struct qtag_prefix
) + sizeof(ovs_be16
)) {
90 struct qtag_prefix
*qp
= ofpbuf_pull(b
, sizeof *qp
);
91 flow
->dl_vlan
= qp
->tci
& htons(VLAN_VID_MASK
);
92 flow
->dl_vlan_pcp
= vlan_tci_to_pcp(qp
->tci
);
97 parse_ethertype(struct ofpbuf
*b
)
99 struct llc_snap_header
*llc
;
102 proto
= *(ovs_be16
*) ofpbuf_pull(b
, sizeof proto
);
103 if (ntohs(proto
) >= ODP_DL_TYPE_ETH2_CUTOFF
) {
107 if (b
->size
< sizeof *llc
) {
108 return htons(ODP_DL_TYPE_NOT_ETH_TYPE
);
112 if (llc
->llc
.llc_dsap
!= LLC_DSAP_SNAP
113 || llc
->llc
.llc_ssap
!= LLC_SSAP_SNAP
114 || llc
->llc
.llc_cntl
!= LLC_CNTL_SNAP
115 || memcmp(llc
->snap
.snap_org
, SNAP_ORG_ETHERNET
,
116 sizeof llc
->snap
.snap_org
)) {
117 return htons(ODP_DL_TYPE_NOT_ETH_TYPE
);
120 ofpbuf_pull(b
, sizeof *llc
);
121 return llc
->snap
.snap_type
;
124 /* Initializes 'flow' members from 'packet', 'tun_id', and 'in_port'.
125 * Initializes 'packet' header pointers as follows:
127 * - packet->l2 to the start of the Ethernet header.
129 * - packet->l3 to just past the Ethernet header, or just past the
130 * vlan_header if one is present, to the first byte of the payload of the
133 * - packet->l4 to just past the IPv4 header, if one is present and has a
134 * correct length, and otherwise NULL.
136 * - packet->l7 to just past the TCP or UDP or ICMP header, if one is
137 * present and has a correct length, and otherwise NULL.
140 flow_extract(struct ofpbuf
*packet
, ovs_be32 tun_id
, uint16_t in_port
,
143 struct ofpbuf b
= *packet
;
144 struct eth_header
*eth
;
147 COVERAGE_INC(flow_extract
);
149 memset(flow
, 0, sizeof *flow
);
150 flow
->tun_id
= tun_id
;
151 flow
->in_port
= in_port
;
152 flow
->dl_vlan
= htons(OFP_VLAN_NONE
);
159 if (b
.size
< sizeof *eth
) {
165 memcpy(flow
->dl_src
, eth
->eth_src
, ETH_ADDR_LEN
);
166 memcpy(flow
->dl_dst
, eth
->eth_dst
, ETH_ADDR_LEN
);
168 /* dl_type, dl_vlan, dl_vlan_pcp. */
169 ofpbuf_pull(&b
, ETH_ADDR_LEN
* 2);
170 if (eth
->eth_type
== htons(ETH_TYPE_VLAN
)) {
171 parse_vlan(&b
, flow
);
173 flow
->dl_type
= parse_ethertype(&b
);
177 if (flow
->dl_type
== htons(ETH_TYPE_IP
)) {
178 const struct ip_header
*nh
= pull_ip(&b
);
180 flow
->nw_src
= get_unaligned_u32(&nh
->ip_src
);
181 flow
->nw_dst
= get_unaligned_u32(&nh
->ip_dst
);
182 flow
->nw_tos
= nh
->ip_tos
& IP_DSCP_MASK
;
183 flow
->nw_proto
= nh
->ip_proto
;
185 if (!IP_IS_FRAGMENT(nh
->ip_frag_off
)) {
186 if (flow
->nw_proto
== IP_TYPE_TCP
) {
187 const struct tcp_header
*tcp
= pull_tcp(&b
);
189 flow
->tp_src
= tcp
->tcp_src
;
190 flow
->tp_dst
= tcp
->tcp_dst
;
193 } else if (flow
->nw_proto
== IP_TYPE_UDP
) {
194 const struct udp_header
*udp
= pull_udp(&b
);
196 flow
->tp_src
= udp
->udp_src
;
197 flow
->tp_dst
= udp
->udp_dst
;
200 } else if (flow
->nw_proto
== IP_TYPE_ICMP
) {
201 const struct icmp_header
*icmp
= pull_icmp(&b
);
203 flow
->icmp_type
= htons(icmp
->icmp_type
);
204 flow
->icmp_code
= htons(icmp
->icmp_code
);
212 } else if (flow
->dl_type
== htons(ETH_TYPE_ARP
)) {
213 const struct arp_eth_header
*arp
= pull_arp(&b
);
214 if (arp
&& arp
->ar_hrd
== htons(1)
215 && arp
->ar_pro
== htons(ETH_TYPE_IP
)
216 && arp
->ar_hln
== ETH_ADDR_LEN
217 && arp
->ar_pln
== 4) {
218 /* We only match on the lower 8 bits of the opcode. */
219 if (ntohs(arp
->ar_op
) <= 0xff) {
220 flow
->nw_proto
= ntohs(arp
->ar_op
);
223 if ((flow
->nw_proto
== ARP_OP_REQUEST
)
224 || (flow
->nw_proto
== ARP_OP_REPLY
)) {
225 flow
->nw_src
= arp
->ar_spa
;
226 flow
->nw_dst
= arp
->ar_tpa
;
233 /* Extracts the flow stats for a packet. The 'flow' and 'packet'
234 * arguments must have been initialized through a call to flow_extract().
237 flow_extract_stats(const struct flow
*flow
, struct ofpbuf
*packet
,
238 struct odp_flow_stats
*stats
)
240 memset(stats
, '\0', sizeof(*stats
));
242 if ((flow
->dl_type
== htons(ETH_TYPE_IP
)) && packet
->l4
) {
243 if ((flow
->nw_proto
== IP_TYPE_TCP
) && packet
->l7
) {
244 struct tcp_header
*tcp
= packet
->l4
;
245 stats
->tcp_flags
= TCP_FLAGS(tcp
->tcp_ctl
);
249 stats
->n_bytes
= packet
->size
;
250 stats
->n_packets
= 1;
253 /* Extract 'flow' with 'wildcards' into the OpenFlow match structure
254 * 'match'. 'flow_format' should be one of NXFF_*. */
256 flow_to_match(const struct flow
*flow
, uint32_t wildcards
,
257 int flow_format
, struct ofp_match
*match
)
259 wildcards
&= (flow_format
== NXFF_TUN_ID_FROM_COOKIE
? OVSFW_ALL
261 match
->wildcards
= htonl(wildcards
);
263 match
->in_port
= htons(flow
->in_port
== ODPP_LOCAL
? OFPP_LOCAL
265 match
->dl_vlan
= flow
->dl_vlan
;
266 match
->dl_vlan_pcp
= flow
->dl_vlan_pcp
;
267 memcpy(match
->dl_src
, flow
->dl_src
, ETH_ADDR_LEN
);
268 memcpy(match
->dl_dst
, flow
->dl_dst
, ETH_ADDR_LEN
);
269 match
->dl_type
= flow
->dl_type
;
270 match
->nw_src
= flow
->nw_src
;
271 match
->nw_dst
= flow
->nw_dst
;
272 match
->nw_tos
= flow
->nw_tos
;
273 match
->nw_proto
= flow
->nw_proto
;
274 match
->tp_src
= flow
->tp_src
;
275 match
->tp_dst
= flow
->tp_dst
;
276 memset(match
->pad1
, '\0', sizeof match
->pad1
);
277 memset(match
->pad2
, '\0', sizeof match
->pad2
);
281 flow_from_match(const struct ofp_match
*match
, int flow_format
,
282 ovs_be64 cookie
, struct flow
*flow
,
283 struct flow_wildcards
*wc
)
285 uint32_t wildcards
= ntohl(match
->wildcards
) & OVSFW_ALL
;
288 if (flow_format
!= NXFF_TUN_ID_FROM_COOKIE
) {
289 wildcards
|= NXFW_TUN_ID
;
291 if (!(wildcards
& NXFW_TUN_ID
)) {
292 flow
->tun_id
= htonl(ntohll(cookie
) >> 32);
295 if (wildcards
& OFPFW_DL_DST
) {
296 /* OpenFlow 1.0 OFPFW_DL_DST covers the whole Ethernet destination, but
297 * internally to OVS it excludes the multicast bit, which has to be set
298 * separately with FWW_ETH_MCAST. */
299 wildcards
|= FWW_ETH_MCAST
;
301 flow_wildcards_init(wc
, wildcards
);
303 flow
->nw_src
= match
->nw_src
;
304 flow
->nw_dst
= match
->nw_dst
;
305 flow
->in_port
= (match
->in_port
== htons(OFPP_LOCAL
) ? ODPP_LOCAL
306 : ntohs(match
->in_port
));
307 flow
->dl_vlan
= match
->dl_vlan
;
308 flow
->dl_vlan_pcp
= match
->dl_vlan_pcp
;
309 flow
->dl_type
= match
->dl_type
;
310 flow
->tp_src
= match
->tp_src
;
311 flow
->tp_dst
= match
->tp_dst
;
312 memcpy(flow
->dl_src
, match
->dl_src
, ETH_ADDR_LEN
);
313 memcpy(flow
->dl_dst
, match
->dl_dst
, ETH_ADDR_LEN
);
314 flow
->nw_tos
= match
->nw_tos
;
315 flow
->nw_proto
= match
->nw_proto
;
319 flow_to_string(const struct flow
*flow
)
321 struct ds ds
= DS_EMPTY_INITIALIZER
;
322 flow_format(&ds
, flow
);
327 flow_format(struct ds
*ds
, const struct flow
*flow
)
329 ds_put_format(ds
, "tunnel%08"PRIx32
":in_port%04"PRIx16
330 ":vlan%"PRIu16
":pcp%"PRIu8
331 " mac"ETH_ADDR_FMT
"->"ETH_ADDR_FMT
335 " ip"IP_FMT
"->"IP_FMT
336 " port%"PRIu16
"->%"PRIu16
,
339 ntohs(flow
->dl_vlan
),
341 ETH_ADDR_ARGS(flow
->dl_src
),
342 ETH_ADDR_ARGS(flow
->dl_dst
),
343 ntohs(flow
->dl_type
),
346 IP_ARGS(&flow
->nw_src
),
347 IP_ARGS(&flow
->nw_dst
),
349 ntohs(flow
->tp_dst
));
353 flow_print(FILE *stream
, const struct flow
*flow
)
355 char *s
= flow_to_string(flow
);
360 /* flow_wildcards functions. */
362 /* Return 'wildcards' in "normal form":
364 * - Forces unknown bits to 0.
366 * - Forces nw_src and nw_dst masks greater than 32 to exactly 32.
368 static inline uint32_t
369 flow_wildcards_normalize(uint32_t wildcards
)
371 wildcards
&= wildcards
& (OVSFW_ALL
| FWW_ALL
);
372 if (wildcards
& (0x20 << OFPFW_NW_SRC_SHIFT
)) {
373 wildcards
&= ~(0x1f << OFPFW_NW_SRC_SHIFT
);
375 if (wildcards
& (0x20 << OFPFW_NW_DST_SHIFT
)) {
376 wildcards
&= ~(0x1f << OFPFW_NW_DST_SHIFT
);
381 /* Initializes 'wc' from 'wildcards', which may be any combination of the
382 * OFPFW_* and OVSFW_* wildcard bits.
384 * All registers (NXM_NX_REG*) are always completely wildcarded, because
385 * 'wildcards' doesn't have enough bits to give the details on which
386 * particular bits should be wildcarded (if any). The caller may use
387 * flow_wildcards_set_reg_mask() to update the register wildcard masks. */
389 flow_wildcards_init(struct flow_wildcards
*wc
, uint32_t wildcards
)
391 wc
->wildcards
= flow_wildcards_normalize(wildcards
) | FWW_REGS
;
392 wc
->nw_src_mask
= ofputil_wcbits_to_netmask(wildcards
>> OFPFW_NW_SRC_SHIFT
);
393 wc
->nw_dst_mask
= ofputil_wcbits_to_netmask(wildcards
>> OFPFW_NW_DST_SHIFT
);
394 memset(wc
->reg_masks
, 0, sizeof wc
->reg_masks
);
397 /* Initializes 'wc' as an exact-match set of wildcards; that is, 'wc' does not
398 * wildcard any bits or fields. */
400 flow_wildcards_init_exact(struct flow_wildcards
*wc
)
403 wc
->nw_src_mask
= htonl(UINT32_MAX
);
404 wc
->nw_dst_mask
= htonl(UINT32_MAX
);
405 memset(wc
->reg_masks
, 0xff, sizeof wc
->reg_masks
);
408 /* Returns true if 'wc' is exact-match, false if 'wc' wildcards any bits or
411 flow_wildcards_is_exact(const struct flow_wildcards
*wc
)
413 return !wc
->wildcards
;
/* Extracts the 6-bit field that starts at bit 'shift' from each of 'wb1' and
 * 'wb2' and returns the larger of the two values, shifted back into position
 * at 'shift'.  All other bits of the result are 0. */
static inline uint32_t
combine_nw_bits(uint32_t wb1, uint32_t wb2, int shift)
{
    uint32_t sb1 = (wb1 >> shift) & 0x3f;
    uint32_t sb2 = (wb2 >> shift) & 0x3f;
    uint32_t larger = sb1 > sb2 ? sb1 : sb2;

    return larger << shift;
}
424 /* Initializes 'dst' as the combination of wildcards in 'src1' and 'src2'.
425 * That is, a bit or a field is wildcarded in 'dst' if it is wildcarded in
426 * 'src1' or 'src2' or both. */
428 flow_wildcards_combine(struct flow_wildcards
*dst
,
429 const struct flow_wildcards
*src1
,
430 const struct flow_wildcards
*src2
)
432 uint32_t wb1
= src1
->wildcards
;
433 uint32_t wb2
= src2
->wildcards
;
436 dst
->wildcards
= (wb1
| wb2
) & ~(OFPFW_NW_SRC_MASK
| OFPFW_NW_DST_MASK
);
437 dst
->wildcards
|= combine_nw_bits(wb1
, wb2
, OFPFW_NW_SRC_SHIFT
);
438 dst
->wildcards
|= combine_nw_bits(wb1
, wb2
, OFPFW_NW_DST_SHIFT
);
439 dst
->nw_src_mask
= src1
->nw_src_mask
& src2
->nw_src_mask
;
440 dst
->nw_dst_mask
= src1
->nw_dst_mask
& src2
->nw_dst_mask
;
441 for (i
= 0; i
< FLOW_N_REGS
; i
++) {
442 dst
->reg_masks
[i
] = src1
->reg_masks
[i
] & src2
->reg_masks
[i
];
446 /* Returns a hash of the wildcards in 'wc'. */
448 flow_wildcards_hash(const struct flow_wildcards
*wc
)
450 /* There is no need to include nw_src_mask or nw_dst_mask because they do
451 * not add any information (they can be computed from wc->wildcards). */
452 BUILD_ASSERT_DECL(sizeof wc
->wildcards
== 4);
453 BUILD_ASSERT_DECL(sizeof wc
->reg_masks
== 4 * FLOW_N_REGS
);
454 BUILD_ASSERT_DECL(offsetof(struct flow_wildcards
, wildcards
) == 0);
455 BUILD_ASSERT_DECL(offsetof(struct flow_wildcards
, reg_masks
) == 4);
456 return hash_words((const uint32_t *) wc
, 1 + FLOW_N_REGS
, 0);
459 /* Returns true if 'a' and 'b' represent the same wildcards, false if they are
462 flow_wildcards_equal(const struct flow_wildcards
*a
,
463 const struct flow_wildcards
*b
)
467 if (a
->wildcards
!= b
->wildcards
) {
471 for (i
= 0; i
< FLOW_N_REGS
; i
++) {
472 if (a
->reg_masks
[i
] != b
->reg_masks
[i
]) {
480 /* Returns true if at least one bit or field is wildcarded in 'a' but not in
481 * 'b', false otherwise. */
483 flow_wildcards_has_extra(const struct flow_wildcards
*a
,
484 const struct flow_wildcards
*b
)
488 for (i
= 0; i
< FLOW_N_REGS
; i
++) {
489 if ((a
->reg_masks
[i
] & b
->reg_masks
[i
]) != b
->reg_masks
[i
]) {
494 #define OFPFW_NW_MASK (OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK)
495 return ((a
->wildcards
& ~(b
->wildcards
| OFPFW_NW_MASK
))
496 || (a
->nw_src_mask
& b
->nw_src_mask
) != b
->nw_src_mask
497 || (a
->nw_dst_mask
& b
->nw_dst_mask
) != b
->nw_dst_mask
);
501 set_nw_mask(struct flow_wildcards
*wc
, ovs_be32 mask
,
502 ovs_be32
*maskp
, int shift
)
504 if (ip_is_cidr(mask
)) {
505 wc
->wildcards
&= ~(0x3f << shift
);
506 wc
->wildcards
|= ofputil_netmask_to_wcbits(mask
) << shift
;
514 /* Sets the IP (or ARP) source wildcard mask to CIDR 'mask' (consisting of N
515 * high-order 1-bit and 32-N low-order 0-bits). Returns true if successful,
516 * false if 'mask' is not a CIDR mask. */
518 flow_wildcards_set_nw_src_mask(struct flow_wildcards
*wc
, ovs_be32 mask
)
520 return set_nw_mask(wc
, mask
, &wc
->nw_src_mask
, OFPFW_NW_SRC_SHIFT
);
523 /* Sets the IP (or ARP) destination wildcard mask to CIDR 'mask' (consisting of
524 * N high-order 1-bit and 32-N low-order 0-bits). Returns true if successful,
525 * false if 'mask' is not a CIDR mask. */
527 flow_wildcards_set_nw_dst_mask(struct flow_wildcards
*wc
, ovs_be32 mask
)
529 return set_nw_mask(wc
, mask
, &wc
->nw_dst_mask
, OFPFW_NW_DST_SHIFT
);
532 /* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
533 * (A 0-bit indicates a wildcard bit.) */
535 flow_wildcards_set_reg_mask(struct flow_wildcards
*wc
, int idx
, uint32_t mask
)
537 if (mask
!= wc
->reg_masks
[idx
]) {
538 wc
->reg_masks
[idx
] = mask
;
539 if (mask
!= UINT32_MAX
) {
540 wc
->wildcards
|= FWW_REGS
;
544 for (i
= 0; i
< FLOW_N_REGS
; i
++) {
545 if (wc
->reg_masks
[i
] != UINT32_MAX
) {
546 wc
->wildcards
|= FWW_REGS
;
550 wc
->wildcards
&= ~FWW_REGS
;