/*
 * Copyright (c) 2007-2017 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/geneve.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>
#include <net/tun_proto.h>
#include <net/erspan.h>

#include "datapath.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
#include "gso.h"

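/*
 * Expected payload length for each Netlink attribute type.  An entry may
 * instead point at a nested table via 'next'; the negative sentinel
 * lengths below mark attributes that are nested or variable-length
 * rather than fixed-size.
 */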
struct ovs_len_tbl {
        int len;
        const struct ovs_len_tbl *next;
};

#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2

static bool actions_may_change_flow(const struct nlattr *actions)
{
        struct nlattr *nla;
        int rem;

        nla_for_each_nested(nla, actions, rem) {
                u16 action = nla_type(nla);

                switch (action) {
                case OVS_ACTION_ATTR_OUTPUT:
                case OVS_ACTION_ATTR_RECIRC:
                case OVS_ACTION_ATTR_TRUNC:
                case OVS_ACTION_ATTR_USERSPACE:
                        break;

                case OVS_ACTION_ATTR_CT:
                case OVS_ACTION_ATTR_CT_CLEAR:
                case OVS_ACTION_ATTR_HASH:
                case OVS_ACTION_ATTR_POP_ETH:
                case OVS_ACTION_ATTR_POP_MPLS:
                case OVS_ACTION_ATTR_POP_NSH:
                case OVS_ACTION_ATTR_POP_VLAN:
                case OVS_ACTION_ATTR_PUSH_ETH:
                case OVS_ACTION_ATTR_PUSH_MPLS:
                case OVS_ACTION_ATTR_PUSH_NSH:
                case OVS_ACTION_ATTR_PUSH_VLAN:
                case OVS_ACTION_ATTR_SAMPLE:
                case OVS_ACTION_ATTR_SET:
                case OVS_ACTION_ATTR_SET_MASKED:
                case OVS_ACTION_ATTR_METER:
                case OVS_ACTION_ATTR_CHECK_PKT_LEN:
                default:
                        return true;
                }
        }
        return false;
}

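/*
 * Widen the key range covered by this match to include
 * [offset, offset + size), rounded out to long-word boundaries; flow
 * masking and comparison work on whole longs, so the range is kept
 * long-aligned here.
 */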
static void update_range(struct sw_flow_match *match,
                         size_t offset, size_t size, bool is_mask)
{
        struct sw_flow_key_range *range;
        size_t start = rounddown(offset, sizeof(long));
        size_t end = roundup(offset + size, sizeof(long));

        if (!is_mask)
                range = &match->range;
        else
                range = &match->mask->range;

        if (range->start == range->end) {
                range->start = start;
                range->end = end;
                return;
        }

        if (range->start > start)
                range->start = start;

        if (range->end < end)
                range->end = end;
}

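/*
 * Helpers that write one field of the flow key (or of its mask, when
 * is_mask is set) while keeping the match range up to date.  For example,
 *
 *      SW_FLOW_KEY_PUT(match, ip.proto, IPPROTO_TCP, false);
 *
 * stores IPPROTO_TCP in match->key->ip.proto and widens match->range to
 * cover that field.
 */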
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
        do { \
                update_range(match, offsetof(struct sw_flow_key, field), \
                             sizeof((match)->key->field), is_mask); \
                if (is_mask) \
                        (match)->mask->key.field = value; \
                else \
                        (match)->key->field = value; \
        } while (0)

#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
        do { \
                update_range(match, offset, len, is_mask); \
                if (is_mask) \
                        memcpy((u8 *)&(match)->mask->key + offset, value_p, len); \
                else \
                        memcpy((u8 *)(match)->key + offset, value_p, len); \
        } while (0)

#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
                                  value_p, len, is_mask)

#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
        do { \
                update_range(match, offsetof(struct sw_flow_key, field), \
                             sizeof((match)->key->field), is_mask); \
                if (is_mask) \
                        memset((u8 *)&(match)->mask->key.field, value, \
                               sizeof((match)->mask->key.field)); \
                else \
                        memset((u8 *)&(match)->key->field, value, \
                               sizeof((match)->key->field)); \
        } while (0)

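/*
 * Cross-check the supplied attributes against the key itself:
 * 'key_expected' accumulates the attributes that must be present given
 * the key's EtherType and IP protocol, and 'mask_allowed' the attributes
 * a mask may legitimately set.  Returns true if both checks pass.
 */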
static bool match_validate(const struct sw_flow_match *match,
                           u64 key_attrs, u64 mask_attrs, bool log)
{
        u64 key_expected = 0;
        u64 mask_allowed = key_attrs; /* At most allow all key attributes */

        /* The following mask attributes are allowed only if they
         * pass the validation tests.
         */
        mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
                          | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
                          | (1ULL << OVS_KEY_ATTR_IPV6)
                          | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
                          | (1ULL << OVS_KEY_ATTR_TCP)
                          | (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
                          | (1ULL << OVS_KEY_ATTR_UDP)
                          | (1ULL << OVS_KEY_ATTR_SCTP)
                          | (1ULL << OVS_KEY_ATTR_ICMP)
                          | (1ULL << OVS_KEY_ATTR_ICMPV6)
                          | (1ULL << OVS_KEY_ATTR_ARP)
                          | (1ULL << OVS_KEY_ATTR_ND)
                          | (1ULL << OVS_KEY_ATTR_MPLS)
                          | (1ULL << OVS_KEY_ATTR_NSH));

        /* Always allowed mask fields. */
        mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
                         | (1ULL << OVS_KEY_ATTR_IN_PORT)
                         | (1ULL << OVS_KEY_ATTR_ETHERTYPE));

        /* Check key attributes. */
        if (match->key->eth.type == htons(ETH_P_ARP)
            || match->key->eth.type == htons(ETH_P_RARP)) {
                key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
                        mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
        }

        if (eth_p_mpls(match->key->eth.type)) {
                key_expected |= 1ULL << OVS_KEY_ATTR_MPLS;
                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
                        mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS;
        }

        if (match->key->eth.type == htons(ETH_P_IP)) {
                key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
                if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
                        mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
                        mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
                }

                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
                        if (match->key->ip.proto == IPPROTO_UDP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
                        }

                        if (match->key->ip.proto == IPPROTO_SCTP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
                        }

                        if (match->key->ip.proto == IPPROTO_TCP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
                                key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
                                }
                        }

                        if (match->key->ip.proto == IPPROTO_ICMP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
                        }
                }
        }

        if (match->key->eth.type == htons(ETH_P_IPV6)) {
                key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
                if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
                        mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
                        mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
                }

                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
                        if (match->key->ip.proto == IPPROTO_UDP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
                        }

                        if (match->key->ip.proto == IPPROTO_SCTP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
                        }

                        if (match->key->ip.proto == IPPROTO_TCP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
                                key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
                                }
                        }

                        if (match->key->ip.proto == IPPROTO_ICMPV6) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;

                                if (match->key->tp.src ==
                                                htons(NDISC_NEIGHBOUR_SOLICITATION) ||
                                    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
                                        key_expected |= 1ULL << OVS_KEY_ATTR_ND;
                                        /* Original direction conntrack tuple
                                         * uses the same space as the ND fields
                                         * in the key, so both are not allowed
                                         * at the same time.
                                         */
                                        mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
                                        if (match->mask && (match->mask->key.tp.src == htons(0xff)))
                                                mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
                                }
                        }
                }
        }

        if (match->key->eth.type == htons(ETH_P_NSH)) {
                key_expected |= 1 << OVS_KEY_ATTR_NSH;
                if (match->mask &&
                    match->mask->key.eth.type == htons(0xffff)) {
                        mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
                }
        }

        if ((key_attrs & key_expected) != key_expected) {
                /* Key attributes check failed. */
                OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
                          (unsigned long long)key_attrs,
                          (unsigned long long)key_expected);
                return false;
        }

        if ((mask_attrs & mask_allowed) != mask_attrs) {
                /* Mask attributes check failed. */
                OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
                          (unsigned long long)mask_attrs,
                          (unsigned long long)mask_allowed);
                return false;
        }

        return true;
}

size_t ovs_tun_key_attr_size(void)
{
        /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
         * updating this function.
         */
        return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
                + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
                + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
                + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
                /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
                 * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS are mutually exclusive with
                 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
                 */
                + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
                + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}

static size_t ovs_nsh_key_attr_size(void)
{
        /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
         * updating this function.
         */
        return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
                /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
                 * mutually exclusive, so the bigger one can cover
                 * the smaller one.
                 */
                + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
}

size_t ovs_key_attr_size(void)
{
        /* Whenever adding new OVS_KEY_ FIELDS, we should consider
         * updating this function.
         */
        BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);

        return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
                + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
                  + ovs_tun_key_attr_size()
                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
                + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
                + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
                + nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
                + nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
                + nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
                + nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
                + nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
                + nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
                  + ovs_nsh_key_attr_size()
                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
                + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
                + nla_total_size(28); /* OVS_KEY_ATTR_ND */
}

static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
        [OVS_VXLAN_EXT_GBP]                 = { .len = sizeof(u32) },
};

static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
        [OVS_TUNNEL_KEY_ATTR_ID]            = { .len = sizeof(u64) },
        [OVS_TUNNEL_KEY_ATTR_IPV4_SRC]      = { .len = sizeof(u32) },
        [OVS_TUNNEL_KEY_ATTR_IPV4_DST]      = { .len = sizeof(u32) },
        [OVS_TUNNEL_KEY_ATTR_TOS]           = { .len = 1 },
        [OVS_TUNNEL_KEY_ATTR_TTL]           = { .len = 1 },
        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
        [OVS_TUNNEL_KEY_ATTR_CSUM]          = { .len = 0 },
        [OVS_TUNNEL_KEY_ATTR_TP_SRC]        = { .len = sizeof(u16) },
        [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
        [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
        [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
        [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
                                                .next = ovs_vxlan_ext_key_lens },
        [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
        [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
        [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
};

static const struct ovs_len_tbl
ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
        [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
        [OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
        [OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
};

/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
        [OVS_KEY_ATTR_ENCAP]     = { .len = OVS_ATTR_NESTED },
        [OVS_KEY_ATTR_PRIORITY]  = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_IN_PORT]   = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_SKB_MARK]  = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_ETHERNET]  = { .len = sizeof(struct ovs_key_ethernet) },
        [OVS_KEY_ATTR_VLAN]      = { .len = sizeof(__be16) },
        [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
        [OVS_KEY_ATTR_IPV4]      = { .len = sizeof(struct ovs_key_ipv4) },
        [OVS_KEY_ATTR_IPV6]      = { .len = sizeof(struct ovs_key_ipv6) },
        [OVS_KEY_ATTR_TCP]       = { .len = sizeof(struct ovs_key_tcp) },
        [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
        [OVS_KEY_ATTR_UDP]       = { .len = sizeof(struct ovs_key_udp) },
        [OVS_KEY_ATTR_SCTP]      = { .len = sizeof(struct ovs_key_sctp) },
        [OVS_KEY_ATTR_ICMP]      = { .len = sizeof(struct ovs_key_icmp) },
        [OVS_KEY_ATTR_ICMPV6]    = { .len = sizeof(struct ovs_key_icmpv6) },
        [OVS_KEY_ATTR_ARP]       = { .len = sizeof(struct ovs_key_arp) },
        [OVS_KEY_ATTR_ND]        = { .len = sizeof(struct ovs_key_nd) },
        [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
                                     .next = ovs_tunnel_key_lens, },
        [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
        [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
        [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
        [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
                .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
        [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
                .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
        [OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
                                     .next = ovs_nsh_key_attr_lens, },
};

static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
{
        return expected_len == attr_len ||
               expected_len == OVS_ATTR_NESTED ||
               expected_len == OVS_ATTR_VARIABLE;
}

static bool is_all_zero(const u8 *fp, size_t size)
{
        int i;

        if (!fp)
                return false;

        for (i = 0; i < size; i++)
                if (fp[i])
                        return false;

        return true;
}

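/*
 * Walk one level of nested OVS_KEY_ATTR_* attributes, checking each type
 * and length against ovs_key_lens and recording the attribute in a[].
 * When 'nz' is set (mask parsing), attributes whose payload is all zeroes
 * are skipped, since an all-zero mask is equivalent to an absent one.
 */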
static int __parse_flow_nlattrs(const struct nlattr *attr,
                                const struct nlattr *a[],
                                u64 *attrsp, bool log, bool nz)
{
        const struct nlattr *nla;
        u64 attrs;
        int rem;

        attrs = *attrsp;
        nla_for_each_nested(nla, attr, rem) {
                u16 type = nla_type(nla);
                int expected_len;

                if (type > OVS_KEY_ATTR_MAX) {
                        OVS_NLERR(log, "Key type %d is out of range max %d",
                                  type, OVS_KEY_ATTR_MAX);
                        return -EINVAL;
                }

                if (attrs & (1ULL << type)) {
                        OVS_NLERR(log, "Duplicate key (type %d).", type);
                        return -EINVAL;
                }

                expected_len = ovs_key_lens[type].len;
                if (!check_attr_len(nla_len(nla), expected_len)) {
                        OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
                                  type, nla_len(nla), expected_len);
                        return -EINVAL;
                }

                if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
                        attrs |= 1ULL << type;
                        a[type] = nla;
                }
        }
        if (rem) {
                OVS_NLERR(log, "Message has %d unknown bytes.", rem);
                return -EINVAL;
        }

        *attrsp = attrs;
        return 0;
}

static int parse_flow_mask_nlattrs(const struct nlattr *attr,
                                   const struct nlattr *a[], u64 *attrsp,
                                   bool log)
{
        return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}

int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
                       u64 *attrsp, bool log)
{
        return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}

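/*
 * Copy Geneve options from the attribute into the tunnel-option area of
 * the flow key or mask.  The option length itself becomes part of the
 * match, so packets carrying extra trailing options cannot silently
 * match a flow that was created without them.
 */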
static int genev_tun_opt_from_nlattr(const struct nlattr *a,
                                     struct sw_flow_match *match, bool is_mask,
                                     bool log)
{
        unsigned long opt_key_offset;

        if (nla_len(a) > sizeof(match->key->tun_opts)) {
                OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
                          nla_len(a), sizeof(match->key->tun_opts));
                return -EINVAL;
        }

        if (nla_len(a) % 4 != 0) {
                OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
                          nla_len(a));
                return -EINVAL;
        }

        /* We need to record the length of the options passed
         * down, otherwise packets with the same format but
         * additional options will be silently matched.
         */
        if (!is_mask) {
                SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
                                false);
        } else {
                /* This is somewhat unusual because it looks at
                 * both the key and mask while parsing the
                 * attributes (and by extension assumes the key
                 * is parsed first). Normally, we would verify
                 * that each is the correct length and that the
                 * attributes line up in the validate function.
                 * However, that is difficult because this is
                 * variable length and we won't have the
                 * information later.
                 */
                if (match->key->tun_opts_len != nla_len(a)) {
                        OVS_NLERR(log, "Geneve option len %d != mask len %d",
                                  match->key->tun_opts_len, nla_len(a));
                        return -EINVAL;
                }

                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
        }

        opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
                                  nla_len(a), is_mask);
        return 0;
}

static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
                                     struct sw_flow_match *match, bool is_mask,
                                     bool log)
{
        struct nlattr *a;
        int rem;
        unsigned long opt_key_offset;
        struct vxlan_metadata opts;

        BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));

        memset(&opts, 0, sizeof(opts));
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);

                if (type > OVS_VXLAN_EXT_MAX) {
                        OVS_NLERR(log, "VXLAN extension %d out of range max %d",
                                  type, OVS_VXLAN_EXT_MAX);
                        return -EINVAL;
                }

                if (!check_attr_len(nla_len(a),
                                    ovs_vxlan_ext_key_lens[type].len)) {
                        OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
                                  type, nla_len(a),
                                  ovs_vxlan_ext_key_lens[type].len);
                        return -EINVAL;
                }

                switch (type) {
                case OVS_VXLAN_EXT_GBP:
                        opts.gbp = nla_get_u32(a);
                        break;
                default:
                        OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
                                  type);
                        return -EINVAL;
                }
        }
        if (rem) {
                OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
                          rem);
                return -EINVAL;
        }

        if (!is_mask)
                SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
        else
                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

        opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
                                  is_mask);
        return 0;
}

static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
                                      struct sw_flow_match *match, bool is_mask,
                                      bool log)
{
        unsigned long opt_key_offset;

        BUILD_BUG_ON(sizeof(struct erspan_metadata) >
                     sizeof(match->key->tun_opts));

        if (nla_len(a) > sizeof(match->key->tun_opts)) {
                OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
                          nla_len(a), sizeof(match->key->tun_opts));
                return -EINVAL;
        }

        if (!is_mask)
                SW_FLOW_KEY_PUT(match, tun_opts_len,
                                sizeof(struct erspan_metadata), false);
        else
                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

        opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
                                  nla_len(a), is_mask);
        return 0;
}

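/*
 * Parse an OVS_KEY_ATTR_TUNNEL nest into the tunnel part of the flow
 * key.  On success, returns the OVS_TUNNEL_KEY_ATTR_* type of the option
 * block that was present (Geneve, VXLAN or ERSPAN options), 0 if there
 * was none, or a negative errno on error.
 */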
static int ip_tun_from_nlattr(const struct nlattr *attr,
                              struct sw_flow_match *match, bool is_mask,
                              bool log)
{
        bool ttl = false, ipv4 = false, ipv6 = false;
        __be16 tun_flags = 0;
        int opts_type = 0;
        struct nlattr *a;
        int rem;

        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                int err;

                if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
                        OVS_NLERR(log, "Tunnel attr %d out of range max %d",
                                  type, OVS_TUNNEL_KEY_ATTR_MAX);
                        return -EINVAL;
                }

                if (!check_attr_len(nla_len(a),
                                    ovs_tunnel_key_lens[type].len)) {
                        OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
                                  type, nla_len(a), ovs_tunnel_key_lens[type].len);
                        return -EINVAL;
                }

                switch (type) {
                case OVS_TUNNEL_KEY_ATTR_ID:
                        SW_FLOW_KEY_PUT(match, tun_key.tun_id,
                                        nla_get_be64(a), is_mask);
                        tun_flags |= TUNNEL_KEY;
                        break;
                case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
                                        nla_get_in_addr(a), is_mask);
                        ipv4 = true;
                        break;
                case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
                                        nla_get_in_addr(a), is_mask);
                        ipv4 = true;
                        break;
                case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
                                        nla_get_in6_addr(a), is_mask);
                        ipv6 = true;
                        break;
                case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
                                        nla_get_in6_addr(a), is_mask);
                        ipv6 = true;
                        break;
                case OVS_TUNNEL_KEY_ATTR_TOS:
                        SW_FLOW_KEY_PUT(match, tun_key.tos,
                                        nla_get_u8(a), is_mask);
                        break;
                case OVS_TUNNEL_KEY_ATTR_TTL:
                        SW_FLOW_KEY_PUT(match, tun_key.ttl,
                                        nla_get_u8(a), is_mask);
                        ttl = true;
                        break;
                case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
                        tun_flags |= TUNNEL_DONT_FRAGMENT;
                        break;
                case OVS_TUNNEL_KEY_ATTR_CSUM:
                        tun_flags |= TUNNEL_CSUM;
                        break;
                case OVS_TUNNEL_KEY_ATTR_TP_SRC:
                        SW_FLOW_KEY_PUT(match, tun_key.tp_src,
                                        nla_get_be16(a), is_mask);
                        break;
                case OVS_TUNNEL_KEY_ATTR_TP_DST:
                        SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
                                        nla_get_be16(a), is_mask);
                        break;
                case OVS_TUNNEL_KEY_ATTR_OAM:
                        tun_flags |= TUNNEL_OAM;
                        break;
                case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
                        if (opts_type) {
                                OVS_NLERR(log, "Multiple metadata blocks provided");
                                return -EINVAL;
                        }

                        err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
                        if (err)
                                return err;

                        tun_flags |= TUNNEL_GENEVE_OPT;
                        opts_type = type;
                        break;
                case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
                        if (opts_type) {
                                OVS_NLERR(log, "Multiple metadata blocks provided");
                                return -EINVAL;
                        }

                        err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
                        if (err)
                                return err;

                        tun_flags |= TUNNEL_VXLAN_OPT;
                        opts_type = type;
                        break;
                case OVS_TUNNEL_KEY_ATTR_PAD:
                        break;
                case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
                        if (opts_type) {
                                OVS_NLERR(log, "Multiple metadata blocks provided");
                                return -EINVAL;
                        }

                        err = erspan_tun_opt_from_nlattr(a, match, is_mask,
                                                         log);
                        if (err)
                                return err;

                        tun_flags |= TUNNEL_ERSPAN_OPT;
                        opts_type = type;
                        break;
                default:
                        OVS_NLERR(log, "Unknown IP tunnel attribute %d",
                                  type);
                        return -EINVAL;
                }
        }

        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
        if (is_mask)
                SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
        else
                SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
                                false);

        if (rem > 0) {
                OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
                          rem);
                return -EINVAL;
        }

        if (ipv4 && ipv6) {
                OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
                return -EINVAL;
        }

        if (!is_mask) {
                if (!ipv4 && !ipv6) {
                        OVS_NLERR(log, "IP tunnel dst address not specified");
                        return -EINVAL;
                }
                if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
                        OVS_NLERR(log, "IPv4 tunnel dst address is zero");
                        return -EINVAL;
                }
                if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
                        OVS_NLERR(log, "IPv6 tunnel dst address is zero");
                        return -EINVAL;
                }

                if (!ttl) {
                        OVS_NLERR(log, "IP tunnel TTL not specified.");
                        return -EINVAL;
                }
        }

        return opts_type;
}

static int vxlan_opt_to_nlattr(struct sk_buff *skb,
                               const void *tun_opts, int swkey_tun_opts_len)
{
        const struct vxlan_metadata *opts = tun_opts;
        struct nlattr *nla;

        nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
        if (!nla)
                return -EMSGSIZE;

        if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
                return -EMSGSIZE;

        nla_nest_end(skb, nla);
        return 0;
}

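/*
 * Reverse of ip_tun_from_nlattr(): emit the tunnel key and any option
 * block as a flat run of OVS_TUNNEL_KEY_ATTR_* attributes on 'skb'.
 * Callers that need the enclosing OVS_KEY_ATTR_TUNNEL nest use
 * ip_tun_to_nlattr() below.
 */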
static int __ip_tun_to_nlattr(struct sk_buff *skb,
                              const struct ip_tunnel_key *output,
                              const void *tun_opts, int swkey_tun_opts_len,
                              unsigned short tun_proto)
{
        if (output->tun_flags & TUNNEL_KEY &&
            nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
                         OVS_TUNNEL_KEY_ATTR_PAD))
                return -EMSGSIZE;
        switch (tun_proto) {
        case AF_INET:
                if (output->u.ipv4.src &&
                    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
                                    output->u.ipv4.src))
                        return -EMSGSIZE;
                if (output->u.ipv4.dst &&
                    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
                                    output->u.ipv4.dst))
                        return -EMSGSIZE;
                break;
        case AF_INET6:
                if (!ipv6_addr_any(&output->u.ipv6.src) &&
                    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
                                     &output->u.ipv6.src))
                        return -EMSGSIZE;
                if (!ipv6_addr_any(&output->u.ipv6.dst) &&
                    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
                                     &output->u.ipv6.dst))
                        return -EMSGSIZE;
                break;
        }
        if (output->tos &&
            nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
                return -EMSGSIZE;
        if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
                return -EMSGSIZE;
        if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
                return -EMSGSIZE;
        if ((output->tun_flags & TUNNEL_CSUM) &&
            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
                return -EMSGSIZE;
        if (output->tp_src &&
            nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
                return -EMSGSIZE;
        if (output->tp_dst &&
            nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
                return -EMSGSIZE;
        if ((output->tun_flags & TUNNEL_OAM) &&
            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
                return -EMSGSIZE;
        if (swkey_tun_opts_len) {
                if (output->tun_flags & TUNNEL_GENEVE_OPT &&
                    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
                            swkey_tun_opts_len, tun_opts))
                        return -EMSGSIZE;
                else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
                         vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
                        return -EMSGSIZE;
                else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
                         nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
                                 swkey_tun_opts_len, tun_opts))
                        return -EMSGSIZE;
        }

        return 0;
}

static int ip_tun_to_nlattr(struct sk_buff *skb,
                            const struct ip_tunnel_key *output,
                            const void *tun_opts, int swkey_tun_opts_len,
                            unsigned short tun_proto)
{
        struct nlattr *nla;
        int err;

        nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
        if (!nla)
                return -EMSGSIZE;

        err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
                                 tun_proto);
        if (err)
                return err;

        nla_nest_end(skb, nla);
        return 0;
}

int ovs_nla_put_tunnel_info(struct sk_buff *skb,
                            struct ip_tunnel_info *tun_info)
{
        return __ip_tun_to_nlattr(skb, &tun_info->key,
                                  ip_tunnel_info_opts(tun_info),
                                  tun_info->options_len,
                                  ip_tunnel_info_af(tun_info));
}

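/*
 * Store the 802.1Q TPID/TCI pair from the parsed attributes into either
 * the outer VLAN slot ('eth.vlan') or, for QinQ, the inner one
 * ('eth.cvlan') of the flow key or mask.
 */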
static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
                                    const struct nlattr *a[],
                                    bool is_mask, bool inner)
{
        __be16 tci = 0;
        __be16 tpid = 0;

        if (a[OVS_KEY_ATTR_VLAN])
                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

        if (a[OVS_KEY_ATTR_ETHERTYPE])
                tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

        if (likely(!inner)) {
                SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
                SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
        } else {
                SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
                SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
        }
        return 0;
}

static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
                                      u64 key_attrs, bool inner,
                                      const struct nlattr **a, bool log)
{
        __be16 tci = 0;

        if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
              (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
              eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
                /* Not a VLAN. */
                return 0;
        }

        if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
              (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
                OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
                return -EINVAL;
        }

        if (a[OVS_KEY_ATTR_VLAN])
                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

        if (!(tci & htons(VLAN_CFI_MASK))) {
                if (tci) {
                        OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
                                  (inner) ? "C-VLAN" : "VLAN");
                        return -EINVAL;
                } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
                        /* Corner case for truncated VLAN header. */
                        OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
                                  (inner) ? "C-VLAN" : "VLAN");
                        return -EINVAL;
                }
        }

        return 1;
}

static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
                                           u64 key_attrs, bool inner,
                                           const struct nlattr **a, bool log)
{
        __be16 tci = 0;
        __be16 tpid = 0;
        bool encap_valid = !!(match->key->eth.vlan.tci &
                              htons(VLAN_CFI_MASK));
        bool i_encap_valid = !!(match->key->eth.cvlan.tci &
                                htons(VLAN_CFI_MASK));

        if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
                /* Not a VLAN. */
                return 0;
        }

        if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
                OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
                          (inner) ? "C-VLAN" : "VLAN");
                return -EINVAL;
        }

        if (a[OVS_KEY_ATTR_VLAN])
                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

        if (a[OVS_KEY_ATTR_ETHERTYPE])
                tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

        if (tpid != htons(0xffff)) {
                OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
                          (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
                return -EINVAL;
        }
        if (!(tci & htons(VLAN_CFI_MASK))) {
                OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
                          (inner) ? "C-VLAN" : "VLAN");
                return -EINVAL;
        }

        return 1;
}

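/*
 * Validate and encode one VLAN level, then descend into the
 * OVS_KEY_ATTR_ENCAP nest so the encapsulated frame's attributes can be
 * parsed with the same machinery.  A 0 return from the validate helpers
 * means "not a VLAN frame", in which case the key is left untouched.
 */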
static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
                                     u64 *key_attrs, bool inner,
                                     const struct nlattr **a, bool is_mask,
                                     bool log)
{
        int err;
        const struct nlattr *encap;

        if (!is_mask)
                err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
                                                 a, log);
        else
                err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
                                                      a, log);
        if (err <= 0)
                return err;

        err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
        if (err)
                return err;

        *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
        *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
        *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);

        encap = a[OVS_KEY_ATTR_ENCAP];

        if (!is_mask)
                err = parse_flow_nlattrs(encap, a, key_attrs, log);
        else
                err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);

        return err;
}

static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
                                   u64 *key_attrs, const struct nlattr **a,
                                   bool is_mask, bool log)
{
        int err;
        bool encap_valid = false;

        err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
                                        is_mask, log);
        if (err)
                return err;

        encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
        if (encap_valid) {
                err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
                                                is_mask, log);
                if (err)
                        return err;
        }

        return 0;
}

static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
                                       u64 *attrs, const struct nlattr **a,
                                       bool is_mask, bool log)
{
        __be16 eth_type;

        eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
        if (is_mask) {
                /* Always exact match EtherType. */
                eth_type = htons(0xffff);
        } else if (!eth_proto_is_802_3(eth_type)) {
                OVS_NLERR(log, "EtherType %x is less than min %x",
                          ntohs(eth_type), ETH_P_802_3_MIN);
                return -EINVAL;
        }

        SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
        *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
        return 0;
}

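/*
 * Extract the packet-independent parts of the flow key: datapath hash,
 * recirculation id, priority, input port, skb mark, tunnel key and the
 * conntrack fields.  Each consumed attribute bit is cleared from *attrs
 * so callers can detect leftovers.
 */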
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
                                 u64 *attrs, const struct nlattr **a,
                                 bool is_mask, bool log)
{
        u8 mac_proto = MAC_PROTO_ETHERNET;

        if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
                u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);

                SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_DP_HASH);
        }

        if (*attrs & (1ULL << OVS_KEY_ATTR_RECIRC_ID)) {
                u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);

                SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_RECIRC_ID);
        }

        if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
                SW_FLOW_KEY_PUT(match, phy.priority,
                                nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
        }

        if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
                u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

                if (is_mask) {
                        in_port = 0xffffffff; /* Always exact match in_port. */
                } else if (in_port >= DP_MAX_PORTS) {
                        OVS_NLERR(log, "Port %d exceeds max allowable %d",
                                  in_port, DP_MAX_PORTS);
                        return -EINVAL;
                }

                SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
        } else if (!is_mask) {
                SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
        }

        if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
                uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

                SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
        }
        if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
                if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
                                       is_mask, log) < 0)
                        return -EINVAL;
                *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
        }

        if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
            ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
                u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);

                if (ct_state & ~CT_SUPPORTED_MASK) {
                        OVS_NLERR(log, "ct_state flags %08x unsupported",
                                  ct_state);
                        return -EINVAL;
                }

                SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
        }
        if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
            ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
                u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);

                SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
        }
        if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
            ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
                u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);

                SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
        }
        if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
            ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
                const struct ovs_key_ct_labels *cl;

                cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
                SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
                                   sizeof(*cl), is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
        }
        if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
                const struct ovs_key_ct_tuple_ipv4 *ct;

                ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);

                SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
                SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
                SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
                SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
                SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
        }
        if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
                const struct ovs_key_ct_tuple_ipv6 *ct;

                ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);

                SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
                                   sizeof(match->key->ipv6.ct_orig.src),
                                   is_mask);
                SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
                                   sizeof(match->key->ipv6.ct_orig.dst),
                                   is_mask);
                SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
                SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
                SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
        }

        /* For layer 3 packets the Ethernet type is provided
         * and treated as metadata but no MAC addresses are provided.
         */
        if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
            (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
                mac_proto = MAC_PROTO_NONE;

        /* Always exact match mac_proto */
        SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);

        if (mac_proto == MAC_PROTO_NONE)
                return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
                                                   log);

        return 0;
}

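/*
 * Build a wire-format NSH header at 'nh' from an OVS_NSH_KEY_ATTR_* nest,
 * presumably for pushing onto a packet; 'size' is the room available.
 * validate_nsh() is expected to have vetted the attributes already.
 */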
int nsh_hdr_from_nlattr(const struct nlattr *attr,
                        struct nshhdr *nh, size_t size)
{
        struct nlattr *a;
        int rem;
        u8 flags = 0;
        u8 ttl = 0;
        int mdlen = 0;

        /* validate_nsh() has already checked this, so we needn't duplicate
         * the check here.
         */
        if (size < NSH_BASE_HDR_LEN)
                return -ENOBUFS;

        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);

                switch (type) {
                case OVS_NSH_KEY_ATTR_BASE: {
                        const struct ovs_nsh_key_base *base = nla_data(a);

                        flags = base->flags;
                        ttl = base->ttl;
                        nh->np = base->np;
                        nh->mdtype = base->mdtype;
                        nh->path_hdr = base->path_hdr;
                        break;
                }
                case OVS_NSH_KEY_ATTR_MD1:
                        mdlen = nla_len(a);
                        if (mdlen > size - NSH_BASE_HDR_LEN)
                                return -ENOBUFS;
                        memcpy(&nh->md1, nla_data(a), mdlen);
                        break;

                case OVS_NSH_KEY_ATTR_MD2:
                        mdlen = nla_len(a);
                        if (mdlen > size - NSH_BASE_HDR_LEN)
                                return -ENOBUFS;
                        memcpy(&nh->md2, nla_data(a), mdlen);
                        break;

                default:
                        return -EINVAL;
                }
        }

        /* nsh header length = NSH_BASE_HDR_LEN + mdlen */
        nh->ver_flags_ttl_len = 0;
        nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);

        return 0;
}

int nsh_key_from_nlattr(const struct nlattr *attr,
                        struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
{
        struct nlattr *a;
        int rem;

        /* validate_nsh() has already checked this, so we needn't duplicate
         * the check here.
         */
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);

                switch (type) {
                case OVS_NSH_KEY_ATTR_BASE: {
                        const struct ovs_nsh_key_base *base = nla_data(a);
                        /* The mask immediately follows the key in the
                         * attribute payload.
                         */
                        const struct ovs_nsh_key_base *base_mask = base + 1;

                        nsh->base = *base;
                        nsh_mask->base = *base_mask;
                        break;
                }
                case OVS_NSH_KEY_ATTR_MD1: {
                        const struct ovs_nsh_key_md1 *md1 = nla_data(a);
                        const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;

                        memcpy(nsh->context, md1->context, sizeof(*md1));
                        memcpy(nsh_mask->context, md1_mask->context,
                               sizeof(*md1_mask));
                        break;
                }
                case OVS_NSH_KEY_ATTR_MD2:
                        /* Not supported yet */
                        return -ENOTSUPP;
                default:
                        return -EINVAL;
                }
        }

        return 0;
}

static int nsh_key_put_from_nlattr(const struct nlattr *attr,
                                   struct sw_flow_match *match, bool is_mask,
                                   bool is_push_nsh, bool log)
{
        struct nlattr *a;
        int rem;
        bool has_base = false;
        bool has_md1 = false;
        bool has_md2 = false;
        u8 mdtype = 0;
        int mdlen = 0;

        if (WARN_ON(is_push_nsh && is_mask))
                return -EINVAL;

        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                int i;

                if (type > OVS_NSH_KEY_ATTR_MAX) {
                        OVS_NLERR(log, "nsh attr %d is out of range max %d",
                                  type, OVS_NSH_KEY_ATTR_MAX);
                        return -EINVAL;
                }

                if (!check_attr_len(nla_len(a),
                                    ovs_nsh_key_attr_lens[type].len)) {
                        OVS_NLERR(log, "nsh attr %d has unexpected len %d expected %d",
                                  type, nla_len(a),
                                  ovs_nsh_key_attr_lens[type].len);
                        return -EINVAL;
                }

                switch (type) {
                case OVS_NSH_KEY_ATTR_BASE: {
                        const struct ovs_nsh_key_base *base = nla_data(a);

                        has_base = true;
                        mdtype = base->mdtype;
                        SW_FLOW_KEY_PUT(match, nsh.base.flags,
                                        base->flags, is_mask);
                        SW_FLOW_KEY_PUT(match, nsh.base.ttl,
                                        base->ttl, is_mask);
                        SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
                                        base->mdtype, is_mask);
                        SW_FLOW_KEY_PUT(match, nsh.base.np,
                                        base->np, is_mask);
                        SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
                                        base->path_hdr, is_mask);
                        break;
                }
                case OVS_NSH_KEY_ATTR_MD1: {
                        const struct ovs_nsh_key_md1 *md1 = nla_data(a);

                        has_md1 = true;
                        for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
                                SW_FLOW_KEY_PUT(match, nsh.context[i],
                                                md1->context[i], is_mask);
                        break;
                }
                case OVS_NSH_KEY_ATTR_MD2:
                        if (!is_push_nsh) /* Not supported MD type 2 yet */
                                return -ENOTSUPP;

                        has_md2 = true;
                        mdlen = nla_len(a);
                        if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
                                OVS_NLERR(log, "Invalid MD length %d for MD type %d",
                                          mdlen, mdtype);
                                return -EINVAL;
                        }
                        break;
                default:
                        OVS_NLERR(log, "Unknown nsh attribute %d",
                                  type);
                        return -EINVAL;
                }
        }

        if (rem > 0) {
                OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
                return -EINVAL;
        }

        if (has_md1 && has_md2) {
                OVS_NLERR(1, "invalid nsh attribute: md1 and md2 are exclusive.");
                return -EINVAL;
        }

        if (!is_mask) {
                if ((has_md1 && mdtype != NSH_M_TYPE1) ||
                    (has_md2 && mdtype != NSH_M_TYPE2)) {
                        OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
                                  mdtype);
                        return -EINVAL;
                }

                if (is_push_nsh &&
                    (!has_base || (!has_md1 && !has_md2))) {
                        OVS_NLERR(1, "push_nsh: missing base or metadata attributes");
                        return -EINVAL;
                }
        }

        return 0;
}

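/*
 * Fill in the packet-derived fields of the flow key or mask from the
 * attribute array: Ethernet, IPv4/IPv6, ARP, NSH, MPLS and the L4
 * protocols.  Any attribute bit still set in 'attrs' afterwards is an
 * unknown key attribute and fails the call.
 */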
static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
                                u64 attrs, const struct nlattr **a,
                                bool is_mask, bool log)
{
        int err;

        err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
        if (err)
                return err;

        if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
                const struct ovs_key_ethernet *eth_key;

                eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
                SW_FLOW_KEY_MEMCPY(match, eth.src,
                                   eth_key->eth_src, ETH_ALEN, is_mask);
                SW_FLOW_KEY_MEMCPY(match, eth.dst,
                                   eth_key->eth_dst, ETH_ALEN, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);

                if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
                        /* VLAN attribute is always parsed before getting here
                         * since it may occur multiple times.
                         */
                        OVS_NLERR(log, "VLAN attribute unexpected.");
                        return -EINVAL;
                }

                if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
                        err = parse_eth_type_from_nlattrs(match, &attrs, a,
                                                          is_mask, log);
                        if (err)
                                return err;
                } else if (!is_mask) {
                        SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
                }
        } else if (!match->key->eth.type) {
                OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
                return -EINVAL;
        }

        if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
                const struct ovs_key_ipv4 *ipv4_key;

                ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
                if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
                        OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
                                  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
                        return -EINVAL;
                }
                SW_FLOW_KEY_PUT(match, ip.proto,
                                ipv4_key->ipv4_proto, is_mask);
                SW_FLOW_KEY_PUT(match, ip.tos,
                                ipv4_key->ipv4_tos, is_mask);
                SW_FLOW_KEY_PUT(match, ip.ttl,
                                ipv4_key->ipv4_ttl, is_mask);
                SW_FLOW_KEY_PUT(match, ip.frag,
                                ipv4_key->ipv4_frag, is_mask);
                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
                                ipv4_key->ipv4_src, is_mask);
                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
                                ipv4_key->ipv4_dst, is_mask);
                attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
                const struct ovs_key_ipv6 *ipv6_key;

                ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
                if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
                        OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
                                  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
                        return -EINVAL;
                }

                if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
                        OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
                                  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
                        return -EINVAL;
                }

                SW_FLOW_KEY_PUT(match, ipv6.label,
                                ipv6_key->ipv6_label, is_mask);
                SW_FLOW_KEY_PUT(match, ip.proto,
                                ipv6_key->ipv6_proto, is_mask);
                SW_FLOW_KEY_PUT(match, ip.tos,
                                ipv6_key->ipv6_tclass, is_mask);
                SW_FLOW_KEY_PUT(match, ip.ttl,
                                ipv6_key->ipv6_hlimit, is_mask);
                SW_FLOW_KEY_PUT(match, ip.frag,
                                ipv6_key->ipv6_frag, is_mask);
                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
                                   ipv6_key->ipv6_src,
                                   sizeof(match->key->ipv6.addr.src),
                                   is_mask);
                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
                                   ipv6_key->ipv6_dst,
                                   sizeof(match->key->ipv6.addr.dst),
                                   is_mask);

                attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
                const struct ovs_key_arp *arp_key;

                arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
                if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
                        OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
                                  arp_key->arp_op);
                        return -EINVAL;
                }

                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
                                arp_key->arp_sip, is_mask);
                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
                                arp_key->arp_tip, is_mask);
                SW_FLOW_KEY_PUT(match, ip.proto,
                                ntohs(arp_key->arp_op), is_mask);
                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
                                   arp_key->arp_sha, ETH_ALEN, is_mask);
                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
                                   arp_key->arp_tha, ETH_ALEN, is_mask);

                attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
        }

        if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
                if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
                                            is_mask, false, log) < 0)
                        return -EINVAL;
                attrs &= ~(1 << OVS_KEY_ATTR_NSH);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
                const struct ovs_key_mpls *mpls_key;

                mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
                SW_FLOW_KEY_PUT(match, mpls.top_lse,
                                mpls_key->mpls_lse, is_mask);

                attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
                const struct ovs_key_tcp *tcp_key;

                tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
                SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
                SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
                SW_FLOW_KEY_PUT(match, tp.flags,
                                nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
                                is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_TCP_FLAGS);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
                const struct ovs_key_udp *udp_key;

                udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
                SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
                SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
                const struct ovs_key_sctp *sctp_key;

                sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
                SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
                SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
                const struct ovs_key_icmp *icmp_key;

                icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
                SW_FLOW_KEY_PUT(match, tp.src,
                                htons(icmp_key->icmp_type), is_mask);
                SW_FLOW_KEY_PUT(match, tp.dst,
                                htons(icmp_key->icmp_code), is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
                const struct ovs_key_icmpv6 *icmpv6_key;

                icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
                SW_FLOW_KEY_PUT(match, tp.src,
                                htons(icmpv6_key->icmpv6_type), is_mask);
                SW_FLOW_KEY_PUT(match, tp.dst,
                                htons(icmpv6_key->icmpv6_code), is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
        }

        if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
                const struct ovs_key_nd *nd_key;

                nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
                                   nd_key->nd_target,
                                   sizeof(match->key->ipv6.nd.target),
                                   is_mask);
                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
                                   nd_key->nd_sll, ETH_ALEN, is_mask);
                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
                                   nd_key->nd_tll, ETH_ALEN, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
        }

        if (attrs != 0) {
                OVS_NLERR(log, "Unknown key attributes %llx",
                          (unsigned long long)attrs);
                return -EINVAL;
        }

        return 0;
}

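/*
 * Recursively overwrite every attribute payload in the (already
 * validated) stream with 'val'.  mask_set_nlattr() uses this to turn a
 * copy of the key attributes into an exact-match mask of 0xff bytes;
 * ct_state is additionally clamped to the supported flag set.
 */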
static void nlattr_set(struct nlattr *attr, u8 val,
                       const struct ovs_len_tbl *tbl)
{
        struct nlattr *nla;
        int rem;

        /* The nlattr stream should already have been validated */
        nla_for_each_nested(nla, attr, rem) {
                if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
                        nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
                else
                        memset(nla_data(nla), val, nla_len(nla));

                if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
                        *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
        }
}

static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
        nlattr_set(attr, val, ovs_key_lens);
}

/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. If 'mask' is NULL, the flow is treated as an exact-match flow.
 * Otherwise, it is treated as a wildcarded flow, except that the mask
 * itself may not contain any don't-care bits.
 * @net: Used to determine per-namespace field support.
 * @match: receives the extracted flow match information.
 * @nla_key: Netlink attribute holding a nested %OVS_KEY_ATTR_* Netlink
 * attribute sequence. The fields should be those of the packet that
 * triggered the creation of this flow.
 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
 * Netlink attributes that specify the mask field of the wildcarded flow.
 * @log: Boolean to allow kernel error logging. Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 */
int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
                      const struct nlattr *nla_key,
                      const struct nlattr *nla_mask,
                      bool log)
{
        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
        struct nlattr *newmask = NULL;
        u64 key_attrs = 0;
        u64 mask_attrs = 0;
        int err;

        err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
        if (err)
                return err;

        err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
        if (err)
                return err;

        err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
        if (err)
                return err;

        if (match->mask) {
                if (!nla_mask) {
                        /* Create an exact match mask. We need to set to 0xff
                         * all the 'match->mask' fields that have been touched
                         * in 'match->key'. We cannot simply memset
                         * 'match->mask', because padding bytes and fields not
                         * specified in 'match->key' should be left to 0.
                         * Instead, we use a stream of netlink attributes,
                         * copied from 'key' and set to 0xff.
                         * ovs_key_from_nlattrs() will take care of filling
                         * 'match->mask' appropriately.
                         */
                        newmask = kmemdup(nla_key,
                                          nla_total_size(nla_len(nla_key)),
                                          GFP_KERNEL);
                        if (!newmask)
                                return -ENOMEM;

                        mask_set_nlattr(newmask, 0xff);

                        /* Userspace does not send tunnel attributes that
                         * are 0, but we should not wildcard them nonetheless.
                         */
                        if (match->key->tun_proto)
                                SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
                                                         0xff, true);

                        nla_mask = newmask;
                }

                err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
                if (err)
                        goto free_newmask;

                SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
                SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);

                err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
                if (err)
                        goto free_newmask;

                err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
                                           log);
                if (err)
                        goto free_newmask;
        }

        if (!match_validate(match, key_attrs, mask_attrs, log))
                err = -EINVAL;

free_newmask:
        kfree(newmask);
        return err;
}

static size_t get_ufid_len(const struct nlattr *attr, bool log)
{
        size_t len;

        if (!attr)
                return 0;

        len = nla_len(attr);
        if (len < 1 || len > MAX_UFID_LENGTH) {
                OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
                          nla_len(attr), MAX_UFID_LENGTH);
                return 0;
        }

        return len;
}

/* Initializes 'sfid->ufid', returning true if 'attr' contains a valid UFID,
 * or false otherwise.
 */
1847 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1848 bool log)
1849 {
1850 sfid->ufid_len = get_ufid_len(attr, log);
1851 if (sfid->ufid_len)
1852 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1853
1854 return sfid->ufid_len;
1855 }
1856
1857 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1858 const struct sw_flow_key *key, bool log)
1859 {
1860 struct sw_flow_key *new_key;
1861
1862 if (ovs_nla_get_ufid(sfid, ufid, log))
1863 return 0;
1864
1865 /* If UFID was not provided, use unmasked key. */
1866 new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1867 if (!new_key)
1868 return -ENOMEM;
1869 memcpy(new_key, key, sizeof(*key));
1870 sfid->unmasked_key = new_key;
1871
1872 return 0;
1873 }
1874
1875 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1876 {
1877 return attr ? nla_get_u32(attr) : 0;
1878 }
1879
1880 /**
1881 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1882 * @net: Network namespace.
1883 * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1884 * metadata.
1885 * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1886 * attributes.
1887 * @attrs: Bit mask for the netlink attributes included in @a.
1888 * @log: Boolean to allow kernel error logging. Normally true, but when
1889 * probing for feature compatibility this should be passed in as false to
1890 * suppress unnecessary error logging.
1891 *
1892 * This parses a series of Netlink attributes that form a flow key, which must
1893 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1894 * get the metadata, that is, the parts of the flow key that cannot be
1895 * extracted from the packet itself.
1896 *
1897 * This must be called before the packet key fields are filled in 'key'.
1898 */
1899
1900 int ovs_nla_get_flow_metadata(struct net *net,
1901 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1902 u64 attrs, struct sw_flow_key *key, bool log)
1903 {
1904 struct sw_flow_match match;
1905
1906 memset(&match, 0, sizeof(match));
1907 match.key = key;
1908
1909 key->ct_state = 0;
1910 key->ct_zone = 0;
1911 key->ct_orig_proto = 0;
1912 memset(&key->ct, 0, sizeof(key->ct));
1913 memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1914 memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1915
1916 key->phy.in_port = DP_MAX_PORTS;
1917
1918 return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1919 }
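/*
 * Usage sketch (illustrative only; the helper name is hypothetical): the
 * packet-execute path parses metadata out of the userspace key attributes
 * before the remaining key fields are extracted from the skb itself.
 */
#if 0
static int example_packet_metadata(struct net *net, const struct nlattr *key,
				   struct sw_flow_key *flow_key, bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	u64 attrs = 0;
	int err;

	err = parse_flow_nlattrs(key, a, &attrs, log);
	if (err)
		return err;

	return ovs_nla_get_flow_metadata(net, a, attrs, flow_key, log);
}
#endif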
1920
1921 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1922 bool is_mask)
1923 {
1924 __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1925
1926 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1927 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1928 return -EMSGSIZE;
1929 return 0;
1930 }
1931
1932 static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1933 struct sk_buff *skb)
1934 {
1935 struct nlattr *start;
1936
1937 start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1938 if (!start)
1939 return -EMSGSIZE;
1940
1941 if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1942 goto nla_put_failure;
1943
1944 if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1945 if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1946 sizeof(nsh->context), nsh->context))
1947 goto nla_put_failure;
1948 }
1949
1950 /* MD type 2 is not supported yet. */
1951
1952 nla_nest_end(skb, start);
1953
1954 return 0;
1955
1956 nla_put_failure:
1957 return -EMSGSIZE;
1958 }
1959
1960 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1961 const struct sw_flow_key *output, bool is_mask,
1962 struct sk_buff *skb)
1963 {
1964 struct ovs_key_ethernet *eth_key;
1965 struct nlattr *nla;
1966 struct nlattr *encap = NULL;
1967 struct nlattr *in_encap = NULL;
1968
1969 if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1970 goto nla_put_failure;
1971
1972 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1973 goto nla_put_failure;
1974
1975 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1976 goto nla_put_failure;
1977
1978 if ((swkey->tun_proto || is_mask)) {
1979 const void *opts = NULL;
1980
1981 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1982 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1983
1984 if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1985 swkey->tun_opts_len, swkey->tun_proto))
1986 goto nla_put_failure;
1987 }
1988
1989 if (swkey->phy.in_port == DP_MAX_PORTS) {
1990 if (is_mask && (output->phy.in_port == 0xffff))
1991 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1992 goto nla_put_failure;
1993 } else {
1994 u16 upper_u16;
1995 upper_u16 = !is_mask ? 0 : 0xffff;
1996
1997 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1998 (upper_u16 << 16) | output->phy.in_port))
1999 goto nla_put_failure;
2000 }
2001
2002 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
2003 goto nla_put_failure;
2004
2005 if (ovs_ct_put_key(swkey, output, skb))
2006 goto nla_put_failure;
2007
2008 if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
2009 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
2010 if (!nla)
2011 goto nla_put_failure;
2012
2013 eth_key = nla_data(nla);
2014 ether_addr_copy(eth_key->eth_src, output->eth.src);
2015 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
2016
2017 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
2018 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
2019 goto nla_put_failure;
2020 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2021 if (!swkey->eth.vlan.tci)
2022 goto unencap;
2023
2024 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
2025 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
2026 goto nla_put_failure;
2027 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2028 if (!swkey->eth.cvlan.tci)
2029 goto unencap;
2030 }
2031 }
2032
2033 if (swkey->eth.type == htons(ETH_P_802_2)) {
2034 /*
2035 * Ethertype 802.2 is represented in netlink by omitting
2036 * OVS_KEY_ATTR_ETHERTYPE from the flow key attribute and
2037 * putting 0xffff in the mask attribute. The ethertype can
2038 * also be wildcarded.
2039 */
2040 if (is_mask && output->eth.type)
2041 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
2042 output->eth.type))
2043 goto nla_put_failure;
2044 goto unencap;
2045 }
2046 }
2047
2048 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
2049 goto nla_put_failure;
2050
2051 if (eth_type_vlan(swkey->eth.type)) {
2052 /* There are 3 VLAN tags; we don't know anything about the rest
2053 * of the packet, so truncate here.
2054 */
2055 WARN_ON_ONCE(!(encap && in_encap));
2056 goto unencap;
2057 }
2058
2059 if (swkey->eth.type == htons(ETH_P_IP)) {
2060 struct ovs_key_ipv4 *ipv4_key;
2061
2062 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
2063 if (!nla)
2064 goto nla_put_failure;
2065 ipv4_key = nla_data(nla);
2066 ipv4_key->ipv4_src = output->ipv4.addr.src;
2067 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
2068 ipv4_key->ipv4_proto = output->ip.proto;
2069 ipv4_key->ipv4_tos = output->ip.tos;
2070 ipv4_key->ipv4_ttl = output->ip.ttl;
2071 ipv4_key->ipv4_frag = output->ip.frag;
2072 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
2073 struct ovs_key_ipv6 *ipv6_key;
2074
2075 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
2076 if (!nla)
2077 goto nla_put_failure;
2078 ipv6_key = nla_data(nla);
2079 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
2080 sizeof(ipv6_key->ipv6_src));
2081 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
2082 sizeof(ipv6_key->ipv6_dst));
2083 ipv6_key->ipv6_label = output->ipv6.label;
2084 ipv6_key->ipv6_proto = output->ip.proto;
2085 ipv6_key->ipv6_tclass = output->ip.tos;
2086 ipv6_key->ipv6_hlimit = output->ip.ttl;
2087 ipv6_key->ipv6_frag = output->ip.frag;
2088 } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2089 if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2090 goto nla_put_failure;
2091 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
2092 swkey->eth.type == htons(ETH_P_RARP)) {
2093 struct ovs_key_arp *arp_key;
2094
2095 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
2096 if (!nla)
2097 goto nla_put_failure;
2098 arp_key = nla_data(nla);
2099 memset(arp_key, 0, sizeof(struct ovs_key_arp));
2100 arp_key->arp_sip = output->ipv4.addr.src;
2101 arp_key->arp_tip = output->ipv4.addr.dst;
2102 arp_key->arp_op = htons(output->ip.proto);
2103 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
2104 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
2105 } else if (eth_p_mpls(swkey->eth.type)) {
2106 struct ovs_key_mpls *mpls_key;
2107
2108 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
2109 if (!nla)
2110 goto nla_put_failure;
2111 mpls_key = nla_data(nla);
2112 mpls_key->mpls_lse = output->mpls.top_lse;
2113 }
2114
2115 if ((swkey->eth.type == htons(ETH_P_IP) ||
2116 swkey->eth.type == htons(ETH_P_IPV6)) &&
2117 swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
2118
2119 if (swkey->ip.proto == IPPROTO_TCP) {
2120 struct ovs_key_tcp *tcp_key;
2121
2122 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
2123 if (!nla)
2124 goto nla_put_failure;
2125 tcp_key = nla_data(nla);
2126 tcp_key->tcp_src = output->tp.src;
2127 tcp_key->tcp_dst = output->tp.dst;
2128 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
2129 output->tp.flags))
2130 goto nla_put_failure;
2131 } else if (swkey->ip.proto == IPPROTO_UDP) {
2132 struct ovs_key_udp *udp_key;
2133
2134 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
2135 if (!nla)
2136 goto nla_put_failure;
2137 udp_key = nla_data(nla);
2138 udp_key->udp_src = output->tp.src;
2139 udp_key->udp_dst = output->tp.dst;
2140 } else if (swkey->ip.proto == IPPROTO_SCTP) {
2141 struct ovs_key_sctp *sctp_key;
2142
2143 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
2144 if (!nla)
2145 goto nla_put_failure;
2146 sctp_key = nla_data(nla);
2147 sctp_key->sctp_src = output->tp.src;
2148 sctp_key->sctp_dst = output->tp.dst;
2149 } else if (swkey->eth.type == htons(ETH_P_IP) &&
2150 swkey->ip.proto == IPPROTO_ICMP) {
2151 struct ovs_key_icmp *icmp_key;
2152
2153 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
2154 if (!nla)
2155 goto nla_put_failure;
2156 icmp_key = nla_data(nla);
2157 icmp_key->icmp_type = ntohs(output->tp.src);
2158 icmp_key->icmp_code = ntohs(output->tp.dst);
2159 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
2160 swkey->ip.proto == IPPROTO_ICMPV6) {
2161 struct ovs_key_icmpv6 *icmpv6_key;
2162
2163 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
2164 sizeof(*icmpv6_key));
2165 if (!nla)
2166 goto nla_put_failure;
2167 icmpv6_key = nla_data(nla);
2168 icmpv6_key->icmpv6_type = ntohs(output->tp.src);
2169 icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
2170
2171 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
2172 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
2173 struct ovs_key_nd *nd_key;
2174
2175 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
2176 if (!nla)
2177 goto nla_put_failure;
2178 nd_key = nla_data(nla);
2179 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
2180 sizeof(nd_key->nd_target));
2181 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
2182 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
2183 }
2184 }
2185 }
2186
2187 unencap:
2188 if (in_encap)
2189 nla_nest_end(skb, in_encap);
2190 if (encap)
2191 nla_nest_end(skb, encap);
2192
2193 return 0;
2194
2195 nla_put_failure:
2196 return -EMSGSIZE;
2197 }
2198
2199 int ovs_nla_put_key(const struct sw_flow_key *swkey,
2200 const struct sw_flow_key *output, int attr, bool is_mask,
2201 struct sk_buff *skb)
2202 {
2203 int err;
2204 struct nlattr *nla;
2205
2206 nla = nla_nest_start(skb, attr);
2207 if (!nla)
2208 return -EMSGSIZE;
2209 err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2210 if (err)
2211 return err;
2212 nla_nest_end(skb, nla);
2213
2214 return 0;
2215 }
2216
2217 /* Called with ovs_mutex or RCU read lock. */
2218 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2219 {
2220 if (ovs_identifier_is_ufid(&flow->id))
2221 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2222 flow->id.ufid);
2223
2224 return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2225 OVS_FLOW_ATTR_KEY, false, skb);
2226 }
2227
2228 /* Called with ovs_mutex or RCU read lock. */
2229 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2230 {
2231 return ovs_nla_put_key(&flow->key, &flow->key,
2232 OVS_FLOW_ATTR_KEY, false, skb);
2233 }
2234
2235 /* Called with ovs_mutex or RCU read lock. */
2236 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2237 {
2238 return ovs_nla_put_key(&flow->key, &flow->mask->key,
2239 OVS_FLOW_ATTR_MASK, true, skb);
2240 }
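/*
 * Usage sketch (illustrative only; the helper name is hypothetical): a
 * flow-dump reply is filled with the identifier, the masked key and the
 * mask using the three helpers above.  As with those helpers, the caller
 * must hold ovs_mutex or the RCU read lock.
 */
#if 0
static int example_fill_flow(const struct sw_flow *flow, struct sk_buff *skb)
{
	int err;

	err = ovs_nla_put_identifier(flow, skb);	 /* UFID or unmasked key */
	if (!err)
		err = ovs_nla_put_masked_key(flow, skb); /* OVS_FLOW_ATTR_KEY */
	if (!err)
		err = ovs_nla_put_mask(flow, skb);	 /* OVS_FLOW_ATTR_MASK */
	return err;
}
#endif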
2241
2242 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,9,0)
2243 #define MAX_ACTIONS_BUFSIZE (16 * 1024)
2244 #else
2245 #define MAX_ACTIONS_BUFSIZE (32 * 1024)
2246 #endif
2247
2248 static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2249 {
2250 struct sw_flow_actions *sfa;
2251
2252 WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2253
2254 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2255 if (!sfa)
2256 return ERR_PTR(-ENOMEM);
2257
2258 sfa->actions_len = 0;
2259 return sfa;
2260 }
2261
2262 static void ovs_nla_free_set_action(const struct nlattr *a)
2263 {
2264 const struct nlattr *ovs_key = nla_data(a);
2265 struct ovs_tunnel_info *ovs_tun;
2266
2267 switch (nla_type(ovs_key)) {
2268 case OVS_KEY_ATTR_TUNNEL_INFO:
2269 ovs_tun = nla_data(ovs_key);
2270 ovs_dst_release((struct dst_entry *)ovs_tun->tun_dst);
2271 break;
2272 }
2273 }
2274
2275 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2276 {
2277 const struct nlattr *a;
2278 int rem;
2279
2280 if (!sf_acts)
2281 return;
2282
2283 nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
2284 switch (nla_type(a)) {
2285 case OVS_ACTION_ATTR_SET:
2286 ovs_nla_free_set_action(a);
2287 break;
2288 case OVS_ACTION_ATTR_CT:
2289 ovs_ct_free_action(a);
2290 break;
2291 }
2292 }
2293
2294 kfree(sf_acts);
2295 }
2296
2297 static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2298 {
2299 ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2300 }
2301
2302 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
2303 * The caller must hold rcu_read_lock for this to be sensible. */
2304 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
2305 {
2306 call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
2307 }
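/*
 * Usage sketch (illustrative only; the helper name is hypothetical, and
 * ovs_mutex is assumed to be held): swapping in a new action list while
 * deferring the free of the old list to an RCU grace period so that
 * concurrent readers stay safe.
 */
#if 0
static void example_swap_actions(struct sw_flow *flow,
				 struct sw_flow_actions *new_acts)
{
	struct sw_flow_actions *old_acts;

	old_acts = ovsl_dereference(flow->sf_acts);
	rcu_assign_pointer(flow->sf_acts, new_acts);
	ovs_nla_free_flow_actions_rcu(old_acts);
}
#endif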
2308
2309 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2310 int attr_len, bool log)
2311 {
2312
2313 struct sw_flow_actions *acts;
2314 int new_acts_size;
2315 size_t req_size = NLA_ALIGN(attr_len);
2316 int next_offset = offsetof(struct sw_flow_actions, actions) +
2317 (*sfa)->actions_len;
2318
2319 if (req_size <= (ksize(*sfa) - next_offset))
2320 goto out;
2321
2322 new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
2323
2324 if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2325 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
2326 OVS_NLERR(log, "Flow action size exceeds max %u",
2327 MAX_ACTIONS_BUFSIZE);
2328 return ERR_PTR(-EMSGSIZE);
2329 }
2330 new_acts_size = MAX_ACTIONS_BUFSIZE;
2331 }
2332
2333 acts = nla_alloc_flow_actions(new_acts_size);
2334 if (IS_ERR(acts))
2335 return (void *)acts;
2336
2337 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
2338 acts->actions_len = (*sfa)->actions_len;
2339 acts->orig_len = (*sfa)->orig_len;
2340 kfree(*sfa);
2341 *sfa = acts;
2342
2343 out:
2344 (*sfa)->actions_len += req_size;
2345 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
2346 }
2347
2348 static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2349 int attrtype, void *data, int len, bool log)
2350 {
2351 struct nlattr *a;
2352
2353 a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2354 if (IS_ERR(a))
2355 return a;
2356
2357 a->nla_type = attrtype;
2358 a->nla_len = nla_attr_size(len);
2359
2360 if (data)
2361 memcpy(nla_data(a), data, len);
2362 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2363
2364 return a;
2365 }
2366
2367 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
2368 int len, bool log)
2369 {
2370 struct nlattr *a;
2371
2372 a = __add_action(sfa, attrtype, data, len, log);
2373
2374 return PTR_ERR_OR_ZERO(a);
2375 }
2376
2377 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2378 int attrtype, bool log)
2379 {
2380 int used = (*sfa)->actions_len;
2381 int err;
2382
2383 err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2384 if (err)
2385 return err;
2386
2387 return used;
2388 }
2389
2390 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2391 int st_offset)
2392 {
2393 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2394 st_offset);
2395
2396 a->nla_len = sfa->actions_len - st_offset;
2397 }
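/*
 * Sketch of the nested-action pattern used throughout this file
 * (illustrative only; 'sfa' and 'log' stand for a caller's variables):
 * reserve the nested attribute header first, emit the nested payload,
 * then patch the header's length once the payload size is known.
 */
#if 0
	int start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);

	if (start < 0)
		return start;
	/* ... ovs_nla_add_action() calls emitting the nested payload ... */
	add_nested_action_end(*sfa, start);
#endif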
2398
2399 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2400 const struct sw_flow_key *key,
2401 struct sw_flow_actions **sfa,
2402 __be16 eth_type, __be16 vlan_tci, bool log);
2403
2404 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
2405 const struct sw_flow_key *key,
2406 struct sw_flow_actions **sfa,
2407 __be16 eth_type, __be16 vlan_tci,
2408 bool log, bool last)
2409 {
2410 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
2411 const struct nlattr *probability, *actions;
2412 const struct nlattr *a;
2413 int rem, start, err;
2414 struct sample_arg arg;
2415
2416 memset(attrs, 0, sizeof(attrs));
2417 nla_for_each_nested(a, attr, rem) {
2418 int type = nla_type(a);
2419 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
2420 return -EINVAL;
2421 attrs[type] = a;
2422 }
2423 if (rem)
2424 return -EINVAL;
2425
2426 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
2427 if (!probability || nla_len(probability) != sizeof(u32))
2428 return -EINVAL;
2429
2430 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
2431 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
2432 return -EINVAL;
2433
2434 /* Validation done; copy the sample action. */
2435 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
2436 if (start < 0)
2437 return start;
2438
2439 /* When both the skb and the flow may be changed, put the sample
2440 * into a deferred FIFO. On the other hand, if only the skb
2441 * may be modified, the actions can be executed in place.
2442 *
2443 * Do this analysis at flow installation time.
2444 * Set 'clone_action->exec' to true if the actions can be
2445 * executed without being deferred.
2446 *
2447 * If the sample is the last action, it can always be executed
2448 * rather than deferred.
2449 */
2450 arg.exec = last || !actions_may_change_flow(actions);
2451 arg.probability = nla_get_u32(probability);
2452
2453 err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
2454 log);
2455 if (err)
2456 return err;
2457
2458 err = __ovs_nla_copy_actions(net, actions, key, sfa,
2459 eth_type, vlan_tci, log);
2460
2461 if (err)
2462 return err;
2463
2464 add_nested_action_end(*sfa, start);
2465
2466 return 0;
2467 }
2468
2469 static int validate_and_copy_clone(struct net *net,
2470 const struct nlattr *attr,
2471 const struct sw_flow_key *key,
2472 struct sw_flow_actions **sfa,
2473 __be16 eth_type, __be16 vlan_tci,
2474 bool log, bool last)
2475 {
2476 int start, err;
2477 u32 exec;
2478
2479 if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
2480 return -EINVAL;
2481
2482 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
2483 if (start < 0)
2484 return start;
2485
2486 exec = last || !actions_may_change_flow(attr);
2487
2488 err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
2489 sizeof(exec), log);
2490 if (err)
2491 return err;
2492
2493 err = __ovs_nla_copy_actions(net, attr, key, sfa,
2494 eth_type, vlan_tci, log);
2495 if (err)
2496 return err;
2497
2498 add_nested_action_end(*sfa, start);
2499
2500 return 0;
2501 }
2502
2503 void ovs_match_init(struct sw_flow_match *match,
2504 struct sw_flow_key *key,
2505 bool reset_key,
2506 struct sw_flow_mask *mask)
2507 {
2508 memset(match, 0, sizeof(*match));
2509 match->key = key;
2510 match->mask = mask;
2511
2512 if (reset_key)
2513 memset(key, 0, sizeof(*key));
2514
2515 if (mask) {
2516 memset(&mask->key, 0, sizeof(mask->key));
2517 mask->range.start = mask->range.end = 0;
2518 }
2519 }
2520
2521 static int validate_geneve_opts(struct sw_flow_key *key)
2522 {
2523 struct geneve_opt *option;
2524 int opts_len = key->tun_opts_len;
2525 bool crit_opt = false;
2526
2527 option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2528 while (opts_len > 0) {
2529 int len;
2530
2531 if (opts_len < sizeof(*option))
2532 return -EINVAL;
2533
2534 len = sizeof(*option) + option->length * 4;
2535 if (len > opts_len)
2536 return -EINVAL;
2537
2538 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2539
2540 option = (struct geneve_opt *)((u8 *)option + len);
2541 opts_len -= len;
2542 }
2543
2544 key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2545
2546 return 0;
2547 }
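/*
 * Worked example (illustrative): a geneve option whose 'length' field
 * is 2 carries 2 * 4 = 8 bytes of option data, so the whole TLV
 * consumes sizeof(struct geneve_opt) + 8 bytes of 'opts_len' above.
 */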
2548
2549 static int validate_and_copy_set_tun(const struct nlattr *attr,
2550 struct sw_flow_actions **sfa, bool log)
2551 {
2552 struct sw_flow_match match;
2553 struct sw_flow_key key;
2554 struct metadata_dst *tun_dst;
2555 struct ip_tunnel_info *tun_info;
2556 struct ovs_tunnel_info *ovs_tun;
2557 struct nlattr *a;
2558 int err = 0, start, opts_type;
2559 __be16 dst_opt_type;
2560
2561 dst_opt_type = 0;
2562 ovs_match_init(&match, &key, true, NULL);
2563 opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2564 if (opts_type < 0)
2565 return opts_type;
2566
2567 if (key.tun_opts_len) {
2568 switch (opts_type) {
2569 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2570 err = validate_geneve_opts(&key);
2571 if (err < 0)
2572 return err;
2573 dst_opt_type = TUNNEL_GENEVE_OPT;
2574 break;
2575 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2576 dst_opt_type = TUNNEL_VXLAN_OPT;
2577 break;
2578 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2579 dst_opt_type = TUNNEL_ERSPAN_OPT;
2580 break;
2581 }
2582 }
2583
2584 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2585 if (start < 0)
2586 return start;
2587
2588 tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
2589 GFP_KERNEL);
2590
2591 if (!tun_dst)
2592 return -ENOMEM;
2593
2594 err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2595 if (err) {
2596 dst_release((struct dst_entry *)tun_dst);
2597 return err;
2598 }
2599 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2600 sizeof(*ovs_tun), log);
2601 if (IS_ERR(a)) {
2602 ovs_dst_release((struct dst_entry *)tun_dst);
2603 return PTR_ERR(a);
2604 }
2605
2606 ovs_tun = nla_data(a);
2607 ovs_tun->tun_dst = tun_dst;
2608
2609 tun_info = &tun_dst->u.tun_info;
2610 tun_info->mode = IP_TUNNEL_INFO_TX;
2611 if (key.tun_proto == AF_INET6)
2612 tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2613 tun_info->key = key.tun_key;
2614
2615 /* We need to store the options in the action itself since
2616 * everything else will go away after flow setup. We can append
2617 * them to tun_info and then point there.
2618 */
2619 ip_tunnel_info_opts_set(tun_info,
2620 TUN_METADATA_OPTS(&key, key.tun_opts_len),
2621 key.tun_opts_len, dst_opt_type);
2622 add_nested_action_end(*sfa, start);
2623
2624 return err;
2625 }
2626
2627 static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2628 bool is_push_nsh, bool log)
2629 {
2630 struct sw_flow_match match;
2631 struct sw_flow_key key;
2632 int ret = 0;
2633
2634 ovs_match_init(&match, &key, true, NULL);
2635 ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2636 is_push_nsh, log);
2637 return !ret;
2638 }
2639
2640 /* Return false if there are any non-masked bits set.
2641 * Mask follows data immediately, before any netlink padding.
2642 */
2643 static bool validate_masked(u8 *data, int len)
2644 {
2645 u8 *mask = data + len;
2646
2647 while (len--)
2648 if (*data++ & ~*mask++)
2649 return false;
2650
2651 return true;
2652 }
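/*
 * Worked example (illustrative): a masked set attribute carries the
 * value bytes immediately followed by the mask bytes.
 */
#if 0
	u8 payload[4] = { 0x12, 0x34,	/* value */
			  0xff, 0x00 };	/* mask */

	/* false: value byte 0x34 has bits set outside its 0x00 mask. */
	validate_masked(payload, 2);
#endif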
2653
2654 static int validate_set(const struct nlattr *a,
2655 const struct sw_flow_key *flow_key,
2656 struct sw_flow_actions **sfa, bool *skip_copy,
2657 u8 mac_proto, __be16 eth_type, bool masked, bool log)
2658 {
2659 const struct nlattr *ovs_key = nla_data(a);
2660 int key_type = nla_type(ovs_key);
2661 size_t key_len;
2662
2663 /* There can be only one key in an action. */
2664 if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2665 return -EINVAL;
2666
2667 key_len = nla_len(ovs_key);
2668 if (masked)
2669 key_len /= 2;
2670
2671 if (key_type > OVS_KEY_ATTR_MAX ||
2672 !check_attr_len(key_len, ovs_key_lens[key_type].len))
2673 return -EINVAL;
2674
2675 if (masked && !validate_masked(nla_data(ovs_key), key_len))
2676 return -EINVAL;
2677
2678 switch (key_type) {
2679 const struct ovs_key_ipv4 *ipv4_key;
2680 const struct ovs_key_ipv6 *ipv6_key;
2681 int err;
2682
2683 case OVS_KEY_ATTR_PRIORITY:
2684 case OVS_KEY_ATTR_SKB_MARK:
2685 case OVS_KEY_ATTR_CT_MARK:
2686 case OVS_KEY_ATTR_CT_LABELS:
2687 break;
2688
2689 case OVS_KEY_ATTR_ETHERNET:
2690 if (mac_proto != MAC_PROTO_ETHERNET)
2691 return -EINVAL;
2692 break;
2693
2694 case OVS_KEY_ATTR_TUNNEL:
2695 #ifndef USE_UPSTREAM_TUNNEL
2696 if (eth_p_mpls(eth_type))
2697 return -EINVAL;
2698 #endif
2699 if (masked)
2700 return -EINVAL; /* Masked tunnel set not supported. */
2701
2702 *skip_copy = true;
2703 err = validate_and_copy_set_tun(a, sfa, log);
2704 if (err)
2705 return err;
2706 break;
2707
2708 case OVS_KEY_ATTR_IPV4:
2709 if (eth_type != htons(ETH_P_IP))
2710 return -EINVAL;
2711
2712 ipv4_key = nla_data(ovs_key);
2713
2714 if (masked) {
2715 const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2716
2717 /* Non-writeable fields. */
2718 if (mask->ipv4_proto || mask->ipv4_frag)
2719 return -EINVAL;
2720 } else {
2721 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2722 return -EINVAL;
2723
2724 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2725 return -EINVAL;
2726 }
2727 break;
2728
2729 case OVS_KEY_ATTR_IPV6:
2730 if (eth_type != htons(ETH_P_IPV6))
2731 return -EINVAL;
2732
2733 ipv6_key = nla_data(ovs_key);
2734
2735 if (masked) {
2736 const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2737
2738 /* Non-writeable fields. */
2739 if (mask->ipv6_proto || mask->ipv6_frag)
2740 return -EINVAL;
2741
2742 /* Invalid bits in the flow label mask? */
2743 if (ntohl(mask->ipv6_label) & 0xFFF00000)
2744 return -EINVAL;
2745 } else {
2746 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2747 return -EINVAL;
2748
2749 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2750 return -EINVAL;
2751 }
2752 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2753 return -EINVAL;
2754
2755 break;
2756
2757 case OVS_KEY_ATTR_TCP:
2758 if ((eth_type != htons(ETH_P_IP) &&
2759 eth_type != htons(ETH_P_IPV6)) ||
2760 flow_key->ip.proto != IPPROTO_TCP)
2761 return -EINVAL;
2762
2763 break;
2764
2765 case OVS_KEY_ATTR_UDP:
2766 if ((eth_type != htons(ETH_P_IP) &&
2767 eth_type != htons(ETH_P_IPV6)) ||
2768 flow_key->ip.proto != IPPROTO_UDP)
2769 return -EINVAL;
2770
2771 break;
2772
2773 case OVS_KEY_ATTR_MPLS:
2774 if (!eth_p_mpls(eth_type))
2775 return -EINVAL;
2776 break;
2777
2778 case OVS_KEY_ATTR_SCTP:
2779 if ((eth_type != htons(ETH_P_IP) &&
2780 eth_type != htons(ETH_P_IPV6)) ||
2781 flow_key->ip.proto != IPPROTO_SCTP)
2782 return -EINVAL;
2783
2784 break;
2785
2786 case OVS_KEY_ATTR_NSH:
2787 if (eth_type != htons(ETH_P_NSH))
2788 return -EINVAL;
2789 if (!validate_nsh(nla_data(a), masked, false, log))
2790 return -EINVAL;
2791 break;
2792
2793 default:
2794 return -EINVAL;
2795 }
2796
2797 /* Convert non-masked non-tunnel set actions to masked set actions. */
2798 if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
2799 int start, len = key_len * 2;
2800 struct nlattr *at;
2801
2802 *skip_copy = true;
2803
2804 start = add_nested_action_start(sfa,
2805 OVS_ACTION_ATTR_SET_TO_MASKED,
2806 log);
2807 if (start < 0)
2808 return start;
2809
2810 at = __add_action(sfa, key_type, NULL, len, log);
2811 if (IS_ERR(at))
2812 return PTR_ERR(at);
2813
2814 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
2815 memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */
2816 /* Clear non-writeable bits from otherwise writeable fields. */
2817 if (key_type == OVS_KEY_ATTR_IPV6) {
2818 struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
2819
2820 mask->ipv6_label &= htonl(0x000FFFFF);
2821 }
2822 add_nested_action_end(*sfa, start);
2823 }
2824
2825 return 0;
2826 }
2827
2828 static int validate_userspace(const struct nlattr *attr)
2829 {
2830 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
2831 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
2832 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
2833 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
2834 };
2835 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
2836 int error;
2837
2838 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
2839 userspace_policy, NULL);
2840 if (error)
2841 return error;
2842
2843 if (!a[OVS_USERSPACE_ATTR_PID] ||
2844 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
2845 return -EINVAL;
2846
2847 return 0;
2848 }
2849
2850 static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
2851 [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 },
2852 [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED },
2853 [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED },
2854 };
2855
2856 static int validate_and_copy_check_pkt_len(struct net *net,
2857 const struct nlattr *attr,
2858 const struct sw_flow_key *key,
2859 struct sw_flow_actions **sfa,
2860 __be16 eth_type, __be16 vlan_tci,
2861 bool log, bool last)
2862 {
2863 const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
2864 struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
2865 struct check_pkt_len_arg arg;
2866 int nested_acts_start;
2867 int start, err;
2868
2869 err = nla_parse_nested(a, OVS_CHECK_PKT_LEN_ATTR_MAX, attr,
2870 cpl_policy, NULL);
2871 if (err)
2872 return err;
2873
2874 if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
2875 !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
2876 return -EINVAL;
2877
2878 acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
2879 acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
2880
2881 /* Both nested action lists must be present. */
2882 if (!acts_if_greater || !acts_if_lesser_eq)
2883 return -EINVAL;
2884
2885 /* Validation done; copy the nested actions. */
2886 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
2887 log);
2888 if (start < 0)
2889 return start;
2890
2891 arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
2892 arg.exec_for_lesser_equal =
2893 last || !actions_may_change_flow(acts_if_lesser_eq);
2894 arg.exec_for_greater =
2895 last || !actions_may_change_flow(acts_if_greater);
2896
2897 err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
2898 sizeof(arg), log);
2899 if (err)
2900 return err;
2901
2902 nested_acts_start = add_nested_action_start(sfa,
2903 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
2904 if (nested_acts_start < 0)
2905 return nested_acts_start;
2906
2907 err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
2908 eth_type, vlan_tci, log);
2909
2910 if (err)
2911 return err;
2912
2913 add_nested_action_end(*sfa, nested_acts_start);
2914
2915 nested_acts_start = add_nested_action_start(sfa,
2916 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
2917 if (nested_acts_start < 0)
2918 return nested_acts_start;
2919
2920 err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
2921 eth_type, vlan_tci, log);
2922
2923 if (err)
2924 return err;
2925
2926 add_nested_action_end(*sfa, nested_acts_start);
2927 add_nested_action_end(*sfa, start);
2928 return 0;
2929 }
2930
2931 static int copy_action(const struct nlattr *from,
2932 struct sw_flow_actions **sfa, bool log)
2933 {
2934 int totlen = NLA_ALIGN(from->nla_len);
2935 struct nlattr *to;
2936
2937 to = reserve_sfa_size(sfa, from->nla_len, log);
2938 if (IS_ERR(to))
2939 return PTR_ERR(to);
2940
2941 memcpy(to, from, totlen);
2942 return 0;
2943 }
2944
2945 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2946 const struct sw_flow_key *key,
2947 struct sw_flow_actions **sfa,
2948 __be16 eth_type, __be16 vlan_tci, bool log)
2949 {
2950 u8 mac_proto = ovs_key_mac_proto(key);
2951 const struct nlattr *a;
2952 int rem, err;
2953
2954 nla_for_each_nested(a, attr, rem) {
2955 /* Expected argument lengths, (u32)-1 for variable length. */
2956 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2957 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2958 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2959 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2960 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2961 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2962 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2963 [OVS_ACTION_ATTR_POP_VLAN] = 0,
2964 [OVS_ACTION_ATTR_SET] = (u32)-1,
2965 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2966 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2967 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2968 [OVS_ACTION_ATTR_CT] = (u32)-1,
2969 [OVS_ACTION_ATTR_CT_CLEAR] = 0,
2970 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2971 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2972 [OVS_ACTION_ATTR_POP_ETH] = 0,
2973 [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2974 [OVS_ACTION_ATTR_POP_NSH] = 0,
2975 [OVS_ACTION_ATTR_METER] = sizeof(u32),
2976 [OVS_ACTION_ATTR_CLONE] = (u32)-1,
2977 [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
2978 };
2979 const struct ovs_action_push_vlan *vlan;
2980 int type = nla_type(a);
2981 bool skip_copy;
2982
2983 if (type > OVS_ACTION_ATTR_MAX ||
2984 (action_lens[type] != nla_len(a) &&
2985 action_lens[type] != (u32)-1))
2986 return -EINVAL;
2987
2988 skip_copy = false;
2989 switch (type) {
2990 case OVS_ACTION_ATTR_UNSPEC:
2991 return -EINVAL;
2992
2993 case OVS_ACTION_ATTR_USERSPACE:
2994 err = validate_userspace(a);
2995 if (err)
2996 return err;
2997 break;
2998
2999 case OVS_ACTION_ATTR_OUTPUT:
3000 if (nla_get_u32(a) >= DP_MAX_PORTS)
3001 return -EINVAL;
3002 break;
3003
3004 case OVS_ACTION_ATTR_TRUNC: {
3005 const struct ovs_action_trunc *trunc = nla_data(a);
3006
3007 if (trunc->max_len < ETH_HLEN)
3008 return -EINVAL;
3009 break;
3010 }
3011
3012 case OVS_ACTION_ATTR_HASH: {
3013 const struct ovs_action_hash *act_hash = nla_data(a);
3014
3015 switch (act_hash->hash_alg) {
3016 case OVS_HASH_ALG_L4:
3017 break;
3018 default:
3019 return -EINVAL;
3020 }
3021
3022 break;
3023 }
3024
3025 case OVS_ACTION_ATTR_POP_VLAN:
3026 if (mac_proto != MAC_PROTO_ETHERNET)
3027 return -EINVAL;
3028 vlan_tci = htons(0);
3029 break;
3030
3031 case OVS_ACTION_ATTR_PUSH_VLAN:
3032 if (mac_proto != MAC_PROTO_ETHERNET)
3033 return -EINVAL;
3034 vlan = nla_data(a);
3035 if (!eth_type_vlan(vlan->vlan_tpid))
3036 return -EINVAL;
3037 if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
3038 return -EINVAL;
3039 vlan_tci = vlan->vlan_tci;
3040 break;
3041
3042 case OVS_ACTION_ATTR_RECIRC:
3043 break;
3044
3045 case OVS_ACTION_ATTR_PUSH_MPLS: {
3046 const struct ovs_action_push_mpls *mpls = nla_data(a);
3047
3048 if (!eth_p_mpls(mpls->mpls_ethertype))
3049 return -EINVAL;
3050 /* Prohibit pushing MPLS except over a whitelist of
3051 * ethertypes for which the tag order is known.
3052 */
3053 if (vlan_tci & htons(VLAN_CFI_MASK) ||
3054 (eth_type != htons(ETH_P_IP) &&
3055 eth_type != htons(ETH_P_IPV6) &&
3056 eth_type != htons(ETH_P_ARP) &&
3057 eth_type != htons(ETH_P_RARP) &&
3058 !eth_p_mpls(eth_type)))
3059 return -EINVAL;
3060 eth_type = mpls->mpls_ethertype;
3061 break;
3062 }
3063
3064 case OVS_ACTION_ATTR_POP_MPLS:
3065 if (vlan_tci & htons(VLAN_CFI_MASK) ||
3066 !eth_p_mpls(eth_type))
3067 return -EINVAL;
3068
3069 /* Disallow subsequent L2.5+ set and mpls_pop actions
3070 * as there is no check here to ensure that the new
3071 * eth_type is valid and thus set actions could
3072 * write off the end of the packet or otherwise
3073 * corrupt it.
3074 *
3075 * Support for these actions is planned using packet
3076 * recirculation.
3077 */
3078 eth_type = htons(0);
3079 break;
3080
3081 case OVS_ACTION_ATTR_SET:
3082 err = validate_set(a, key, sfa,
3083 &skip_copy, mac_proto, eth_type,
3084 false, log);
3085 if (err)
3086 return err;
3087 break;
3088
3089 case OVS_ACTION_ATTR_SET_MASKED:
3090 err = validate_set(a, key, sfa,
3091 &skip_copy, mac_proto, eth_type,
3092 true, log);
3093 if (err)
3094 return err;
3095 break;
3096
3097 case OVS_ACTION_ATTR_SAMPLE: {
3098 bool last = nla_is_last(a, rem);
3099
3100 err = validate_and_copy_sample(net, a, key, sfa,
3101 eth_type, vlan_tci,
3102 log, last);
3103 if (err)
3104 return err;
3105 skip_copy = true;
3106 break;
3107 }
3108
3109 case OVS_ACTION_ATTR_CT:
3110 err = ovs_ct_copy_action(net, a, key, sfa, log);
3111 if (err)
3112 return err;
3113 skip_copy = true;
3114 break;
3115
3116 case OVS_ACTION_ATTR_CT_CLEAR:
3117 break;
3118
3119 case OVS_ACTION_ATTR_PUSH_ETH:
3120 /* Disallow pushing an Ethernet header if one
3121 * is already present. */
3122 if (mac_proto != MAC_PROTO_NONE)
3123 return -EINVAL;
3124 mac_proto = MAC_PROTO_ETHERNET;
3125 break;
3126
3127 case OVS_ACTION_ATTR_POP_ETH:
3128 if (mac_proto != MAC_PROTO_ETHERNET)
3129 return -EINVAL;
3130 if (vlan_tci & htons(VLAN_CFI_MASK))
3131 return -EINVAL;
3132 mac_proto = MAC_PROTO_NONE;
3133 break;
3134
3135 case OVS_ACTION_ATTR_PUSH_NSH:
3136 if (mac_proto != MAC_PROTO_ETHERNET) {
3137 u8 next_proto;
3138
3139 next_proto = tun_p_from_eth_p(eth_type);
3140 if (!next_proto)
3141 return -EINVAL;
3142 }
3143 mac_proto = MAC_PROTO_NONE;
3144 if (!validate_nsh(nla_data(a), false, true, true))
3145 return -EINVAL;
3146 break;
3147
3148 case OVS_ACTION_ATTR_POP_NSH: {
3149 __be16 inner_proto;
3150
3151 if (eth_type != htons(ETH_P_NSH))
3152 return -EINVAL;
3153 inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3154 if (!inner_proto)
3155 return -EINVAL;
3156 if (key->nsh.base.np == TUN_P_ETHERNET)
3157 mac_proto = MAC_PROTO_ETHERNET;
3158 else
3159 mac_proto = MAC_PROTO_NONE;
3160 break;
3161 }
3162
3163 case OVS_ACTION_ATTR_METER:
3164 /* Non-existent meters are simply ignored. */
3165 break;
3166
3167 case OVS_ACTION_ATTR_CLONE: {
3168 bool last = nla_is_last(a, rem);
3169
3170 err = validate_and_copy_clone(net, a, key, sfa,
3171 eth_type, vlan_tci,
3172 log, last);
3173 if (err)
3174 return err;
3175 skip_copy = true;
3176 break;
3177 }
3178
3179 case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
3180 bool last = nla_is_last(a, rem);
3181
3182 err = validate_and_copy_check_pkt_len(net, a, key, sfa,
3183 eth_type,
3184 vlan_tci, log,
3185 last);
3186 if (err)
3187 return err;
3188 skip_copy = true;
3189 break;
3190 }
3191
3192 default:
3193 OVS_NLERR(log, "Unknown Action type %d", type);
3194 return -EINVAL;
3195 }
3196 if (!skip_copy) {
3197 err = copy_action(a, sfa, log);
3198 if (err)
3199 return err;
3200 }
3201 }
3202
3203 if (rem > 0)
3204 return -EINVAL;
3205
3206 return 0;
3207 }
3208
3209 /* 'key' must be the masked key. */
3210 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3211 const struct sw_flow_key *key,
3212 struct sw_flow_actions **sfa, bool log)
3213 {
3214 int err;
3215
3216 *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3217 if (IS_ERR(*sfa))
3218 return PTR_ERR(*sfa);
3219
3220 (*sfa)->orig_len = nla_len(attr);
3221 err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3222 key->eth.vlan.tci, log);
3223 if (err)
3224 ovs_nla_free_flow_actions(*sfa);
3225
3226 return err;
3227 }
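/*
 * Usage sketch (illustrative only; the helper name is hypothetical):
 * flow setup validates and copies the userspace action list against the
 * masked key in one call; the kernel-formatted copy must later be freed
 * with ovs_nla_free_flow_actions() or its RCU variant.
 */
#if 0
static int example_install_actions(struct net *net,
				   const struct nlattr *a_acts,
				   const struct sw_flow_key *masked_key)
{
	struct sw_flow_actions *acts;
	int err;

	err = ovs_nla_copy_actions(net, a_acts, masked_key, &acts, true);
	if (err)
		return err;

	/* ... attach 'acts' to a flow; on failure free it again ... */
	ovs_nla_free_flow_actions(acts);
	return 0;
}
#endif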
3228
3229 static int sample_action_to_attr(const struct nlattr *attr,
3230 struct sk_buff *skb)
3231 {
3232 struct nlattr *start, *ac_start = NULL, *sample_arg;
3233 int err = 0, rem = nla_len(attr);
3234 const struct sample_arg *arg;
3235 struct nlattr *actions;
3236
3237 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
3238 if (!start)
3239 return -EMSGSIZE;
3240
3241 sample_arg = nla_data(attr);
3242 arg = nla_data(sample_arg);
3243 actions = nla_next(sample_arg, &rem);
3244
3245 if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
3246 err = -EMSGSIZE;
3247 goto out;
3248 }
3249
3250 ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
3251 if (!ac_start) {
3252 err = -EMSGSIZE;
3253 goto out;
3254 }
3255
3256 err = ovs_nla_put_actions(actions, rem, skb);
3257
3258 out:
3259 if (err) {
3260 nla_nest_cancel(skb, ac_start);
3261 nla_nest_cancel(skb, start);
3262 } else {
3263 nla_nest_end(skb, ac_start);
3264 nla_nest_end(skb, start);
3265 }
3266
3267 return err;
3268 }
3269
3270 static int clone_action_to_attr(const struct nlattr *attr,
3271 struct sk_buff *skb)
3272 {
3273 struct nlattr *start;
3274 int err = 0, rem = nla_len(attr);
3275
3276 start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
3277 if (!start)
3278 return -EMSGSIZE;
3279
3280 err = ovs_nla_put_actions(nla_data(attr), rem, skb);
3281
3282 if (err)
3283 nla_nest_cancel(skb, start);
3284 else
3285 nla_nest_end(skb, start);
3286
3287 return err;
3288 }
3289
3290 static int check_pkt_len_action_to_attr(const struct nlattr *attr,
3291 struct sk_buff *skb)
3292 {
3293 struct nlattr *start, *ac_start = NULL;
3294 const struct check_pkt_len_arg *arg;
3295 const struct nlattr *a, *cpl_arg;
3296 int err = 0, rem = nla_len(attr);
3297
3298 start = nla_nest_start(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN);
3299 if (!start)
3300 return -EMSGSIZE;
3301
3302 /* The first nested attribute in 'attr' is always
3303 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
3304 */
3305 cpl_arg = nla_data(attr);
3306 arg = nla_data(cpl_arg);
3307
3308 if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) {
3309 err = -EMSGSIZE;
3310 goto out;
3311 }
3312
3313 /* The second nested attribute in 'attr' is always
3314 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
3315 */
3316 a = nla_next(cpl_arg, &rem);
3317 ac_start = nla_nest_start(skb,
3318 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
3319 if (!ac_start) {
3320 err = -EMSGSIZE;
3321 goto out;
3322 }
3323
3324 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
3325 if (err) {
3326 nla_nest_cancel(skb, ac_start);
3327 goto out;
3328 } else {
3329 nla_nest_end(skb, ac_start);
3330 }
3331
3332 /* The third nested attribute in 'attr' is always
3333 * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER.
3334 */
3335 a = nla_next(a, &rem);
3336 ac_start = nla_nest_start(skb,
3337 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
3338 if (!ac_start) {
3339 err = -EMSGSIZE;
3340 goto out;
3341 }
3342
3343 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
3344 if (err) {
3345 nla_nest_cancel(skb, ac_start);
3346 goto out;
3347 } else {
3348 nla_nest_end(skb, ac_start);
3349 }
3350
3351 nla_nest_end(skb, start);
3352 return 0;
3353
3354 out:
3355 nla_nest_cancel(skb, start);
3356 return err;
3357 }
3358
3359 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
3360 {
3361 const struct nlattr *ovs_key = nla_data(a);
3362 int key_type = nla_type(ovs_key);
3363 struct nlattr *start;
3364 int err;
3365
3366 switch (key_type) {
3367 case OVS_KEY_ATTR_TUNNEL_INFO: {
3368 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
3369 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
3370
3371 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3372 if (!start)
3373 return -EMSGSIZE;
3374
3375 err = ip_tun_to_nlattr(skb, &tun_info->key,
3376 ip_tunnel_info_opts(tun_info),
3377 tun_info->options_len,
3378 ip_tunnel_info_af(tun_info));
3379 if (err)
3380 return err;
3381 nla_nest_end(skb, start);
3382 break;
3383 }
3384 default:
3385 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
3386 return -EMSGSIZE;
3387 break;
3388 }
3389
3390 return 0;
3391 }
3392
3393 static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3394 struct sk_buff *skb)
3395 {
3396 const struct nlattr *ovs_key = nla_data(a);
3397 struct nlattr *nla;
3398 size_t key_len = nla_len(ovs_key) / 2;
3399
3400 /* Revert the conversion we did from a non-masked set action to
3401 * a masked set action.
3402 */
3403 nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3404 if (!nla)
3405 return -EMSGSIZE;
3406
3407 if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3408 return -EMSGSIZE;
3409
3410 nla_nest_end(skb, nla);
3411 return 0;
3412 }
3413
3414 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
3415 {
3416 const struct nlattr *a;
3417 int rem, err;
3418
3419 nla_for_each_attr(a, attr, len, rem) {
3420 int type = nla_type(a);
3421
3422 switch (type) {
3423 case OVS_ACTION_ATTR_SET:
3424 err = set_action_to_attr(a, skb);
3425 if (err)
3426 return err;
3427 break;
3428
3429 case OVS_ACTION_ATTR_SET_TO_MASKED:
3430 err = masked_set_action_to_set_action_attr(a, skb);
3431 if (err)
3432 return err;
3433 break;
3434
3435 case OVS_ACTION_ATTR_SAMPLE:
3436 err = sample_action_to_attr(a, skb);
3437 if (err)
3438 return err;
3439 break;
3440
3441 case OVS_ACTION_ATTR_CT:
3442 err = ovs_ct_action_to_attr(nla_data(a), skb);
3443 if (err)
3444 return err;
3445 break;
3446
3447 case OVS_ACTION_ATTR_CLONE:
3448 err = clone_action_to_attr(a, skb);
3449 if (err)
3450 return err;
3451 break;
3452
3453 case OVS_ACTION_ATTR_CHECK_PKT_LEN:
3454 err = check_pkt_len_action_to_attr(a, skb);
3455 if (err)
3456 return err;
3457 break;
3458
3459 default:
3460 if (nla_put(skb, type, nla_len(a), nla_data(a)))
3461 return -EMSGSIZE;
3462 break;
3463 }
3464 }
3465
3466 return 0;
3467 }
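/*
 * Usage sketch (illustrative only; the helper name is hypothetical):
 * dumping a flow's actions back to userspace wraps ovs_nla_put_actions()
 * in an OVS_FLOW_ATTR_ACTIONS nest, cancelling the nest on error.
 */
#if 0
static int example_put_actions(const struct sw_flow_actions *sf_acts,
			       struct sk_buff *skb)
{
	struct nlattr *start;
	int err;

	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
	if (!start)
		return -EMSGSIZE;

	err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb);
	if (err) {
		nla_nest_cancel(skb, start);
		return err;
	}

	nla_nest_end(skb, start);
	return 0;
}
#endif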