]> git.proxmox.com Git - mirror_ovs.git/blame - lib/tun-metadata.c
compat: Backport conntrack strictly to v3.10+.
[mirror_ovs.git] / lib / tun-metadata.c
CommitLineData
9558d2a5
JG
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include <errno.h>
19#include <stdbool.h>
20
21#include "bitmap.h"
22#include "compiler.h"
23#include "hmap.h"
24#include "match.h"
25#include "nx-match.h"
26#include "odp-netlink.h"
27#include "ofp-util.h"
28#include "ovs-thread.h"
29#include "ovs-rcu.h"
30#include "packets.h"
31#include "tun-metadata.h"
32
33struct tun_meta_entry {
34 struct hmap_node node; /* In struct tun_table's key_hmap. */
35 uint32_t key; /* (class << 16) | type. */
36 struct tun_metadata_loc loc;
37 bool valid; /* True if allocated to a class and type. */
38};
39
4e548ad9 40/* Maps from TLV option class+type to positions in a struct tun_metadata's
9558d2a5
JG
41 * 'opts' array. */
42struct tun_table {
43 /* TUN_METADATA<i> is stored in element <i>. */
44 struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
45
46 /* Each bit represents 4 bytes of space, 0-bits are free space. */
47 unsigned long alloc_map[BITMAP_N_LONGS(TUN_METADATA_TOT_OPT_SIZE / 4)];
48
49 /* The valid elements in entries[], indexed by class+type. */
50 struct hmap key_hmap;
51};
52BUILD_ASSERT_DECL(TUN_METADATA_TOT_OPT_SIZE % 4 == 0);
53
54static struct ovs_mutex tab_mutex = OVS_MUTEX_INITIALIZER;
55static OVSRCU_TYPE(struct tun_table *) metadata_tab;
56
57static enum ofperr tun_metadata_add_entry(struct tun_table *map, uint8_t idx,
58 uint16_t opt_class, uint8_t type,
59 uint8_t len) OVS_REQUIRES(tab_mutex);
60static void tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
61 OVS_REQUIRES(tab_mutex);
62static void memcpy_to_metadata(struct tun_metadata *dst, const void *src,
2234a7db
JG
63 const struct tun_metadata_loc *,
64 unsigned int idx);
9558d2a5
JG
65static void memcpy_from_metadata(void *dst, const struct tun_metadata *src,
66 const struct tun_metadata_loc *);
67
68static uint32_t
69tun_meta_key(ovs_be16 class, uint8_t type)
70{
71 return (OVS_FORCE uint16_t)class << 8 | type;
72}
73
74static ovs_be16
75tun_key_class(uint32_t key)
76{
77 return (OVS_FORCE ovs_be16)(key >> 8);
78}
79
/* Extracts the option type (the low 8 bits) from a key built by
 * tun_meta_key(). */
static uint8_t
tun_key_type(uint32_t key)
{
    return (uint8_t) (key & 0xff);
}
85
86/* Returns a newly allocated tun_table. If 'old_map' is nonnull then the new
87 * tun_table is a deep copy of the old one. */
88static struct tun_table *
89table_alloc(const struct tun_table *old_map) OVS_REQUIRES(tab_mutex)
90{
91 struct tun_table *new_map;
92
93 new_map = xzalloc(sizeof *new_map);
94
95 if (old_map) {
96 struct tun_meta_entry *entry;
97
98 *new_map = *old_map;
99 hmap_init(&new_map->key_hmap);
100
101 HMAP_FOR_EACH (entry, node, &old_map->key_hmap) {
102 struct tun_meta_entry *new_entry;
103 struct tun_metadata_loc_chain *chain;
104
105 new_entry = &new_map->entries[entry - old_map->entries];
106 hmap_insert(&new_map->key_hmap, &new_entry->node, entry->node.hash);
107
108 chain = &new_entry->loc.c;
109 while (chain->next) {
110 chain->next = xmemdup(chain->next, sizeof *chain->next);
111 chain = chain->next;
112 }
113 }
114 } else {
115 hmap_init(&new_map->key_hmap);
116 }
117
118 return new_map;
119}
120
121/* Frees 'map' and all the memory it owns. */
122static void
123table_free(struct tun_table *map) OVS_REQUIRES(tab_mutex)
124{
125 struct tun_meta_entry *entry;
126
127 if (!map) {
128 return;
129 }
130
131 HMAP_FOR_EACH (entry, node, &map->key_hmap) {
132 tun_metadata_del_entry(map, entry - map->entries);
133 }
134
135 free(map);
136}
137
138/* Creates a global tunnel metadata mapping table, if none already exists. */
139void
140tun_metadata_init(void)
141{
142 ovs_mutex_lock(&tab_mutex);
143
144 if (!ovsrcu_get_protected(struct tun_table *, &metadata_tab)) {
145 ovsrcu_set(&metadata_tab, table_alloc(NULL));
146 }
147
148 ovs_mutex_unlock(&tab_mutex);
149}
150
151enum ofperr
4e548ad9 152tun_metadata_table_mod(struct ofputil_tlv_table_mod *ttm)
9558d2a5
JG
153{
154 struct tun_table *old_map, *new_map;
4e548ad9 155 struct ofputil_tlv_map *ofp_map;
9558d2a5
JG
156 enum ofperr err = 0;
157
158 ovs_mutex_lock(&tab_mutex);
159
160 old_map = ovsrcu_get_protected(struct tun_table *, &metadata_tab);
161
4e548ad9
ML
162 switch (ttm->command) {
163 case NXTTMC_ADD:
9558d2a5
JG
164 new_map = table_alloc(old_map);
165
4e548ad9 166 LIST_FOR_EACH (ofp_map, list_node, &ttm->mappings) {
9558d2a5
JG
167 err = tun_metadata_add_entry(new_map, ofp_map->index,
168 ofp_map->option_class,
169 ofp_map->option_type,
170 ofp_map->option_len);
171 if (err) {
172 table_free(new_map);
173 goto out;
174 }
175 }
176 break;
177
4e548ad9 178 case NXTTMC_DELETE:
9558d2a5
JG
179 new_map = table_alloc(old_map);
180
4e548ad9 181 LIST_FOR_EACH (ofp_map, list_node, &ttm->mappings) {
9558d2a5
JG
182 tun_metadata_del_entry(new_map, ofp_map->index);
183 }
184 break;
185
4e548ad9 186 case NXTTMC_CLEAR:
9558d2a5
JG
187 new_map = table_alloc(NULL);
188 break;
189
190 default:
191 OVS_NOT_REACHED();
192 }
193
194 ovsrcu_set(&metadata_tab, new_map);
195 ovsrcu_postpone(table_free, old_map);
196
197out:
198 ovs_mutex_unlock(&tab_mutex);
199 return err;
200}
201
202void
4e548ad9 203tun_metadata_table_request(struct ofputil_tlv_table_reply *ttr)
9558d2a5
JG
204{
205 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
206 int i;
207
4e548ad9
ML
208 ttr->max_option_space = TUN_METADATA_TOT_OPT_SIZE;
209 ttr->max_fields = TUN_METADATA_NUM_OPTS;
210 list_init(&ttr->mappings);
9558d2a5
JG
211
212 for (i = 0; i < TUN_METADATA_NUM_OPTS; i++) {
213 struct tun_meta_entry *entry = &map->entries[i];
4e548ad9 214 struct ofputil_tlv_map *map;
9558d2a5
JG
215
216 if (!entry->valid) {
217 continue;
218 }
219
220 map = xmalloc(sizeof *map);
221 map->option_class = ntohs(tun_key_class(entry->key));
222 map->option_type = tun_key_type(entry->key);
223 map->option_len = entry->loc.len;
224 map->index = i;
225
4e548ad9 226 list_push_back(&ttr->mappings, &map->list_node);
9558d2a5
JG
227 }
228}
229
6728d578 230/* Copies the value of field 'mf' from 'tnl' (which must be in non-UDPIF format) * into 'value'.
9558d2a5
JG
231 *
232 * 'mf' must be an MFF_TUN_METADATA* field.
233 *
234 * This uses the global tunnel metadata mapping table created by
235 * tun_metadata_init(). If no such table has been created or if 'mf' hasn't
236 * been allocated in it yet, this just zeros 'value'. */
237void
6728d578 238tun_metadata_read(const struct flow_tnl *tnl,
9558d2a5
JG
239 const struct mf_field *mf, union mf_value *value)
240{
241 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
242 unsigned int idx = mf->id - MFF_TUN_METADATA0;
243 struct tun_metadata_loc *loc;
244
245 if (!map) {
246 memset(value->tun_metadata, 0, mf->n_bytes);
247 return;
248 }
249
250 loc = &map->entries[idx].loc;
251
252 memset(value->tun_metadata, 0, mf->n_bytes - loc->len);
253 memcpy_from_metadata(value->tun_metadata + mf->n_bytes - loc->len,
6728d578 254 &tnl->metadata, loc);
9558d2a5
JG
255}
256
6728d578 257/* Copies 'value' into field 'mf' in 'tnl' (in non-UDPIF format).
9558d2a5
JG
258 *
259 * 'mf' must be an MFF_TUN_METADATA* field.
260 *
261 * This uses the global tunnel metadata mapping table created by
262 * tun_metadata_init(). If no such table has been created or if 'mf' hasn't
263 * been allocated in it yet, this function does nothing. */
264void
6728d578 265tun_metadata_write(struct flow_tnl *tnl,
9558d2a5
JG
266 const struct mf_field *mf, const union mf_value *value)
267{
268 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
269 unsigned int idx = mf->id - MFF_TUN_METADATA0;
270 struct tun_metadata_loc *loc;
271
272 if (!map || !map->entries[idx].valid) {
273 return;
274 }
275
276 loc = &map->entries[idx].loc;
6728d578 277 memcpy_to_metadata(&tnl->metadata,
2234a7db 278 value->tun_metadata + mf->n_bytes - loc->len, loc, idx);
9558d2a5
JG
279}
280
281static const struct tun_metadata_loc *
282metadata_loc_from_match(struct tun_table *map, struct match *match,
4f7b100c
JG
283 const char *name, unsigned int idx,
284 unsigned int field_len, bool masked, char **err_str)
9558d2a5
JG
285{
286 ovs_assert(idx < TUN_METADATA_NUM_OPTS);
287
4f7b100c
JG
288 if (err_str) {
289 *err_str = NULL;
290 }
291
9558d2a5
JG
292 if (map) {
293 if (map->entries[idx].valid) {
294 return &map->entries[idx].loc;
295 } else {
296 return NULL;
297 }
298 }
299
4f7b100c
JG
300 if (match->tun_md.alloc_offset + field_len > TUN_METADATA_TOT_OPT_SIZE) {
301 if (err_str) {
302 *err_str = xasprintf("field %s exceeds maximum size for tunnel "
303 "metadata (used %d, max %d)", name,
304 match->tun_md.alloc_offset + field_len,
305 TUN_METADATA_TOT_OPT_SIZE);
306 }
307
308 return NULL;
309 }
310
311 if (ULLONG_GET(match->wc.masks.tunnel.metadata.present.map, idx)) {
312 if (err_str) {
313 *err_str = xasprintf("field %s set multiple times", name);
314 }
315
9558d2a5
JG
316 return NULL;
317 }
318
1cb20095
JG
319 match->tun_md.entry[idx].loc.len = field_len;
320 match->tun_md.entry[idx].loc.c.offset = match->tun_md.alloc_offset;
321 match->tun_md.entry[idx].loc.c.len = field_len;
322 match->tun_md.entry[idx].loc.c.next = NULL;
323 match->tun_md.entry[idx].masked = masked;
9558d2a5
JG
324 match->tun_md.alloc_offset += field_len;
325 match->tun_md.valid = true;
326
1cb20095 327 return &match->tun_md.entry[idx].loc;
9558d2a5
JG
328}
329
330/* Makes 'match' match 'value'/'mask' on field 'mf'.
331 *
6728d578 332 * 'mf' must be an MFF_TUN_METADATA* field. 'match' must be in non-UDPIF format.
9558d2a5
JG
333 *
334 * If there is global tunnel metadata matching table, this function is
335 * effective only if there is already a mapping for 'mf'. Otherwise, the
336 * metadata mapping table integrated into 'match' is used, adding 'mf' to its
337 * mapping table if it isn't already mapped (and if there is room). If 'mf'
338 * isn't or can't be mapped, this function returns without modifying 'match'.
339 *
340 * 'value' may be NULL; if so, then 'mf' is made to match on an all-zeros
341 * value.
342 *
343 * 'mask' may be NULL; if so, then 'mf' is made exact-match.
4f7b100c
JG
344 *
345 * If non-NULL, 'err_str' returns a malloc'ed string describing any errors
346 * with the request or NULL if there is no error. The caller is reponsible
347 * for freeing the string.
9558d2a5
JG
348 */
349void
350tun_metadata_set_match(const struct mf_field *mf, const union mf_value *value,
4f7b100c
JG
351 const union mf_value *mask, struct match *match,
352 char **err_str)
9558d2a5
JG
353{
354 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
355 const struct tun_metadata_loc *loc;
356 unsigned int idx = mf->id - MFF_TUN_METADATA0;
357 unsigned int field_len;
1cb20095 358 bool is_masked;
9558d2a5
JG
359 unsigned int data_offset;
360 union mf_value data;
361
6728d578
JG
362 ovs_assert(!(match->flow.tunnel.flags & FLOW_TNL_F_UDPIF));
363
1cb20095 364 field_len = mf_field_len(mf, value, mask, &is_masked);
4f7b100c
JG
365 loc = metadata_loc_from_match(map, match, mf->name, idx, field_len,
366 is_masked, err_str);
9558d2a5
JG
367 if (!loc) {
368 return;
369 }
370
371 data_offset = mf->n_bytes - loc->len;
372
373 if (!value) {
374 memset(data.tun_metadata, 0, loc->len);
375 } else if (!mask) {
376 memcpy(data.tun_metadata, value->tun_metadata + data_offset, loc->len);
377 } else {
378 int i;
379 for (i = 0; i < loc->len; i++) {
380 data.tun_metadata[i] = value->tun_metadata[data_offset + i] &
381 mask->tun_metadata[data_offset + i];
382 }
383 }
2234a7db
JG
384 memcpy_to_metadata(&match->flow.tunnel.metadata, data.tun_metadata,
385 loc, idx);
9558d2a5
JG
386
387 if (!value) {
388 memset(data.tun_metadata, 0, loc->len);
389 } else if (!mask) {
390 memset(data.tun_metadata, 0xff, loc->len);
391 } else {
392 memcpy(data.tun_metadata, mask->tun_metadata + data_offset, loc->len);
393 }
2234a7db
JG
394 memcpy_to_metadata(&match->wc.masks.tunnel.metadata, data.tun_metadata,
395 loc, idx);
9558d2a5
JG
396}
397
6728d578
JG
398static bool
399udpif_to_parsed(const struct flow_tnl *flow, const struct flow_tnl *mask,
400 struct flow_tnl *flow_xlate, struct flow_tnl *mask_xlate)
401{
402 if (flow->flags & FLOW_TNL_F_UDPIF) {
403 int err;
404
405 err = tun_metadata_from_geneve_udpif(flow, flow, flow_xlate);
406 if (err) {
407 return false;
408 }
409
410 if (mask) {
411 tun_metadata_from_geneve_udpif(flow, mask, mask_xlate);
412 if (err) {
413 return false;
414 }
415 }
416 } else {
417 if (flow->metadata.present.map == 0) {
418 /* There is no tunnel metadata, don't bother copying. */
419 return false;
420 }
421
422 memcpy(flow_xlate, flow, sizeof *flow_xlate);
423 if (mask) {
424 memcpy(mask_xlate, mask, sizeof *mask_xlate);
425 }
426
427 if (!flow_xlate->metadata.tab) {
428 flow_xlate->metadata.tab = ovsrcu_get(struct tun_table *,
429 &metadata_tab);
430 }
431 }
432
433 return true;
434}
435
436/* Copies all MFF_TUN_METADATA* fields from 'tnl' to 'flow_metadata'. */
9558d2a5 437void
6728d578 438tun_metadata_get_fmd(const struct flow_tnl *tnl, struct match *flow_metadata)
9558d2a5 439{
6728d578 440 struct flow_tnl flow;
9558d2a5
JG
441 int i;
442
6728d578
JG
443 if (!udpif_to_parsed(tnl, NULL, &flow, NULL)) {
444 return;
9558d2a5
JG
445 }
446
6728d578 447 ULLONG_FOR_EACH_1 (i, flow.metadata.present.map) {
9558d2a5 448 union mf_value opts;
6728d578 449 const struct tun_metadata_loc *old_loc = &flow.metadata.tab->entries[i].loc;
9558d2a5
JG
450 const struct tun_metadata_loc *new_loc;
451
4f7b100c
JG
452 new_loc = metadata_loc_from_match(NULL, flow_metadata, NULL, i,
453 old_loc->len, false, NULL);
9558d2a5 454
6728d578 455 memcpy_from_metadata(opts.tun_metadata, &flow.metadata, old_loc);
9558d2a5 456 memcpy_to_metadata(&flow_metadata->flow.tunnel.metadata,
2234a7db 457 opts.tun_metadata, new_loc, i);
9558d2a5
JG
458
459 memset(opts.tun_metadata, 0xff, old_loc->len);
460 memcpy_to_metadata(&flow_metadata->wc.masks.tunnel.metadata,
2234a7db 461 opts.tun_metadata, new_loc, i);
9558d2a5
JG
462 }
463}
464
/* Returns the hash under which 'key' is stored in a tun_table's key_hmap. */
static uint32_t
tun_meta_hash(uint32_t key)
{
    const uint32_t basis = 0;

    return hash_int(key, basis);
}
470
471static struct tun_meta_entry *
472tun_meta_find_key(const struct hmap *hmap, uint32_t key)
473{
474 struct tun_meta_entry *entry;
475
476 HMAP_FOR_EACH_IN_BUCKET (entry, node, tun_meta_hash(key), hmap) {
477 if (entry->key == key) {
478 return entry;
479 }
480 }
481 return NULL;
482}
483
484static void
485memcpy_to_metadata(struct tun_metadata *dst, const void *src,
2234a7db 486 const struct tun_metadata_loc *loc, unsigned int idx)
9558d2a5
JG
487{
488 const struct tun_metadata_loc_chain *chain = &loc->c;
489 int addr = 0;
490
491 while (chain) {
6728d578 492 memcpy(dst->opts.u8 + loc->c.offset + addr, (uint8_t *)src + addr,
9558d2a5
JG
493 chain->len);
494 addr += chain->len;
495 chain = chain->next;
496 }
2234a7db
JG
497
498 ULLONG_SET1(dst->present.map, idx);
9558d2a5
JG
499}
500
501static void
502memcpy_from_metadata(void *dst, const struct tun_metadata *src,
503 const struct tun_metadata_loc *loc)
504{
505 const struct tun_metadata_loc_chain *chain = &loc->c;
506 int addr = 0;
507
508 while (chain) {
6728d578 509 memcpy((uint8_t *)dst + addr, src->opts.u8 + loc->c.offset + addr,
9558d2a5
JG
510 chain->len);
511 addr += chain->len;
512 chain = chain->next;
513 }
514}
515
516static int
517tun_metadata_alloc_chain(struct tun_table *map, uint8_t len,
518 struct tun_metadata_loc_chain *loc)
519 OVS_REQUIRES(tab_mutex)
520{
521 int alloc_len = len / 4;
522 int scan_start = 0;
523 int scan_end = TUN_METADATA_TOT_OPT_SIZE / 4;
524 int pos_start, pos_end, pos_len;
525 int best_start = 0, best_len = 0;
526
527 while (true) {
528 pos_start = bitmap_scan(map->alloc_map, 0, scan_start, scan_end);
529 if (pos_start == scan_end) {
530 break;
531 }
532
533 pos_end = bitmap_scan(map->alloc_map, 1, pos_start,
534 MIN(pos_start + alloc_len, scan_end));
535 pos_len = pos_end - pos_start;
536 if (pos_len == alloc_len) {
537 goto found;
538 }
539
540 if (pos_len > best_len) {
541 best_start = pos_start;
542 best_len = pos_len;
543 }
544 scan_start = pos_end + 1;
545 }
546
547 if (best_len == 0) {
548 return ENOSPC;
549 }
550
551 pos_start = best_start;
552 pos_len = best_len;
553
554found:
555 bitmap_set_multiple(map->alloc_map, pos_start, pos_len, 1);
556 loc->offset = pos_start * 4;
557 loc->len = pos_len * 4;
558
559 return 0;
560}
561
562static enum ofperr
563tun_metadata_add_entry(struct tun_table *map, uint8_t idx, uint16_t opt_class,
564 uint8_t type, uint8_t len) OVS_REQUIRES(tab_mutex)
565{
566 struct tun_meta_entry *entry;
567 struct tun_metadata_loc_chain *cur_chain, *prev_chain;
568
569 ovs_assert(idx < TUN_METADATA_NUM_OPTS);
570
571 entry = &map->entries[idx];
572 if (entry->valid) {
4e548ad9 573 return OFPERR_NXTTMFC_ALREADY_MAPPED;
9558d2a5
JG
574 }
575
576 entry->key = tun_meta_key(htons(opt_class), type);
577 if (tun_meta_find_key(&map->key_hmap, entry->key)) {
4e548ad9 578 return OFPERR_NXTTMFC_DUP_ENTRY;
9558d2a5
JG
579 }
580
581 entry->valid = true;
582 hmap_insert(&map->key_hmap, &entry->node,
583 tun_meta_hash(entry->key));
584
585 entry->loc.len = len;
586 cur_chain = &entry->loc.c;
587 memset(cur_chain, 0, sizeof *cur_chain);
588 prev_chain = NULL;
589
590 while (len) {
591 int err;
592
593 if (!cur_chain) {
594 cur_chain = xzalloc(sizeof *cur_chain);
595 }
596
597 err = tun_metadata_alloc_chain(map, len, cur_chain);
598 if (err) {
599 tun_metadata_del_entry(map, idx);
4e548ad9 600 return OFPERR_NXTTMFC_TABLE_FULL;
9558d2a5
JG
601 }
602
603 len -= cur_chain->len;
604
605 if (prev_chain) {
606 prev_chain->next = cur_chain;
607 }
608 prev_chain = cur_chain;
609 cur_chain = NULL;
610 }
611
612 return 0;
613}
614
615static void
616tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
617 OVS_REQUIRES(tab_mutex)
618{
619 struct tun_meta_entry *entry;
620 struct tun_metadata_loc_chain *chain;
621
622 if (idx >= TUN_METADATA_NUM_OPTS) {
623 return;
624 }
625
626 entry = &map->entries[idx];
627 if (!entry->valid) {
628 return;
629 }
630
631 chain = &entry->loc.c;
632 while (chain) {
633 struct tun_metadata_loc_chain *next = chain->next;
634
635 bitmap_set_multiple(map->alloc_map, chain->offset / 4,
636 chain->len / 4, 0);
637 if (chain != &entry->loc.c) {
638 free(chain);
639 }
640 chain = next;
641 }
642
643 entry->valid = false;
644 hmap_remove(&map->key_hmap, &entry->node);
645 memset(&entry->loc, 0, sizeof entry->loc);
646}
647
5bb08b0e 648static int
6728d578
JG
649tun_metadata_from_geneve__(const struct tun_metadata *flow_metadata,
650 const struct geneve_opt *opt,
5bb08b0e
JG
651 const struct geneve_opt *flow_opt, int opts_len,
652 struct tun_metadata *metadata)
9558d2a5 653{
6728d578
JG
654 struct tun_table *map;
655 bool is_mask = flow_opt != opt;
656
657 if (!is_mask) {
658 map = ovsrcu_get(struct tun_table *, &metadata_tab);
659 metadata->tab = map;
660 } else {
661 map = flow_metadata->tab;
662 }
663
9558d2a5
JG
664 if (!map) {
665 return 0;
666 }
667
9558d2a5
JG
668 while (opts_len > 0) {
669 int len;
670 struct tun_meta_entry *entry;
671
672 if (opts_len < sizeof(*opt)) {
673 return EINVAL;
674 }
675
676 len = sizeof(*opt) + flow_opt->length * 4;
677 if (len > opts_len) {
678 return EINVAL;
679 }
680
681 entry = tun_meta_find_key(&map->key_hmap,
682 tun_meta_key(flow_opt->opt_class,
683 flow_opt->type));
684 if (entry) {
685 if (entry->loc.len == flow_opt->length * 4) {
2234a7db
JG
686 memcpy_to_metadata(metadata, opt + 1, &entry->loc,
687 entry - map->entries);
9558d2a5
JG
688 } else {
689 return EINVAL;
690 }
691 } else if (flow_opt->type & GENEVE_CRIT_OPT_TYPE) {
692 return EINVAL;
693 }
694
695 opt = opt + len / sizeof(*opt);
696 flow_opt = flow_opt + len / sizeof(*opt);
697 opts_len -= len;
698 }
699
700 return 0;
701}
702
6728d578
JG
703static const struct nlattr *
704tun_metadata_find_geneve_key(const struct nlattr *key, uint32_t key_len)
705{
706 const struct nlattr *tnl_key;
707
708 tnl_key = nl_attr_find__(key, key_len, OVS_KEY_ATTR_TUNNEL);
709 if (!tnl_key) {
710 return NULL;
711 }
712
713 return nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
714}
715
716/* Converts from Geneve netlink attributes in 'attr' to tunnel metadata
717 * in 'tun'. The result may either in be UDPIF format or not, as determined
718 * by 'udpif'.
719 *
720 * In the event that a mask is being converted, it is also necessary to
721 * pass in flow information. This includes the full set of netlink attributes
722 * (i.e. not just the Geneve attribute) in 'flow_attrs'/'flow_attr_len' and
723 * the previously converted tunnel metadata 'flow_tun'.
724 *
725 * If a flow rather than mask is being converted, 'flow_attrs' must be NULL. */
5bb08b0e
JG
726int
727tun_metadata_from_geneve_nlattr(const struct nlattr *attr,
728 const struct nlattr *flow_attrs,
729 size_t flow_attr_len,
6728d578
JG
730 const struct flow_tnl *flow_tun, bool udpif,
731 struct flow_tnl *tun)
9558d2a5 732{
5bb08b0e 733 bool is_mask = !!flow_attrs;
6728d578 734 int attr_len = nl_attr_get_size(attr);
5bb08b0e 735 const struct nlattr *flow;
9558d2a5 736
6728d578
JG
737 /* No need for real translation, just copy things over. */
738 if (udpif) {
739 memcpy(tun->metadata.opts.gnv, nl_attr_get(attr), attr_len);
5bb08b0e 740
6728d578
JG
741 if (!is_mask) {
742 tun->metadata.present.len = attr_len;
743 tun->flags |= FLOW_TNL_F_UDPIF;
744 } else {
745 /* We need to exact match on the length so we don't
746 * accidentally match on sets of options that are the same
747 * at the beginning but with additional options after. */
748 tun->metadata.present.len = 0xff;
5bb08b0e
JG
749 }
750
6728d578
JG
751 return 0;
752 }
753
754 if (is_mask) {
755 flow = tun_metadata_find_geneve_key(flow_attrs, flow_attr_len);
5bb08b0e 756 if (!flow) {
6728d578 757 return attr_len ? EINVAL : 0;
5bb08b0e
JG
758 }
759
6728d578 760 if (attr_len != nl_attr_get_size(flow)) {
5bb08b0e
JG
761 return EINVAL;
762 }
763 } else {
764 flow = attr;
9558d2a5
JG
765 }
766
6728d578
JG
767 return tun_metadata_from_geneve__(&flow_tun->metadata, nl_attr_get(attr),
768 nl_attr_get(flow), nl_attr_get_size(flow),
769 &tun->metadata);
5bb08b0e
JG
770}
771
6728d578
JG
772/* Converts from the flat Geneve options representation extracted directly
773 * from the tunnel header to the representation that maps options to
774 * pre-allocated locations. The original version (in UDPIF form) is passed
775 * in 'src' and the translated form in stored in 'dst'. To handle masks, the
776 * flow must also be passed in through 'flow' (in the original, raw form). */
5bb08b0e 777int
6728d578
JG
778tun_metadata_from_geneve_udpif(const struct flow_tnl *flow,
779 const struct flow_tnl *src,
780 struct flow_tnl *dst)
5bb08b0e 781{
6728d578 782 ovs_assert(flow->flags & FLOW_TNL_F_UDPIF);
5bb08b0e 783
6728d578
JG
784 if (flow == src) {
785 dst->flags = flow->flags & ~FLOW_TNL_F_UDPIF;
786 } else {
787 dst->metadata.tab = NULL;
788 }
789 dst->metadata.present.map = 0;
790 return tun_metadata_from_geneve__(&flow->metadata, src->metadata.opts.gnv,
791 flow->metadata.opts.gnv,
792 flow->metadata.present.len,
793 &dst->metadata);
5bb08b0e
JG
794}
795
796static void
797tun_metadata_to_geneve__(const struct tun_metadata *flow, struct ofpbuf *b,
798 bool *crit_opt)
799{
800 struct tun_table *map;
801 int i;
802
9558d2a5
JG
803 map = flow->tab;
804 if (!map) {
805 map = ovsrcu_get(struct tun_table *, &metadata_tab);
806 }
807
5bb08b0e 808 *crit_opt = false;
9558d2a5 809
6728d578 810 ULLONG_FOR_EACH_1 (i, flow->present.map) {
9558d2a5
JG
811 struct tun_meta_entry *entry = &map->entries[i];
812 struct geneve_opt *opt;
813
814 opt = ofpbuf_put_uninit(b, sizeof *opt + entry->loc.len);
815
816 opt->opt_class = tun_key_class(entry->key);
817 opt->type = tun_key_type(entry->key);
818 opt->length = entry->loc.len / 4;
819 opt->r1 = 0;
820 opt->r2 = 0;
821 opt->r3 = 0;
822
823 memcpy_from_metadata(opt + 1, flow, &entry->loc);
5bb08b0e 824 *crit_opt |= !!(opt->type & GENEVE_CRIT_OPT_TYPE);
9558d2a5 825 }
5bb08b0e
JG
826}
827
6728d578
JG
828static void
829tun_metadata_to_geneve_nlattr_flow(const struct flow_tnl *flow,
5bb08b0e
JG
830 struct ofpbuf *b)
831{
832 size_t nlattr_offset;
833 bool crit_opt;
834
6728d578 835 if (!flow->metadata.present.map) {
5bb08b0e
JG
836 return;
837 }
838
839 /* For all intents and purposes, the Geneve options are nested
840 * attributes even if this doesn't show up directly to netlink. It's
841 * similar enough that we can use the same mechanism. */
842 nlattr_offset = nl_msg_start_nested(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
843
6728d578 844 tun_metadata_to_geneve__(&flow->metadata, b, &crit_opt);
9558d2a5
JG
845
846 nl_msg_end_nested(b, nlattr_offset);
847}
848
6728d578
JG
849/* Converts from processed tunnel metadata information (in non-udpif
850 * format) in 'flow' to a stream of Geneve options suitable for
851 * transmission in 'opts'. Additionally returns whether there were
852 * any critical options in 'crit_opt' as well as the total length of
853 * data. */
5bb08b0e 854int
6728d578 855tun_metadata_to_geneve_header(const struct flow_tnl *flow,
5bb08b0e
JG
856 struct geneve_opt *opts, bool *crit_opt)
857{
858 struct ofpbuf b;
859
6728d578
JG
860 ovs_assert(!(flow->flags & FLOW_TNL_F_UDPIF));
861
4e548ad9 862 ofpbuf_use_stack(&b, opts, TLV_TOT_OPT_SIZE);
6728d578 863 tun_metadata_to_geneve__(&flow->metadata, &b, crit_opt);
5bb08b0e
JG
864
865 return b.size;
866}
867
6728d578
JG
868static void
869tun_metadata_to_geneve_mask__(const struct tun_metadata *flow,
870 const struct tun_metadata *mask,
871 struct geneve_opt *opt, int opts_len)
9558d2a5
JG
872{
873 struct tun_table *map = flow->tab;
9558d2a5
JG
874
875 if (!map) {
876 return;
877 }
878
9558d2a5
JG
879 /* All of these options have already been validated, so no need
880 * for sanity checking. */
9558d2a5
JG
881 while (opts_len > 0) {
882 struct tun_meta_entry *entry;
883 int len = sizeof(*opt) + opt->length * 4;
884
885 entry = tun_meta_find_key(&map->key_hmap,
886 tun_meta_key(opt->opt_class, opt->type));
887 if (entry) {
888 memcpy_from_metadata(opt + 1, mask, &entry->loc);
889 } else {
890 memset(opt + 1, 0, opt->length * 4);
891 }
892
893 opt->opt_class = htons(0xffff);
894 opt->type = 0xff;
895 opt->length = 0x1f;
896 opt->r1 = 0;
897 opt->r2 = 0;
898 opt->r3 = 0;
899
900 opt = opt + len / sizeof(*opt);
901 opts_len -= len;
902 }
903}
904
6728d578
JG
905static void
906tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
907 const struct flow_tnl *mask,
908 const struct flow_tnl *flow,
909 struct ofpbuf *b)
910{
911 const struct nlattr *geneve_key;
912 struct nlattr *geneve_mask;
913 struct geneve_opt *opt;
914 int opts_len;
915
916 if (!key) {
917 return;
918 }
919
920 geneve_key = tun_metadata_find_geneve_key(key->data, key->size);
921 if (!geneve_key) {
922 return;
923 }
924
925 geneve_mask = ofpbuf_tail(b);
926 nl_msg_put(b, geneve_key, geneve_key->nla_len);
927
928 opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask));
929 opts_len = nl_attr_get_size(geneve_mask);
930
931 tun_metadata_to_geneve_mask__(&flow->metadata, &mask->metadata,
932 opt, opts_len);
933}
934
935/* Convert from the tunnel metadata in 'tun' to netlink attributes stored
936 * in 'b'. Either UDPIF or non-UDPIF input forms are accepted.
937 *
938 * To assist with parsing, it is necessary to also pass in the tunnel metadata
939 * from the flow in 'flow' as well in the original netlink form of the flow in
940 * 'key'. */
941void
942tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun,
943 const struct flow_tnl *flow,
944 const struct ofpbuf *key,
945 struct ofpbuf *b)
946{
947 bool is_mask = tun != flow;
948
949 if (!(flow->flags & FLOW_TNL_F_UDPIF)) {
950 if (!is_mask) {
951 tun_metadata_to_geneve_nlattr_flow(tun, b);
952 } else {
953 tun_metadata_to_geneve_nlattr_mask(key, tun, flow, b);
954 }
955 } else if (flow->metadata.present.len || is_mask) {
956 nl_msg_put_unspec(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
957 tun->metadata.opts.gnv,
958 flow->metadata.present.len);
959 }
960}
961
962/* Converts 'mask_src' (in non-UDPIF format) to a series of masked options in
963 * 'dst'. 'flow_src' (also in non-UDPIF format) and the original set of
964 * options 'flow_src_opt'/'opts_len' are needed as a guide to interpret the
965 * mask data. */
966void
967tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src,
968 const struct flow_tnl *mask_src,
969 const struct geneve_opt *flow_src_opt,
970 int opts_len, struct geneve_opt *dst)
971{
972 ovs_assert(!(flow_src->flags & FLOW_TNL_F_UDPIF));
973
974 memcpy(dst, flow_src_opt, opts_len);
975 tun_metadata_to_geneve_mask__(&flow_src->metadata,
976 &mask_src->metadata, dst, opts_len);
977}
978
9558d2a5
JG
979static const struct tun_metadata_loc *
980metadata_loc_from_match_read(struct tun_table *map, const struct match *match,
1cb20095
JG
981 unsigned int idx, struct flow_tnl *mask,
982 bool *is_masked)
9558d2a5 983{
1cb20095
JG
984 union mf_value mask_opts;
985
9558d2a5 986 if (match->tun_md.valid) {
1cb20095
JG
987 *is_masked = match->tun_md.entry[idx].masked;
988 return &match->tun_md.entry[idx].loc;
9558d2a5
JG
989 }
990
1cb20095
JG
991 memcpy_from_metadata(mask_opts.tun_metadata, &mask->metadata,
992 &map->entries[idx].loc);
993
994 *is_masked = map->entries[idx].loc.len == 0 ||
995 !is_all_ones(mask_opts.tun_metadata,
996 map->entries[idx].loc.len);
9558d2a5
JG
997 return &map->entries[idx].loc;
998}
999
1000void
1001tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm,
1002 const struct match *match)
1003{
6728d578 1004 struct flow_tnl flow, mask;
9558d2a5
JG
1005 int i;
1006
6728d578
JG
1007 if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel,
1008 &flow, &mask)) {
1009 return;
1010 }
1011
1012 ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) {
9558d2a5 1013 const struct tun_metadata_loc *loc;
1cb20095 1014 bool is_masked;
9558d2a5
JG
1015 union mf_value opts;
1016 union mf_value mask_opts;
1017
1cb20095
JG
1018 loc = metadata_loc_from_match_read(flow.metadata.tab, match, i,
1019 &mask, &is_masked);
6728d578
JG
1020 memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc);
1021 memcpy_from_metadata(mask_opts.tun_metadata, &mask.metadata, loc);
1cb20095
JG
1022 nxm_put__(b, MFF_TUN_METADATA0 + i, oxm, opts.tun_metadata,
1023 is_masked ? mask_opts.tun_metadata : NULL, loc->len);
9558d2a5
JG
1024 }
1025}
1026
1027void
1028tun_metadata_match_format(struct ds *s, const struct match *match)
1029{
6728d578 1030 struct flow_tnl flow, mask;
9558d2a5
JG
1031 unsigned int i;
1032
6728d578
JG
1033 if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel,
1034 &flow, &mask)) {
1035 return;
1036 }
1037
1038 ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) {
9558d2a5 1039 const struct tun_metadata_loc *loc;
1cb20095
JG
1040 bool is_masked;
1041 union mf_value opts, mask_opts;
9558d2a5 1042
1cb20095
JG
1043 loc = metadata_loc_from_match_read(flow.metadata.tab, match, i,
1044 &mask, &is_masked);
9558d2a5 1045
1cb20095
JG
1046 ds_put_format(s, "tun_metadata%u", i);
1047 memcpy_from_metadata(mask_opts.tun_metadata, &mask.metadata, loc);
1048
1049 if (!ULLONG_GET(flow.metadata.present.map, i)) {
1050 /* Indicate that we are matching on the field being not present. */
1051 ds_put_cstr(s, "=NP");
1052 } else if (!(is_masked &&
1053 is_all_zeros(mask_opts.tun_metadata, loc->len))) {
1054 ds_put_char(s, '=');
9558d2a5 1055
1cb20095 1056 memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc);
9558d2a5 1057 ds_put_hex(s, opts.tun_metadata, loc->len);
1cb20095
JG
1058
1059 if (!is_all_ones(mask_opts.tun_metadata, loc->len)) {
1060 ds_put_char(s, '/');
1061 ds_put_hex(s, mask_opts.tun_metadata, loc->len);
1062 }
9558d2a5
JG
1063 }
1064 ds_put_char(s, ',');
1065 }
1066}