]> git.proxmox.com Git - mirror_ovs.git/blame - lib/tun-metadata.c
Break netdev.h into private and public parts
[mirror_ovs.git] / lib / tun-metadata.c
CommitLineData
9558d2a5
JG
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include <errno.h>
19#include <stdbool.h>
20
21#include "bitmap.h"
22#include "compiler.h"
23#include "hmap.h"
24#include "match.h"
25#include "nx-match.h"
26#include "odp-netlink.h"
27#include "ofp-util.h"
28#include "ovs-thread.h"
29#include "ovs-rcu.h"
30#include "packets.h"
31#include "tun-metadata.h"
32
33struct tun_meta_entry {
34 struct hmap_node node; /* In struct tun_table's key_hmap. */
35 uint32_t key; /* (class << 16) | type. */
36 struct tun_metadata_loc loc;
37 bool valid; /* True if allocated to a class and type. */
38};
39
4e548ad9 40/* Maps from TLV option class+type to positions in a struct tun_metadata's
9558d2a5
JG
41 * 'opts' array. */
42struct tun_table {
43 /* TUN_METADATA<i> is stored in element <i>. */
44 struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
45
46 /* Each bit represents 4 bytes of space, 0-bits are free space. */
47 unsigned long alloc_map[BITMAP_N_LONGS(TUN_METADATA_TOT_OPT_SIZE / 4)];
48
49 /* The valid elements in entries[], indexed by class+type. */
50 struct hmap key_hmap;
51};
52BUILD_ASSERT_DECL(TUN_METADATA_TOT_OPT_SIZE % 4 == 0);
53
54static struct ovs_mutex tab_mutex = OVS_MUTEX_INITIALIZER;
55static OVSRCU_TYPE(struct tun_table *) metadata_tab;
56
57static enum ofperr tun_metadata_add_entry(struct tun_table *map, uint8_t idx,
58 uint16_t opt_class, uint8_t type,
59 uint8_t len) OVS_REQUIRES(tab_mutex);
60static void tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
61 OVS_REQUIRES(tab_mutex);
62static void memcpy_to_metadata(struct tun_metadata *dst, const void *src,
2234a7db
JG
63 const struct tun_metadata_loc *,
64 unsigned int idx);
9558d2a5
JG
65static void memcpy_from_metadata(void *dst, const struct tun_metadata *src,
66 const struct tun_metadata_loc *);
67
68static uint32_t
69tun_meta_key(ovs_be16 class, uint8_t type)
70{
71 return (OVS_FORCE uint16_t)class << 8 | type;
72}
73
74static ovs_be16
75tun_key_class(uint32_t key)
76{
77 return (OVS_FORCE ovs_be16)(key >> 8);
78}
79
/* Extracts the option type (the low 8 bits) from a key built by
 * tun_meta_key(). */
static uint8_t
tun_key_type(uint32_t key)
{
    const uint32_t type_mask = 0xff;

    return key & type_mask;
}
85
86/* Returns a newly allocated tun_table. If 'old_map' is nonnull then the new
87 * tun_table is a deep copy of the old one. */
88static struct tun_table *
89table_alloc(const struct tun_table *old_map) OVS_REQUIRES(tab_mutex)
90{
91 struct tun_table *new_map;
92
93 new_map = xzalloc(sizeof *new_map);
94
95 if (old_map) {
96 struct tun_meta_entry *entry;
97
98 *new_map = *old_map;
99 hmap_init(&new_map->key_hmap);
100
101 HMAP_FOR_EACH (entry, node, &old_map->key_hmap) {
102 struct tun_meta_entry *new_entry;
103 struct tun_metadata_loc_chain *chain;
104
105 new_entry = &new_map->entries[entry - old_map->entries];
106 hmap_insert(&new_map->key_hmap, &new_entry->node, entry->node.hash);
107
108 chain = &new_entry->loc.c;
109 while (chain->next) {
110 chain->next = xmemdup(chain->next, sizeof *chain->next);
111 chain = chain->next;
112 }
113 }
114 } else {
115 hmap_init(&new_map->key_hmap);
116 }
117
118 return new_map;
119}
120
121/* Frees 'map' and all the memory it owns. */
122static void
123table_free(struct tun_table *map) OVS_REQUIRES(tab_mutex)
124{
125 struct tun_meta_entry *entry;
126
127 if (!map) {
128 return;
129 }
130
131 HMAP_FOR_EACH (entry, node, &map->key_hmap) {
132 tun_metadata_del_entry(map, entry - map->entries);
133 }
134
070767fc 135 hmap_destroy(&map->key_hmap);
9558d2a5
JG
136 free(map);
137}
138
139/* Creates a global tunnel metadata mapping table, if none already exists. */
140void
141tun_metadata_init(void)
142{
143 ovs_mutex_lock(&tab_mutex);
144
145 if (!ovsrcu_get_protected(struct tun_table *, &metadata_tab)) {
146 ovsrcu_set(&metadata_tab, table_alloc(NULL));
147 }
148
149 ovs_mutex_unlock(&tab_mutex);
150}
151
152enum ofperr
4e548ad9 153tun_metadata_table_mod(struct ofputil_tlv_table_mod *ttm)
9558d2a5
JG
154{
155 struct tun_table *old_map, *new_map;
4e548ad9 156 struct ofputil_tlv_map *ofp_map;
9558d2a5
JG
157 enum ofperr err = 0;
158
159 ovs_mutex_lock(&tab_mutex);
160
161 old_map = ovsrcu_get_protected(struct tun_table *, &metadata_tab);
162
4e548ad9
ML
163 switch (ttm->command) {
164 case NXTTMC_ADD:
9558d2a5
JG
165 new_map = table_alloc(old_map);
166
4e548ad9 167 LIST_FOR_EACH (ofp_map, list_node, &ttm->mappings) {
9558d2a5
JG
168 err = tun_metadata_add_entry(new_map, ofp_map->index,
169 ofp_map->option_class,
170 ofp_map->option_type,
171 ofp_map->option_len);
172 if (err) {
173 table_free(new_map);
174 goto out;
175 }
176 }
177 break;
178
4e548ad9 179 case NXTTMC_DELETE:
9558d2a5
JG
180 new_map = table_alloc(old_map);
181
4e548ad9 182 LIST_FOR_EACH (ofp_map, list_node, &ttm->mappings) {
9558d2a5
JG
183 tun_metadata_del_entry(new_map, ofp_map->index);
184 }
185 break;
186
4e548ad9 187 case NXTTMC_CLEAR:
9558d2a5
JG
188 new_map = table_alloc(NULL);
189 break;
190
191 default:
192 OVS_NOT_REACHED();
193 }
194
195 ovsrcu_set(&metadata_tab, new_map);
196 ovsrcu_postpone(table_free, old_map);
197
198out:
199 ovs_mutex_unlock(&tab_mutex);
200 return err;
201}
202
203void
4e548ad9 204tun_metadata_table_request(struct ofputil_tlv_table_reply *ttr)
9558d2a5
JG
205{
206 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
207 int i;
208
4e548ad9
ML
209 ttr->max_option_space = TUN_METADATA_TOT_OPT_SIZE;
210 ttr->max_fields = TUN_METADATA_NUM_OPTS;
417e7e66 211 ovs_list_init(&ttr->mappings);
9558d2a5
JG
212
213 for (i = 0; i < TUN_METADATA_NUM_OPTS; i++) {
214 struct tun_meta_entry *entry = &map->entries[i];
4e548ad9 215 struct ofputil_tlv_map *map;
9558d2a5
JG
216
217 if (!entry->valid) {
218 continue;
219 }
220
221 map = xmalloc(sizeof *map);
222 map->option_class = ntohs(tun_key_class(entry->key));
223 map->option_type = tun_key_type(entry->key);
224 map->option_len = entry->loc.len;
225 map->index = i;
226
417e7e66 227 ovs_list_push_back(&ttr->mappings, &map->list_node);
9558d2a5
JG
228 }
229}
230
6728d578 231/* Copies the value of field 'mf' from 'tnl' (which must be in non-UDPIF format) * into 'value'.
9558d2a5
JG
232 *
233 * 'mf' must be an MFF_TUN_METADATA* field.
234 *
235 * This uses the global tunnel metadata mapping table created by
236 * tun_metadata_init(). If no such table has been created or if 'mf' hasn't
237 * been allocated in it yet, this just zeros 'value'. */
238void
6728d578 239tun_metadata_read(const struct flow_tnl *tnl,
9558d2a5
JG
240 const struct mf_field *mf, union mf_value *value)
241{
242 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
243 unsigned int idx = mf->id - MFF_TUN_METADATA0;
244 struct tun_metadata_loc *loc;
245
246 if (!map) {
247 memset(value->tun_metadata, 0, mf->n_bytes);
248 return;
249 }
250
251 loc = &map->entries[idx].loc;
252
253 memset(value->tun_metadata, 0, mf->n_bytes - loc->len);
254 memcpy_from_metadata(value->tun_metadata + mf->n_bytes - loc->len,
6728d578 255 &tnl->metadata, loc);
9558d2a5
JG
256}
257
6728d578 258/* Copies 'value' into field 'mf' in 'tnl' (in non-UDPIF format).
9558d2a5
JG
259 *
260 * 'mf' must be an MFF_TUN_METADATA* field.
261 *
262 * This uses the global tunnel metadata mapping table created by
263 * tun_metadata_init(). If no such table has been created or if 'mf' hasn't
264 * been allocated in it yet, this function does nothing. */
265void
6728d578 266tun_metadata_write(struct flow_tnl *tnl,
9558d2a5
JG
267 const struct mf_field *mf, const union mf_value *value)
268{
269 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
270 unsigned int idx = mf->id - MFF_TUN_METADATA0;
271 struct tun_metadata_loc *loc;
272
273 if (!map || !map->entries[idx].valid) {
274 return;
275 }
276
277 loc = &map->entries[idx].loc;
6728d578 278 memcpy_to_metadata(&tnl->metadata,
2234a7db 279 value->tun_metadata + mf->n_bytes - loc->len, loc, idx);
9558d2a5
JG
280}
281
282static const struct tun_metadata_loc *
283metadata_loc_from_match(struct tun_table *map, struct match *match,
4f7b100c
JG
284 const char *name, unsigned int idx,
285 unsigned int field_len, bool masked, char **err_str)
9558d2a5
JG
286{
287 ovs_assert(idx < TUN_METADATA_NUM_OPTS);
288
4f7b100c
JG
289 if (err_str) {
290 *err_str = NULL;
291 }
292
9558d2a5
JG
293 if (map) {
294 if (map->entries[idx].valid) {
295 return &map->entries[idx].loc;
296 } else {
297 return NULL;
298 }
299 }
300
4f7b100c
JG
301 if (match->tun_md.alloc_offset + field_len > TUN_METADATA_TOT_OPT_SIZE) {
302 if (err_str) {
303 *err_str = xasprintf("field %s exceeds maximum size for tunnel "
304 "metadata (used %d, max %d)", name,
305 match->tun_md.alloc_offset + field_len,
306 TUN_METADATA_TOT_OPT_SIZE);
307 }
308
309 return NULL;
310 }
311
312 if (ULLONG_GET(match->wc.masks.tunnel.metadata.present.map, idx)) {
313 if (err_str) {
314 *err_str = xasprintf("field %s set multiple times", name);
315 }
316
9558d2a5
JG
317 return NULL;
318 }
319
1cb20095
JG
320 match->tun_md.entry[idx].loc.len = field_len;
321 match->tun_md.entry[idx].loc.c.offset = match->tun_md.alloc_offset;
322 match->tun_md.entry[idx].loc.c.len = field_len;
323 match->tun_md.entry[idx].loc.c.next = NULL;
324 match->tun_md.entry[idx].masked = masked;
9558d2a5
JG
325 match->tun_md.alloc_offset += field_len;
326 match->tun_md.valid = true;
327
1cb20095 328 return &match->tun_md.entry[idx].loc;
9558d2a5
JG
329}
330
331/* Makes 'match' match 'value'/'mask' on field 'mf'.
332 *
6728d578 333 * 'mf' must be an MFF_TUN_METADATA* field. 'match' must be in non-UDPIF format.
9558d2a5
JG
334 *
335 * If there is global tunnel metadata matching table, this function is
336 * effective only if there is already a mapping for 'mf'. Otherwise, the
337 * metadata mapping table integrated into 'match' is used, adding 'mf' to its
338 * mapping table if it isn't already mapped (and if there is room). If 'mf'
339 * isn't or can't be mapped, this function returns without modifying 'match'.
340 *
341 * 'value' may be NULL; if so, then 'mf' is made to match on an all-zeros
342 * value.
343 *
344 * 'mask' may be NULL; if so, then 'mf' is made exact-match.
4f7b100c
JG
345 *
346 * If non-NULL, 'err_str' returns a malloc'ed string describing any errors
347 * with the request or NULL if there is no error. The caller is reponsible
348 * for freeing the string.
9558d2a5
JG
349 */
350void
351tun_metadata_set_match(const struct mf_field *mf, const union mf_value *value,
4f7b100c
JG
352 const union mf_value *mask, struct match *match,
353 char **err_str)
9558d2a5
JG
354{
355 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
356 const struct tun_metadata_loc *loc;
357 unsigned int idx = mf->id - MFF_TUN_METADATA0;
358 unsigned int field_len;
1cb20095 359 bool is_masked;
9558d2a5
JG
360 unsigned int data_offset;
361 union mf_value data;
362
6728d578
JG
363 ovs_assert(!(match->flow.tunnel.flags & FLOW_TNL_F_UDPIF));
364
1cb20095 365 field_len = mf_field_len(mf, value, mask, &is_masked);
4f7b100c
JG
366 loc = metadata_loc_from_match(map, match, mf->name, idx, field_len,
367 is_masked, err_str);
9558d2a5
JG
368 if (!loc) {
369 return;
370 }
371
372 data_offset = mf->n_bytes - loc->len;
373
374 if (!value) {
375 memset(data.tun_metadata, 0, loc->len);
376 } else if (!mask) {
377 memcpy(data.tun_metadata, value->tun_metadata + data_offset, loc->len);
378 } else {
379 int i;
380 for (i = 0; i < loc->len; i++) {
381 data.tun_metadata[i] = value->tun_metadata[data_offset + i] &
382 mask->tun_metadata[data_offset + i];
383 }
384 }
2234a7db
JG
385 memcpy_to_metadata(&match->flow.tunnel.metadata, data.tun_metadata,
386 loc, idx);
9558d2a5
JG
387
388 if (!value) {
389 memset(data.tun_metadata, 0, loc->len);
390 } else if (!mask) {
391 memset(data.tun_metadata, 0xff, loc->len);
392 } else {
393 memcpy(data.tun_metadata, mask->tun_metadata + data_offset, loc->len);
394 }
2234a7db
JG
395 memcpy_to_metadata(&match->wc.masks.tunnel.metadata, data.tun_metadata,
396 loc, idx);
9558d2a5
JG
397}
398
6728d578
JG
399static bool
400udpif_to_parsed(const struct flow_tnl *flow, const struct flow_tnl *mask,
401 struct flow_tnl *flow_xlate, struct flow_tnl *mask_xlate)
402{
403 if (flow->flags & FLOW_TNL_F_UDPIF) {
404 int err;
405
406 err = tun_metadata_from_geneve_udpif(flow, flow, flow_xlate);
407 if (err) {
408 return false;
409 }
410
411 if (mask) {
412 tun_metadata_from_geneve_udpif(flow, mask, mask_xlate);
413 if (err) {
414 return false;
415 }
416 }
417 } else {
418 if (flow->metadata.present.map == 0) {
419 /* There is no tunnel metadata, don't bother copying. */
420 return false;
421 }
422
423 memcpy(flow_xlate, flow, sizeof *flow_xlate);
424 if (mask) {
425 memcpy(mask_xlate, mask, sizeof *mask_xlate);
426 }
427
428 if (!flow_xlate->metadata.tab) {
429 flow_xlate->metadata.tab = ovsrcu_get(struct tun_table *,
430 &metadata_tab);
431 }
432 }
433
434 return true;
435}
436
437/* Copies all MFF_TUN_METADATA* fields from 'tnl' to 'flow_metadata'. */
9558d2a5 438void
6728d578 439tun_metadata_get_fmd(const struct flow_tnl *tnl, struct match *flow_metadata)
9558d2a5 440{
6728d578 441 struct flow_tnl flow;
9558d2a5
JG
442 int i;
443
6728d578
JG
444 if (!udpif_to_parsed(tnl, NULL, &flow, NULL)) {
445 return;
9558d2a5
JG
446 }
447
6728d578 448 ULLONG_FOR_EACH_1 (i, flow.metadata.present.map) {
9558d2a5 449 union mf_value opts;
6728d578 450 const struct tun_metadata_loc *old_loc = &flow.metadata.tab->entries[i].loc;
9558d2a5
JG
451 const struct tun_metadata_loc *new_loc;
452
4f7b100c
JG
453 new_loc = metadata_loc_from_match(NULL, flow_metadata, NULL, i,
454 old_loc->len, false, NULL);
9558d2a5 455
6728d578 456 memcpy_from_metadata(opts.tun_metadata, &flow.metadata, old_loc);
9558d2a5 457 memcpy_to_metadata(&flow_metadata->flow.tunnel.metadata,
2234a7db 458 opts.tun_metadata, new_loc, i);
9558d2a5
JG
459
460 memset(opts.tun_metadata, 0xff, old_loc->len);
461 memcpy_to_metadata(&flow_metadata->wc.masks.tunnel.metadata,
2234a7db 462 opts.tun_metadata, new_loc, i);
9558d2a5
JG
463 }
464}
465
/* Hashes a class+type key for use in a tun_table's key_hmap. */
static uint32_t
tun_meta_hash(uint32_t key)
{
    const uint32_t basis = 0;

    return hash_int(key, basis);
}
471
472static struct tun_meta_entry *
473tun_meta_find_key(const struct hmap *hmap, uint32_t key)
474{
475 struct tun_meta_entry *entry;
476
477 HMAP_FOR_EACH_IN_BUCKET (entry, node, tun_meta_hash(key), hmap) {
478 if (entry->key == key) {
479 return entry;
480 }
481 }
482 return NULL;
483}
484
485static void
486memcpy_to_metadata(struct tun_metadata *dst, const void *src,
2234a7db 487 const struct tun_metadata_loc *loc, unsigned int idx)
9558d2a5
JG
488{
489 const struct tun_metadata_loc_chain *chain = &loc->c;
490 int addr = 0;
491
492 while (chain) {
6728d578 493 memcpy(dst->opts.u8 + loc->c.offset + addr, (uint8_t *)src + addr,
9558d2a5
JG
494 chain->len);
495 addr += chain->len;
496 chain = chain->next;
497 }
2234a7db
JG
498
499 ULLONG_SET1(dst->present.map, idx);
9558d2a5
JG
500}
501
502static void
503memcpy_from_metadata(void *dst, const struct tun_metadata *src,
504 const struct tun_metadata_loc *loc)
505{
506 const struct tun_metadata_loc_chain *chain = &loc->c;
507 int addr = 0;
508
509 while (chain) {
6728d578 510 memcpy((uint8_t *)dst + addr, src->opts.u8 + loc->c.offset + addr,
9558d2a5
JG
511 chain->len);
512 addr += chain->len;
513 chain = chain->next;
514 }
515}
516
517static int
518tun_metadata_alloc_chain(struct tun_table *map, uint8_t len,
519 struct tun_metadata_loc_chain *loc)
520 OVS_REQUIRES(tab_mutex)
521{
522 int alloc_len = len / 4;
523 int scan_start = 0;
524 int scan_end = TUN_METADATA_TOT_OPT_SIZE / 4;
525 int pos_start, pos_end, pos_len;
526 int best_start = 0, best_len = 0;
527
528 while (true) {
529 pos_start = bitmap_scan(map->alloc_map, 0, scan_start, scan_end);
530 if (pos_start == scan_end) {
531 break;
532 }
533
534 pos_end = bitmap_scan(map->alloc_map, 1, pos_start,
535 MIN(pos_start + alloc_len, scan_end));
536 pos_len = pos_end - pos_start;
537 if (pos_len == alloc_len) {
538 goto found;
539 }
540
541 if (pos_len > best_len) {
542 best_start = pos_start;
543 best_len = pos_len;
544 }
545 scan_start = pos_end + 1;
546 }
547
548 if (best_len == 0) {
549 return ENOSPC;
550 }
551
552 pos_start = best_start;
553 pos_len = best_len;
554
555found:
556 bitmap_set_multiple(map->alloc_map, pos_start, pos_len, 1);
557 loc->offset = pos_start * 4;
558 loc->len = pos_len * 4;
559
560 return 0;
561}
562
563static enum ofperr
564tun_metadata_add_entry(struct tun_table *map, uint8_t idx, uint16_t opt_class,
565 uint8_t type, uint8_t len) OVS_REQUIRES(tab_mutex)
566{
567 struct tun_meta_entry *entry;
568 struct tun_metadata_loc_chain *cur_chain, *prev_chain;
569
570 ovs_assert(idx < TUN_METADATA_NUM_OPTS);
571
572 entry = &map->entries[idx];
573 if (entry->valid) {
4e548ad9 574 return OFPERR_NXTTMFC_ALREADY_MAPPED;
9558d2a5
JG
575 }
576
577 entry->key = tun_meta_key(htons(opt_class), type);
578 if (tun_meta_find_key(&map->key_hmap, entry->key)) {
4e548ad9 579 return OFPERR_NXTTMFC_DUP_ENTRY;
9558d2a5
JG
580 }
581
582 entry->valid = true;
583 hmap_insert(&map->key_hmap, &entry->node,
584 tun_meta_hash(entry->key));
585
586 entry->loc.len = len;
587 cur_chain = &entry->loc.c;
588 memset(cur_chain, 0, sizeof *cur_chain);
589 prev_chain = NULL;
590
591 while (len) {
592 int err;
593
594 if (!cur_chain) {
595 cur_chain = xzalloc(sizeof *cur_chain);
482553cc 596 prev_chain->next = cur_chain;
9558d2a5
JG
597 }
598
599 err = tun_metadata_alloc_chain(map, len, cur_chain);
600 if (err) {
601 tun_metadata_del_entry(map, idx);
4e548ad9 602 return OFPERR_NXTTMFC_TABLE_FULL;
9558d2a5
JG
603 }
604
605 len -= cur_chain->len;
606
9558d2a5
JG
607 prev_chain = cur_chain;
608 cur_chain = NULL;
609 }
610
611 return 0;
612}
613
614static void
615tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
616 OVS_REQUIRES(tab_mutex)
617{
618 struct tun_meta_entry *entry;
619 struct tun_metadata_loc_chain *chain;
620
621 if (idx >= TUN_METADATA_NUM_OPTS) {
622 return;
623 }
624
625 entry = &map->entries[idx];
626 if (!entry->valid) {
627 return;
628 }
629
630 chain = &entry->loc.c;
631 while (chain) {
632 struct tun_metadata_loc_chain *next = chain->next;
633
634 bitmap_set_multiple(map->alloc_map, chain->offset / 4,
635 chain->len / 4, 0);
636 if (chain != &entry->loc.c) {
637 free(chain);
638 }
639 chain = next;
640 }
641
642 entry->valid = false;
643 hmap_remove(&map->key_hmap, &entry->node);
644 memset(&entry->loc, 0, sizeof entry->loc);
645}
646
5bb08b0e 647static int
6728d578
JG
648tun_metadata_from_geneve__(const struct tun_metadata *flow_metadata,
649 const struct geneve_opt *opt,
5bb08b0e
JG
650 const struct geneve_opt *flow_opt, int opts_len,
651 struct tun_metadata *metadata)
9558d2a5 652{
6728d578
JG
653 struct tun_table *map;
654 bool is_mask = flow_opt != opt;
655
656 if (!is_mask) {
657 map = ovsrcu_get(struct tun_table *, &metadata_tab);
658 metadata->tab = map;
659 } else {
660 map = flow_metadata->tab;
661 }
662
9558d2a5
JG
663 if (!map) {
664 return 0;
665 }
666
9558d2a5
JG
667 while (opts_len > 0) {
668 int len;
669 struct tun_meta_entry *entry;
670
671 if (opts_len < sizeof(*opt)) {
672 return EINVAL;
673 }
674
675 len = sizeof(*opt) + flow_opt->length * 4;
676 if (len > opts_len) {
677 return EINVAL;
678 }
679
680 entry = tun_meta_find_key(&map->key_hmap,
681 tun_meta_key(flow_opt->opt_class,
682 flow_opt->type));
683 if (entry) {
684 if (entry->loc.len == flow_opt->length * 4) {
2234a7db
JG
685 memcpy_to_metadata(metadata, opt + 1, &entry->loc,
686 entry - map->entries);
9558d2a5
JG
687 } else {
688 return EINVAL;
689 }
690 } else if (flow_opt->type & GENEVE_CRIT_OPT_TYPE) {
691 return EINVAL;
692 }
693
694 opt = opt + len / sizeof(*opt);
695 flow_opt = flow_opt + len / sizeof(*opt);
696 opts_len -= len;
697 }
698
699 return 0;
700}
701
6728d578
JG
702static const struct nlattr *
703tun_metadata_find_geneve_key(const struct nlattr *key, uint32_t key_len)
704{
705 const struct nlattr *tnl_key;
706
707 tnl_key = nl_attr_find__(key, key_len, OVS_KEY_ATTR_TUNNEL);
708 if (!tnl_key) {
709 return NULL;
710 }
711
712 return nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
713}
714
715/* Converts from Geneve netlink attributes in 'attr' to tunnel metadata
716 * in 'tun'. The result may either in be UDPIF format or not, as determined
717 * by 'udpif'.
718 *
719 * In the event that a mask is being converted, it is also necessary to
720 * pass in flow information. This includes the full set of netlink attributes
721 * (i.e. not just the Geneve attribute) in 'flow_attrs'/'flow_attr_len' and
722 * the previously converted tunnel metadata 'flow_tun'.
723 *
724 * If a flow rather than mask is being converted, 'flow_attrs' must be NULL. */
5bb08b0e
JG
725int
726tun_metadata_from_geneve_nlattr(const struct nlattr *attr,
727 const struct nlattr *flow_attrs,
728 size_t flow_attr_len,
6728d578
JG
729 const struct flow_tnl *flow_tun, bool udpif,
730 struct flow_tnl *tun)
9558d2a5 731{
5bb08b0e 732 bool is_mask = !!flow_attrs;
6728d578 733 int attr_len = nl_attr_get_size(attr);
5bb08b0e 734 const struct nlattr *flow;
9558d2a5 735
6728d578
JG
736 /* No need for real translation, just copy things over. */
737 if (udpif) {
738 memcpy(tun->metadata.opts.gnv, nl_attr_get(attr), attr_len);
5bb08b0e 739
6728d578
JG
740 if (!is_mask) {
741 tun->metadata.present.len = attr_len;
742 tun->flags |= FLOW_TNL_F_UDPIF;
743 } else {
744 /* We need to exact match on the length so we don't
745 * accidentally match on sets of options that are the same
746 * at the beginning but with additional options after. */
747 tun->metadata.present.len = 0xff;
5bb08b0e
JG
748 }
749
6728d578
JG
750 return 0;
751 }
752
753 if (is_mask) {
754 flow = tun_metadata_find_geneve_key(flow_attrs, flow_attr_len);
5bb08b0e 755 if (!flow) {
6728d578 756 return attr_len ? EINVAL : 0;
5bb08b0e
JG
757 }
758
6728d578 759 if (attr_len != nl_attr_get_size(flow)) {
5bb08b0e
JG
760 return EINVAL;
761 }
762 } else {
763 flow = attr;
9558d2a5
JG
764 }
765
6728d578
JG
766 return tun_metadata_from_geneve__(&flow_tun->metadata, nl_attr_get(attr),
767 nl_attr_get(flow), nl_attr_get_size(flow),
768 &tun->metadata);
5bb08b0e
JG
769}
770
6728d578
JG
771/* Converts from the flat Geneve options representation extracted directly
772 * from the tunnel header to the representation that maps options to
773 * pre-allocated locations. The original version (in UDPIF form) is passed
774 * in 'src' and the translated form in stored in 'dst'. To handle masks, the
775 * flow must also be passed in through 'flow' (in the original, raw form). */
5bb08b0e 776int
6728d578
JG
777tun_metadata_from_geneve_udpif(const struct flow_tnl *flow,
778 const struct flow_tnl *src,
779 struct flow_tnl *dst)
5bb08b0e 780{
6728d578 781 ovs_assert(flow->flags & FLOW_TNL_F_UDPIF);
5bb08b0e 782
6728d578
JG
783 if (flow == src) {
784 dst->flags = flow->flags & ~FLOW_TNL_F_UDPIF;
785 } else {
786 dst->metadata.tab = NULL;
787 }
788 dst->metadata.present.map = 0;
789 return tun_metadata_from_geneve__(&flow->metadata, src->metadata.opts.gnv,
790 flow->metadata.opts.gnv,
791 flow->metadata.present.len,
792 &dst->metadata);
5bb08b0e
JG
793}
794
795static void
796tun_metadata_to_geneve__(const struct tun_metadata *flow, struct ofpbuf *b,
797 bool *crit_opt)
798{
799 struct tun_table *map;
800 int i;
801
9558d2a5
JG
802 map = flow->tab;
803 if (!map) {
804 map = ovsrcu_get(struct tun_table *, &metadata_tab);
805 }
806
5bb08b0e 807 *crit_opt = false;
9558d2a5 808
6728d578 809 ULLONG_FOR_EACH_1 (i, flow->present.map) {
9558d2a5
JG
810 struct tun_meta_entry *entry = &map->entries[i];
811 struct geneve_opt *opt;
812
813 opt = ofpbuf_put_uninit(b, sizeof *opt + entry->loc.len);
814
815 opt->opt_class = tun_key_class(entry->key);
816 opt->type = tun_key_type(entry->key);
817 opt->length = entry->loc.len / 4;
818 opt->r1 = 0;
819 opt->r2 = 0;
820 opt->r3 = 0;
821
822 memcpy_from_metadata(opt + 1, flow, &entry->loc);
5bb08b0e 823 *crit_opt |= !!(opt->type & GENEVE_CRIT_OPT_TYPE);
9558d2a5 824 }
5bb08b0e
JG
825}
826
6728d578
JG
827static void
828tun_metadata_to_geneve_nlattr_flow(const struct flow_tnl *flow,
5bb08b0e
JG
829 struct ofpbuf *b)
830{
831 size_t nlattr_offset;
832 bool crit_opt;
833
6728d578 834 if (!flow->metadata.present.map) {
5bb08b0e
JG
835 return;
836 }
837
838 /* For all intents and purposes, the Geneve options are nested
839 * attributes even if this doesn't show up directly to netlink. It's
840 * similar enough that we can use the same mechanism. */
841 nlattr_offset = nl_msg_start_nested(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
842
6728d578 843 tun_metadata_to_geneve__(&flow->metadata, b, &crit_opt);
9558d2a5
JG
844
845 nl_msg_end_nested(b, nlattr_offset);
846}
847
6728d578
JG
848/* Converts from processed tunnel metadata information (in non-udpif
849 * format) in 'flow' to a stream of Geneve options suitable for
850 * transmission in 'opts'. Additionally returns whether there were
851 * any critical options in 'crit_opt' as well as the total length of
852 * data. */
5bb08b0e 853int
6728d578 854tun_metadata_to_geneve_header(const struct flow_tnl *flow,
5bb08b0e
JG
855 struct geneve_opt *opts, bool *crit_opt)
856{
857 struct ofpbuf b;
858
6728d578
JG
859 ovs_assert(!(flow->flags & FLOW_TNL_F_UDPIF));
860
4e548ad9 861 ofpbuf_use_stack(&b, opts, TLV_TOT_OPT_SIZE);
6728d578 862 tun_metadata_to_geneve__(&flow->metadata, &b, crit_opt);
5bb08b0e
JG
863
864 return b.size;
865}
866
6728d578
JG
867static void
868tun_metadata_to_geneve_mask__(const struct tun_metadata *flow,
869 const struct tun_metadata *mask,
870 struct geneve_opt *opt, int opts_len)
9558d2a5
JG
871{
872 struct tun_table *map = flow->tab;
9558d2a5
JG
873
874 if (!map) {
875 return;
876 }
877
9558d2a5
JG
878 /* All of these options have already been validated, so no need
879 * for sanity checking. */
9558d2a5
JG
880 while (opts_len > 0) {
881 struct tun_meta_entry *entry;
882 int len = sizeof(*opt) + opt->length * 4;
883
884 entry = tun_meta_find_key(&map->key_hmap,
885 tun_meta_key(opt->opt_class, opt->type));
886 if (entry) {
887 memcpy_from_metadata(opt + 1, mask, &entry->loc);
888 } else {
889 memset(opt + 1, 0, opt->length * 4);
890 }
891
892 opt->opt_class = htons(0xffff);
893 opt->type = 0xff;
894 opt->length = 0x1f;
895 opt->r1 = 0;
896 opt->r2 = 0;
897 opt->r3 = 0;
898
899 opt = opt + len / sizeof(*opt);
900 opts_len -= len;
901 }
902}
903
6728d578
JG
904static void
905tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
906 const struct flow_tnl *mask,
907 const struct flow_tnl *flow,
908 struct ofpbuf *b)
909{
910 const struct nlattr *geneve_key;
911 struct nlattr *geneve_mask;
912 struct geneve_opt *opt;
913 int opts_len;
914
915 if (!key) {
916 return;
917 }
918
919 geneve_key = tun_metadata_find_geneve_key(key->data, key->size);
920 if (!geneve_key) {
921 return;
922 }
923
924 geneve_mask = ofpbuf_tail(b);
925 nl_msg_put(b, geneve_key, geneve_key->nla_len);
926
927 opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask));
928 opts_len = nl_attr_get_size(geneve_mask);
929
930 tun_metadata_to_geneve_mask__(&flow->metadata, &mask->metadata,
931 opt, opts_len);
932}
933
934/* Convert from the tunnel metadata in 'tun' to netlink attributes stored
935 * in 'b'. Either UDPIF or non-UDPIF input forms are accepted.
936 *
937 * To assist with parsing, it is necessary to also pass in the tunnel metadata
938 * from the flow in 'flow' as well in the original netlink form of the flow in
939 * 'key'. */
940void
941tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun,
942 const struct flow_tnl *flow,
943 const struct ofpbuf *key,
944 struct ofpbuf *b)
945{
946 bool is_mask = tun != flow;
947
948 if (!(flow->flags & FLOW_TNL_F_UDPIF)) {
949 if (!is_mask) {
950 tun_metadata_to_geneve_nlattr_flow(tun, b);
951 } else {
952 tun_metadata_to_geneve_nlattr_mask(key, tun, flow, b);
953 }
954 } else if (flow->metadata.present.len || is_mask) {
955 nl_msg_put_unspec(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
956 tun->metadata.opts.gnv,
957 flow->metadata.present.len);
958 }
959}
960
961/* Converts 'mask_src' (in non-UDPIF format) to a series of masked options in
962 * 'dst'. 'flow_src' (also in non-UDPIF format) and the original set of
963 * options 'flow_src_opt'/'opts_len' are needed as a guide to interpret the
964 * mask data. */
965void
966tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src,
967 const struct flow_tnl *mask_src,
968 const struct geneve_opt *flow_src_opt,
969 int opts_len, struct geneve_opt *dst)
970{
971 ovs_assert(!(flow_src->flags & FLOW_TNL_F_UDPIF));
972
973 memcpy(dst, flow_src_opt, opts_len);
974 tun_metadata_to_geneve_mask__(&flow_src->metadata,
975 &mask_src->metadata, dst, opts_len);
976}
977
9558d2a5
JG
978static const struct tun_metadata_loc *
979metadata_loc_from_match_read(struct tun_table *map, const struct match *match,
1cb20095
JG
980 unsigned int idx, struct flow_tnl *mask,
981 bool *is_masked)
9558d2a5 982{
1cb20095
JG
983 union mf_value mask_opts;
984
9558d2a5 985 if (match->tun_md.valid) {
1cb20095
JG
986 *is_masked = match->tun_md.entry[idx].masked;
987 return &match->tun_md.entry[idx].loc;
9558d2a5
JG
988 }
989
1cb20095
JG
990 memcpy_from_metadata(mask_opts.tun_metadata, &mask->metadata,
991 &map->entries[idx].loc);
992
993 *is_masked = map->entries[idx].loc.len == 0 ||
994 !is_all_ones(mask_opts.tun_metadata,
995 map->entries[idx].loc.len);
9558d2a5
JG
996 return &map->entries[idx].loc;
997}
998
999void
1000tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm,
1001 const struct match *match)
1002{
6728d578 1003 struct flow_tnl flow, mask;
9558d2a5
JG
1004 int i;
1005
6728d578
JG
1006 if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel,
1007 &flow, &mask)) {
1008 return;
1009 }
1010
1011 ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) {
9558d2a5 1012 const struct tun_metadata_loc *loc;
1cb20095 1013 bool is_masked;
9558d2a5
JG
1014 union mf_value opts;
1015 union mf_value mask_opts;
1016
1cb20095
JG
1017 loc = metadata_loc_from_match_read(flow.metadata.tab, match, i,
1018 &mask, &is_masked);
6728d578
JG
1019 memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc);
1020 memcpy_from_metadata(mask_opts.tun_metadata, &mask.metadata, loc);
1cb20095
JG
1021 nxm_put__(b, MFF_TUN_METADATA0 + i, oxm, opts.tun_metadata,
1022 is_masked ? mask_opts.tun_metadata : NULL, loc->len);
9558d2a5
JG
1023 }
1024}
1025
1026void
1027tun_metadata_match_format(struct ds *s, const struct match *match)
1028{
6728d578 1029 struct flow_tnl flow, mask;
9558d2a5
JG
1030 unsigned int i;
1031
6728d578
JG
1032 if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel,
1033 &flow, &mask)) {
1034 return;
1035 }
1036
1037 ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) {
9558d2a5 1038 const struct tun_metadata_loc *loc;
1cb20095
JG
1039 bool is_masked;
1040 union mf_value opts, mask_opts;
9558d2a5 1041
1cb20095
JG
1042 loc = metadata_loc_from_match_read(flow.metadata.tab, match, i,
1043 &mask, &is_masked);
9558d2a5 1044
1cb20095
JG
1045 ds_put_format(s, "tun_metadata%u", i);
1046 memcpy_from_metadata(mask_opts.tun_metadata, &mask.metadata, loc);
1047
1048 if (!ULLONG_GET(flow.metadata.present.map, i)) {
1049 /* Indicate that we are matching on the field being not present. */
1050 ds_put_cstr(s, "=NP");
1051 } else if (!(is_masked &&
1052 is_all_zeros(mask_opts.tun_metadata, loc->len))) {
1053 ds_put_char(s, '=');
9558d2a5 1054
1cb20095 1055 memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc);
9558d2a5 1056 ds_put_hex(s, opts.tun_metadata, loc->len);
1cb20095
JG
1057
1058 if (!is_all_ones(mask_opts.tun_metadata, loc->len)) {
1059 ds_put_char(s, '/');
1060 ds_put_hex(s, mask_opts.tun_metadata, loc->len);
1061 }
9558d2a5
JG
1062 }
1063 ds_put_char(s, ',');
1064 }
1065}