]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - tools/testing/selftests/net/pmtu.sh
selftests: pmtu: use -oneline for ip route list cache
[mirror_ubuntu-hirsute-kernel.git] / tools / testing / selftests / net / pmtu.sh
1 #!/bin/sh
2 # SPDX-License-Identifier: GPL-2.0
3 #
4 # Check that route PMTU values match expectations, and that initial device MTU
5 # values are assigned correctly
6 #
7 # Tests currently implemented:
8 #
9 # - pmtu_ipv4
10 # Set up two namespaces, A and B, with two paths between them over routers
11 # R1 and R2 (also implemented with namespaces), with different MTUs:
12 #
13 #         segment a_r1    segment b_r1           a_r1: 2000
14 #       .--------------R1--------------.         b_r1: 1400
15 #      A                                B        a_r2: 2000
16 #       '--------------R2--------------'         b_r2: 1500
17 #         segment a_r2    segment b_r2
18 #
19 # Check that PMTU exceptions with the correct PMTU are created. Then
20 # decrease and increase the MTU of the local link for one of the paths,
21 # A to R1, checking that route exception PMTU changes accordingly over
22 # this path. Also check that locked exceptions are created when an ICMP
23 # message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
24 # received
25 #
26 # - pmtu_ipv6
27 # Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
28 #
29 # - pmtu_ipv4_vxlan4_exception
30 # Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
31 # over IPv4 between A and B, routed via R1. On the link between R1 and B,
32 # set a MTU lower than the VXLAN MTU and the MTU on the link between A and
33 # R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
34 # from A to B and check that the PMTU exception is created with the right
35 # value on A
36 #
37 # - pmtu_ipv6_vxlan4_exception
38 # Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
39 #
40 # - pmtu_ipv4_vxlan6_exception
41 # Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
42 #
43 # - pmtu_ipv6_vxlan6_exception
44 # Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
45 #
46 # - pmtu_ipv4_geneve4_exception
47 # Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
48 # VXLAN
49 #
50 # - pmtu_ipv6_geneve4_exception
51 # Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
52 # VXLAN
53 #
54 # - pmtu_ipv4_geneve6_exception
55 # Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
56 # VXLAN
57 #
58 # - pmtu_ipv6_geneve6_exception
59 # Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
60 # VXLAN
61 #
62 # - pmtu_ipv{4,6}_fou{4,6}_exception
63 # Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
64 # (FoU) over IPv4/IPv6, instead of VXLAN
65 #
66 # - pmtu_ipv{4,6}_gue{4,6}_exception
67 # Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
68 # encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
69 #
70 # - pmtu_vti4_exception
71 # Set up vti tunnel on top of veth, with xfrm states and policies, in two
72 # namespaces with matching endpoints. Check that route exception is not
73 # created if link layer MTU is not exceeded, then exceed it and check that
74 # exception is created with the expected PMTU. The approach described
75 # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
76 # changes alone won't affect PMTU
77 #
78 # - pmtu_vti6_exception
79 # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
80 # namespaces with matching endpoints. Check that route exception is
81 # created by exceeding link layer MTU with ping to other endpoint. Then
82 # decrease and increase MTU of tunnel, checking that route exception PMTU
83 # changes accordingly
84 #
85 # - pmtu_vti4_default_mtu
86 # Set up vti4 tunnel on top of veth, in two namespaces with matching
87 # endpoints. Check that MTU assigned to vti interface is the MTU of the
88 # lower layer (veth) minus additional lower layer headers (zero, for veth)
89 # minus IPv4 header length
90 #
91 # - pmtu_vti6_default_mtu
92 # Same as above, for IPv6
93 #
94 # - pmtu_vti4_link_add_mtu
95 # Set up vti4 interface passing MTU value at link creation, check MTU is
96 # configured, and that link is not created with invalid MTU values
97 #
98 # - pmtu_vti6_link_add_mtu
99 # Same as above, for IPv6
100 #
101 # - pmtu_vti6_link_change_mtu
102 # Set up two dummy interfaces with different MTUs, create a vti6 tunnel
103 # and check that configured MTU is used on link creation and changes, and
104 # that MTU is properly calculated instead when MTU is not configured from
105 # userspace
106 #
107 # - cleanup_ipv4_exception
108 # Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
109 # exceptions on multiple CPUs and check that the veth device tear-down
110 # happens in a timely manner
111 #
112 # - cleanup_ipv6_exception
113 # Same as above, but use IPv6 transport from A to B
114 #
115 # - list_flush_ipv4_exception
116 # Using the same topology as in pmtu_ipv4, create exceptions, and check
117 # they are shown when listing exception caches, gone after flushing them
118 #
119 # - list_flush_ipv6_exception
120 # Using the same topology as in pmtu_ipv6, create exceptions, and check
121 # they are shown when listing exception caches, gone after flushing them
122
123
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Run-time switches; all disabled by default.
PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0

# Some systems don't have a ping6 binary anymore: fall back to plain ping.
# "command -v" is the POSIX way to locate a command; unlike "which", it is a
# shell builtin and therefore always available.
ping6="$(command -v ping6)" || ping6="$(command -v ping)"
133
134 # Name Description re-run with nh
135 tests="
136 pmtu_ipv4_exception ipv4: PMTU exceptions 1
137 pmtu_ipv6_exception ipv6: PMTU exceptions 1
138 pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1
139 pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1
140 pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1
141 pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions 1
142 pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions 1
143 pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
144 pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
145 pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
146 pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
147 pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
148 pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
149 pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions 1
150 pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions 1
151 pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions 1
152 pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions 1
153 pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions 1
154 pmtu_vti6_exception vti6: PMTU exceptions 0
155 pmtu_vti4_exception vti4: PMTU exceptions 0
156 pmtu_vti4_default_mtu vti4: default MTU assignment 0
157 pmtu_vti6_default_mtu vti6: default MTU assignment 0
158 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0
159 pmtu_vti6_link_add_mtu vti6: MTU setting on link creation 0
160 pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 0
161 cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1
162 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1
163 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1
164 list_flush_ipv6_exception ipv6: list and flush cached exceptions 1"
165
# Namespace names, and the command prefixes used to run something inside them
NS_A="ns-A"
NS_B="ns-B"
NS_R1="ns-R1"
NS_R2="ns-R2"
ns_a="ip netns exec $NS_A"
ns_b="ip netns exec $NS_B"
ns_r1="ip netns exec $NS_R1"
ns_r2="ip netns exec $NS_R2"

# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
# Addresses are:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
prefix4="10.0"
prefix6="fc00"
a_r1=1
a_r2=2
b_r1=3
b_r2=4

# Table columns: ns, peer, segment
routing_addrs="
	A	R1	$a_r1
	A	R2	$a_r2
	B	R1	$b_r1
	B	R2	$b_r2
"

# Traffic from A to B goes through R1 by default, and through R2, if destined to
# B's address on the b_r2 segment.
# Traffic from B to A goes through R1.
# Table columns: ns, destination, gateway
routes="
	A	default			$prefix4.$a_r1.2
	A	$prefix4.$b_r2.1	$prefix4.$a_r2.2
	B	default			$prefix4.$b_r1.2

	A	default			$prefix6:$a_r1::2
	A	$prefix6:$b_r2::1	$prefix6:$a_r2::2
	B	default			$prefix6:$b_r1::2
"

USE_NH="no"
# Table columns: ns, family, nexthop id, gateway, device
nexthops="
	A	4	41	$prefix4.$a_r1.2	veth_A-R1
	A	4	42	$prefix4.$a_r2.2	veth_A-R2
	B	4	41	$prefix4.$b_r1.2	veth_B-R1

	A	6	61	$prefix6:$a_r1::2	veth_A-R1
	A	6	62	$prefix6:$a_r2::2	veth_A-R2
	B	6	61	$prefix6:$b_r1::2	veth_B-R1
"

# The nexthop id in the last column refers to the ids in the nexthops table
# Table columns: ns, family, prefix, nexthop id
routes_nh="
	A	4	default			41
	A	4	$prefix4.$b_r2.1	42
	B	4	default			41

	A	6	default			61
	A	6	$prefix6:$b_r2::1	62
	B	6	default			61
"

# Addresses on the direct veth link used by the vti tests
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
veth6_mask="64"

# Addresses assigned to tunnel endpoints
tunnel4_a_addr="192.168.2.1"
tunnel4_b_addr="192.168.2.2"
tunnel4_mask="24"
tunnel6_a_addr="fd00:2::a"
tunnel6_b_addr="fd00:2::b"
tunnel6_mask="64"

dummy6_0_prefix="fc00:1000::"
dummy6_1_prefix="fc00:1001::"
dummy6_mask="64"

# Pending error messages (see err, err_flush) and PIDs of running captures
err_buf=
tcpdump_pids=
252
# Queue one message, terminated by a newline, in the global err_buf; nothing
# is printed until err_flush is called.
#   $1 - message text
err() {
	err_buf="${err_buf}${1}"'
'
}
257
# Print every message queued by err() and empty the buffer.
# printf '%s' is used instead of "echo -n": the latter is unspecified for
# POSIX sh and some implementations would interpret backslashes in messages.
err_flush() {
	printf '%s' "${err_buf}"
	err_buf=
}
262
# Run a command, capturing its combined stdout/stderr, and return its exit
# status. With VERBOSE=1 the command line and any output are printed.
#
# Arguments: the command and its arguments; they are joined into one string
#            and word-split again, so arguments must not contain whitespace
# Globals:   VERBOSE (read); cmd, out, rc (written)
# Returns:   exit status of the command
run_cmd() {
	cmd="$*"

	if [ "$VERBOSE" = "1" ]; then
		# The command is passed as data, never as the format string:
		# a '%' or '\' in it must be printed verbatim
		printf " COMMAND: %s\n" "$cmd"
	fi

	out="$($cmd 2>&1)"
	rc=$?
	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
		echo " $out"
		echo
	fi

	return $rc
}
279
# Print the namespace name stored in the NS_<suffix> variable
#   $1 - suffix of the variable to look up, e.g. "A" for NS_A
nsname() {
	eval "echo \$NS_${1}"
}
284
# Create a FoU or GUE tunnel between namespaces A and B, using their addresses
# on the segments towards R1 as endpoints, then assign tunnel addresses and
# bring the tunnel devices up.
#
# Arguments:
#   $1 - outer (transport) family: "4" for IPv4, anything else selects IPv6
#   $2 - inner (payload) family: "4" for IPv4, anything else selects IPv6
#   $3 - encapsulation name, used as "encap" type and as device name prefix
# Globals:  outer, inner, encap, a_addr, b_addr, type, mode, ipproto (written)
# Returns:  2 if the fou module can't be loaded or namespace A can't be
#           configured, otherwise the status of the last command run
setup_fou_or_gue() {
	outer="${1}"
	inner="${2}"
	encap="${3}"

	# "mode" is only passed for ip6tnl devices. Clear it here, so that a
	# value left behind by an earlier IPv6-outer setup can't leak into the
	# "ip link add" command lines of an IPv4-outer one.
	mode=""

	if [ "${outer}" = "4" ]; then
		modprobe fou || return 2
		a_addr="${prefix4}.${a_r1}.1"
		b_addr="${prefix4}.${b_r1}.1"
		if [ "${inner}" = "4" ]; then
			type="ipip"
			ipproto="4"
		else
			type="sit"
			ipproto="41"
		fi
	else
		modprobe fou6 || return 2
		a_addr="${prefix6}:${a_r1}::1"
		b_addr="${prefix6}:${b_r1}::1"
		if [ "${inner}" = "4" ]; then
			type="ip6tnl"
			mode="mode ipip6"
			ipproto="4 -6"
		else
			type="ip6tnl"
			mode="mode ip6ip6"
			ipproto="41 -6"
		fi
	fi

	# Failures on the A side are reported as "not supported" (2); the
	# matching B-side commands are not checked
	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2

	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

	if [ "${inner}" = "4" ]; then
		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
	else
		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
	fi

	run_cmd ${ns_a} ip link set ${encap}_a up
	run_cmd ${ns_b} ip link set ${encap}_b up
}
333
# Shortcuts invoked by setup() as setup_<encap><outer family><inner family>

# FoU tunnels
setup_fou44() { setup_fou_or_gue 4 4 fou; }
setup_fou46() { setup_fou_or_gue 4 6 fou; }
setup_fou64() { setup_fou_or_gue 6 4 fou; }
setup_fou66() { setup_fou_or_gue 6 6 fou; }

# GUE tunnels
setup_gue44() { setup_fou_or_gue 4 4 gue; }
setup_gue46() { setup_fou_or_gue 4 6 gue; }
setup_gue64() { setup_fou_or_gue 6 4 gue; }
setup_gue66() { setup_fou_or_gue 6 6 gue; }
365
# Create the four namespaces (A, B, R1, R2).
# Returns 1 as soon as one of them can't be created.
setup_namespaces() {
	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		if ! ip netns add ${n}; then
			return 1
		fi

		# With DAD disabled, configured IPv6 addresses can be used
		# right away, without waiting
		ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
	done
}
375
# Connect A and B directly with a veth pair (veth_a in A, veth_b in B),
# configure IPv4 and IPv6 addresses on both ends and bring them up.
# Returns 1 if the pair can't be created.
setup_veth() {
	# The pair is created inside A, then one end is moved to B
	if ! run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b; then
		return 1
	fi
	run_cmd ${ns_a} ip link set veth_b netns ${NS_B}

	# IPv4 addresses
	run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b

	# IPv6 addresses
	run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b

	run_cmd ${ns_a} ip link set veth_a up
	run_cmd ${ns_b} ip link set veth_b up
}
389
# Create a vti (IPv4) or vti6 (IPv6) tunnel with key 10 between A and B,
# named vti<proto>_a and vti<proto>_b, assign addresses and bring it up.
#
# Arguments:
#   $1 - protocol: 6 selects type vti6, anything else type vti
#   $2 - local (lower layer) address of A
#   $3 - local (lower layer) address of B
#   $4 - tunnel address for A
#   $5 - tunnel address for B
#   $6 - prefix length for tunnel addresses
# Returns 1 if the device can't be created in A.
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	if ! run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10; then
		return 1
	fi
	run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

	run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
	run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b

	run_cmd ${ns_a} ip link set vti${proto}_a up
	run_cmd ${ns_b} ip link set vti${proto}_b up
}
409
# vti tunnels over the direct veth link, using the predefined veth and tunnel
# addresses of the matching family
setup_vti4() {
	setup_vti 4 \
		${veth4_a_addr} ${veth4_b_addr} \
		${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
}

setup_vti6() {
	setup_vti 6 \
		${veth6_a_addr} ${veth6_b_addr} \
		${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}
417
# Create a VXLAN or GENEVE tunnel (id 1) between A and B, named <type>_a and
# <type>_b, assign IPv4 and IPv6 tunnel addresses, and bring it up.
#
# Arguments:
#   $1 - tunnel type, used as link type and device name prefix
#   $2 - endpoint address of A
#   $3 - endpoint address of B
#   $4 - extra options appended to both "ip link add" commands (optional)
# Returns 1 if the device can't be created in A.
setup_vxlan_or_geneve() {
	type="${1}"
	a_addr="${2}"
	b_addr="${3}"
	opts="${4}"

	case "${type}" in
	vxlan)
		# VXLAN gets explicit TTL, destination port and local address
		opts="${opts} ttl 64 dstport 4789"
		opts_a="local ${a_addr}"
		opts_b="local ${b_addr}"
		;;
	*)
		opts_a=""
		opts_b=""
		;;
	esac

	if ! run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts}; then
		return 1
	fi
	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

	# Both address families are configured on the tunnel devices
	run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b

	run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

	run_cmd ${ns_a} ip link set ${type}_a up
	run_cmd ${ns_b} ip link set ${type}_b up
}
445
# GENEVE and VXLAN tunnels between the addresses of A and B on the segments
# towards R1. The IPv4 variants also pass "df set" as extra link option.
setup_geneve4() {
	setup_vxlan_or_geneve geneve \
		${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set"
}

setup_vxlan4() {
	setup_vxlan_or_geneve vxlan \
		${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set"
}

setup_geneve6() {
	setup_vxlan_or_geneve geneve \
		${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}

setup_vxlan6() {
	setup_vxlan_or_geneve vxlan \
		${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}
461
# Install ESP tunnel-mode xfrm states and policies in A and B for traffic
# between the two given endpoints: one state per direction (SPIs 0x1000 and
# 0x1001) and in/out policies with mark 10 in each namespace.
#
# Arguments:
#   $1 - IP family, passed to ip as -<family>
#   $2 - endpoint address of A
#   $3 - endpoint address of B
# Returns 1 if the first state can't be added in A.
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	# Namespace A: states for both directions, then out/in policies
	run_cmd ${ns_a} ip -${proto} xfrm state add \
		src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
		proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel || return 1
	run_cmd ${ns_a} ip -${proto} xfrm state add \
		src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
		proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 \
		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 \
		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

	# Namespace B: same states, policies with directions swapped
	run_cmd ${ns_b} ip -${proto} xfrm state add \
		src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
		proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm state add \
		src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
		proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 \
		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 \
		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
477
# xfrm configuration between the veth endpoints of the matching family
setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}; }

setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}; }
485
# Install the routes listed in the "routes" table (ns, destination, gateway)
# as plain "via" routes.
#
# The table is consumed three fields at a time from the positional
# parameters, so parsing doesn't depend on ns, addr and gw being empty on
# entry: a value left behind by other code can't misalign the columns.
#
# Globals: routes (read); ns, addr, gw (cleared on return), ns_name (written)
setup_routing_old() {
	set -- ${routes}
	while [ $# -ge 3 ]; do
		ns="${1}"
		addr="${2}"
		gw="${3}"
		shift 3

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} route add ${addr} via ${gw}
	done

	ns=""; addr=""; gw=""
}
499
# Install routes using nexthop objects: first create the nexthops listed in
# the "nexthops" table (ns, family, nexthop id, gateway, device), then the
# routes from "routes_nh" (ns, family, prefix, nexthop id) referring to them.
#
# Both tables are consumed from the positional parameters, a full row at a
# time, so parsing doesn't depend on the field variables being empty on
# entry (dev, for instance, is also written by mtu()).
#
# Globals: nexthops, routes_nh (read); ns, fam, nhid, gw, dev, addr (cleared
#          on return), ns_name (written)
setup_routing_new() {
	set -- ${nexthops}
	while [ $# -ge 5 ]; do
		ns="${1}"
		fam="${2}"
		nhid="${3}"
		gw="${4}"
		dev="${5}"
		shift 5

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}
	done

	set -- ${routes_nh}
	while [ $# -ge 4 ]; do
		ns="${1}"
		fam="${2}"
		addr="${3}"
		nhid="${4}"
		shift 4

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
	done

	ns=""; fam=""; addr=""; nhid=""; gw=""; dev=""
}
529
# Build the routed topology: enable forwarding on R1 and R2, create one veth
# pair per row of "routing_addrs" (ns, peer, segment) with addresses
# PREFIX4.SEGMENT.{1,2}/24 and PREFIX6:SEGMENT::{1,2}/64, then install routes
# with nexthop objects if USE_NH is "yes", with plain routes otherwise.
#
# The table is consumed three fields at a time from the positional
# parameters, so parsing doesn't depend on ns, peer and segment being empty
# on entry (they would be left set by an earlier failed run, for instance).
#
# Returns: 1 if a veth pair can't be created, 0 otherwise
setup_routing() {
	for i in ${NS_R1} ${NS_R2}; do
		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
	done

	set -- ${routing_addrs}
	while [ $# -ge 3 ]; do
		ns="${1}"
		peer="${2}"
		segment="${3}"
		shift 3

		ns_name="$(nsname ${ns})"
		peer_name="$(nsname ${peer})"
		if="veth_${ns}-${peer}"
		ifpeer="veth_${peer}-${ns}"

		# Create veth links
		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
		ip -n ${peer_name} link set dev ${ifpeer} up

		# Add addresses: ID 1 on the host side, ID 2 on the router side
		ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if}
		ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if}

		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer}
		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}
	done
	ns=""; peer=""; segment=""

	if [ "$USE_NH" = "yes" ]; then
		setup_routing_new
	else
		setup_routing_old
	fi

	return 0
}
568
# Clean up whatever a previous run left behind, then call setup_<arg> for
# each argument, in order.
#
# Returns: $ksft_skip if not running as root, 1 (after printing a message) as
#          soon as one setup step fails, 0 otherwise
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo " need to run as root"
		return $ksft_skip
	fi

	cleanup
	for arg do
		if ! eval setup_${arg}; then
			echo " ${arg} not supported"
			return 1
		fi
	done
}
577
# If TRACING is enabled, start one background tcpdump per (namespace command,
# interface) pair given as arguments, writing to <name>_<interface>.pcap, and
# remember the PIDs in tcpdump_pids. Then wait a second before returning.
#
# Arguments: alternating namespace command prefix and interface name
# Globals:   TRACING, name (read); ns_cmd, tcpdump_pids (written)
trace() {
	if [ $TRACING -eq 0 ]; then
		return
	fi

	for arg do
		# Odd arguments carry the namespace command for the interface
		# that follows
		if [ "${ns_cmd}" = "" ]; then
			ns_cmd="${arg}"
			continue
		fi

		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
		tcpdump_pids="${tcpdump_pids} $!"
		ns_cmd=
	done
	sleep 1
}
589
# Stop the captures started by trace() and delete the test namespaces;
# errors from namespace deletion are silenced.
cleanup() {
	set -- ${tcpdump_pids}
	for pid do
		kill ${pid}
	done
	tcpdump_pids=

	set -- ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}
	for n do
		ip netns del ${n} 2> /dev/null
	done
}
600
# Set the MTU of a device
#   $1 - command prefix selecting the namespace
#   $2 - device name
#   $3 - MTU value
# Globals: ns_cmd, dev, mtu (written)
mtu() {
	ns_cmd="$1"; dev="$2"; mtu="$3"

	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
}
608
# Print the value following the first "mtu" word of the given text, prefixed
# by "lock " if the word "lock" sits between the two. Prints nothing if
# there's no "mtu" word, or nothing after it.
#   $1 - text to scan, split on whitespace
mtu_parse() {
	input="${1}"

	# next: 0 - looking for "mtu", 1 - right after "mtu", 2 - after "lock"
	next=0
	for i in ${input}; do
		case "${next}:${i}" in
		1:lock)
			next=2
			continue
			;;
		1:*)
			echo "${i}"
			return
			;;
		2:*)
			echo "lock ${i}"
			return
			;;
		esac
		[ "${i}" = "mtu" ] && next=1
	done
}
620
# Print "ip link show" output for a device
#   $1 - command prefix selecting the namespace
#   $2 - device name
# Globals: ns_cmd, name (written)
link_get() {
	ns_cmd="$1"; name="$2"

	${ns_cmd} ip link show dev "${name}"
}
627
# Print the MTU of a device, as parsed from link_get output
#   $1 - command prefix selecting the namespace
#   $2 - device name
link_get_mtu() {
	ns_cmd="$1"; name="$2"

	mtu_parse "$(link_get "${ns_cmd}" ${name})"
}
634
# Print "ip route get" output for a destination
#   $1 - command prefix selecting the namespace
#   $2 - destination address
# Globals: ns_cmd, dst (written)
route_get_dst_exception() {
	ns_cmd="$1"; dst="$2"

	${ns_cmd} ip route get "${dst}"
}
641
# Print the PMTU found in route_get_dst_exception output for a destination,
# as extracted by mtu_parse
#   $1 - command prefix selecting the namespace
#   $2 - destination address
route_get_dst_pmtu_from_exception() {
	ns_cmd="$1"; dst="$2"

	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
}
648
# Compare a PMTU value against the expected one, queueing an error message
# with err() on mismatch.
#
# Arguments:
#   $1 - expected value: "any" accepts every non-empty value, an empty
#        string means no exception should exist
#   $2 - value found
#   $3 - description of the triggering event, used in messages
# Returns: 0 on match, 1 otherwise
check_pmtu_value() {
	expected="${1}"
	value="${2}"
	event="${3}"

	if [ "${expected}" = "any" ] && [ -n "${value}" ]; then
		return 0
	fi
	if [ "${value}" = "${expected}" ]; then
		return 0
	fi

	if [ -z "${value}" ]; then
		err " PMTU exception wasn't created after ${event}"
	elif [ -z "${expected}" ]; then
		err " PMTU exception shouldn't exist after ${event}"
	else
		err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
	fi
	return 1
}
661
# PMTU exception test over the routed topology, for one IP family.
#
# Exceptions are created towards B over both paths (via R1 and via R2), then
# the local MTU on the path via R1 is decreased and increased, checking that
# only the exception on that path follows. For IPv4 only, locked exceptions
# (advertised PMTU below min_pmtu) are checked as well, on the path via R2.
#
#   $1 - IP family, 4 or 6
# Returns: 2 if setup fails, 1 on check failure, 0 otherwise
test_pmtu_ipvX() {
	family=${1}

	setup namespaces routing || return 2
	trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
	      "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2

	# dst1 is reached via R1, dst2 via R2
	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Initial MTUs, path via R1: 2000 towards the router, 1400 beyond it
	mtu "${ns_a}" veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}" veth_B-R1 1400

	# Initial MTUs, path via R2: 2000 towards the router, 1500 beyond it
	mtu "${ns_a}" veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}" veth_B-R2 1500

	# Packets bigger than the second hop MTU create route exceptions
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}

	# Both exceptions must carry the MTU of the respective second hop
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# Local MTU below PMTU on the first path: its exception follows...
	mtu "${ns_a}" veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# ...the one on the other path doesn't
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Local MTU increased on the first path: its exception follows...
	mtu "${ns_a}" veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	# ...the one on the other path doesn't
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# PMTU locking tests are for IPv4 only
	if [ $family -eq 6 ]; then
		return 0
	fi

	# Remote MTU on the path via R2 goes below min_pmtu: new, locked
	# exception expected
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}" veth_B-R2 400
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Local MTU below PMTU
	mtu "${ns_a}" veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Local MTU increased again
	mtu "${ns_a}" veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# New traffic must bring the locked exception back
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
747
# Per-family entry points for the routed PMTU exception test
test_pmtu_ipv4_exception() { test_pmtu_ipvX 4; }

test_pmtu_ipv6_exception() { test_pmtu_ipvX 6; }
755
# PMTU exception test for traffic carried by a VXLAN or GENEVE tunnel routed
# via R1: the link between R1 and B gets a lower MTU than both the tunnel and
# the link between A and R1, and the resulting exception on A is checked.
#
#   $1 - tunnel type, vxlan or geneve
#   $2 - family of the traffic sent through the tunnel, 4 or 6
#   $3 - family of the tunnel transport, 4 or 6
# Returns: 2 if setup fails, otherwise the result of the PMTU check
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	# Setup step name suffix and outer IP header length both depend on
	# the transport family
	if [ ${outer_family} -eq 4 ]; then
		outer_suffix=4
		outer_hdr_len=20
	else
		outer_suffix=6
		outer_hdr_len=40
	fi

	setup namespaces routing ${type}${outer_suffix} || return 2

	# Overhead: outer IP header, UDP header (8), VXLAN/GENEVE header (8),
	# Ethernet header (14)
	exp_mtu=$((ll_mtu - outer_hdr_len - 8 - 8 - 14))

	trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \
	      "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# Link layer MTU is ll_mtu between R1 and B only: the link between A
	# and R1, and the tunnel devices, allow 1000 bytes more
	mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}" veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))

	# Exceed the link layer MTU to create the route exception
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# The exception must account for the encapsulation overhead
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
}
798
# Entry points named test_pmtu_ipv<inner>_<type><outer>_exception, passing
# tunnel type, inner family and outer family to the common implementation

# IPv4 transport
test_pmtu_ipv4_vxlan4_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 4; }
test_pmtu_ipv6_vxlan4_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 4; }
test_pmtu_ipv4_geneve4_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4; }
test_pmtu_ipv6_geneve4_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4; }

# IPv6 transport
test_pmtu_ipv4_vxlan6_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 6; }
test_pmtu_ipv6_vxlan6_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 6; }
test_pmtu_ipv4_geneve6_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6; }
test_pmtu_ipv6_geneve6_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6; }
830
# PMTU exception test for traffic carried by a FoU or GUE tunnel routed via
# R1: the link between R1 and B gets a lower MTU than both the tunnel and the
# link between A and R1, and the resulting exception on A is checked.
#
#   $1 - family of the traffic sent through the tunnel, 4 or 6
#   $2 - family of the tunnel transport, 4 or 6
#   $3 - encapsulation, fou or gue
# Returns: 2 if setup fails, otherwise the result of the PMTU check
test_pmtu_ipvX_over_fouY_or_gueY() {
	inner_family=${1}
	outer_family=${2}
	encap=${3}
	ll_mtu=4000

	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
	trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
	      "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B

	if [ ${inner_family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# GUE adds 4 bytes on top of the UDP header, FoU nothing
	case "${encap}" in
	gue)	encap_overhead=4 ;;
	*)	encap_overhead=0 ;;
	esac

	if [ ${outer_family} -eq 4 ]; then
		# Overhead: IPv4 header (20), UDP header (8)
		exp_mtu=$((ll_mtu - 20 - 8 - encap_overhead))
	else
		# Overhead: IPv6 header (40), Option 4 (8), UDP header (8)
		exp_mtu=$((ll_mtu - 40 - 8 - 8 - encap_overhead))
	fi

	# Link layer MTU is ll_mtu between R1 and B only: the link between A
	# and R1, and the tunnel devices, allow 1000 bytes more
	mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}" veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))

	# Exceed the link layer MTU to create the route exception
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# The exception must account for the encapsulation overhead
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
}
878
# Entry points named test_pmtu_ipv<inner>_<encap><outer>_exception, passing
# inner family, outer family and encapsulation to the common implementation

# FoU
test_pmtu_ipv4_fou4_exception() { test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou; }
test_pmtu_ipv6_fou4_exception() { test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou; }
test_pmtu_ipv4_fou6_exception() { test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou; }
test_pmtu_ipv6_fou6_exception() { test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou; }

# GUE
test_pmtu_ipv4_gue4_exception() { test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue; }
test_pmtu_ipv6_gue4_exception() { test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue; }
test_pmtu_ipv4_gue6_exception() { test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue; }
test_pmtu_ipv6_gue6_exception() { test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue; }
910
# PMTU exception test for a vti tunnel with xfrm states and policies, on top
# of a direct veth link: no exception is expected for the biggest packet that
# fits the link layer MTU, and one with the right value for a packet that is
# one byte bigger.
# Returns: 2 if setup fails, 1 on check failure, 0 otherwise
test_pmtu_vti4_exception() {
	setup namespaces veth vti4 xfrm4 || return 2
	trace "${ns_a}" veth_a "${ns_b}" veth_b \
	      "${ns_a}" vti4_a "${ns_b}" vti4_b

	# The vti MTU is the veth MTU minus the 20 bytes subtracted here
	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	# Subtract, in order: SPI (4), SN (4), IV (8), ICV (16), pad length (1)
	# and next header (1)
	esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# A DF packet that doesn't exceed the link layer MTU must not create
	# an exception
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1

	# One byte more exceeds the link layer MTU: an exception with the
	# right PMTU value is expected
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
940
# PMTU exception test for a vti6 tunnel with xfrm states and policies, on top
# of a direct veth link: an exception is created by exceeding the link layer
# MTU, then the tunnel MTU is decreased and increased, checking that the
# exception follows.
# Returns: 2 if setup fails, 1 on check failure, 0 otherwise
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	trace "${ns_a}" veth_a "${ns_b}" veth_b \
	      "${ns_a}" vti6_a "${ns_b}" vti6_b
	fail=0

	# Tunnel MTU above link layer MTU, then a big ping to exceed it
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}

	# Any exception will do here, but without one there's nothing left
	# to check
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1

	# Lower tunnel MTU: the PMTU in the exception must decrease
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1

	# Higher tunnel MTU: the PMTU in the exception must increase; this is
	# checked even if the previous step failed
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1

	return ${fail}
}
970
# Check the MTU assigned by default to a vti device: it must be the MTU of
# the underlying veth minus the IPv4 header length (20).
# Returns: 2 if setup fails, 1 on mismatch, 0 otherwise
test_pmtu_vti4_default_mtu() {
	setup namespaces veth vti4 || return 2

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
	[ $((veth_mtu - vti4_mtu)) -eq 20 ] && return 0

	err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
	return 1
}
982
# Check the default MTU assigned to a vti6 device: it is expected to be the
# MTU of the underlying veth minus 40 bytes of IPv6 header.
#
# Returns 0 on success, 1 on mismatch, 2 if the environment can't be set up.
test_pmtu_vti6_default_mtu() {
	if ! setup namespaces veth vti6; then
		return 2
	fi

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"

	# Nothing to report if the difference is exactly one IPv6 header
	[ $((veth_mtu - vti6_mtu)) -ne 40 ] || return 0

	err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
	return 1
}
994
# Check MTU handling when a vti (IPv4) device is created with an explicit
# "mtu" argument: out-of-range values must be rejected or adjusted into the
# valid range, in-range values must be applied as given.
#
# Returns 0 on success, 1 on failure, 2 if namespaces can't be set up or vti
# devices can't be created at all.
test_pmtu_vti4_link_add_mtu() {
	if ! setup namespaces; then
		return 2
	fi

	# Probe: create and remove a vti device without passing any MTU
	if ! run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
		err " vti not supported" && return 2
	fi
	run_cmd ${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65535 - 20))

	# Values right outside the valid range: creation might fail, or the MTU
	# might be adjusted to a proper value, both outcomes are fine
	for v in $((min - 1)) $((max + 1)); do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 || continue

		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
			err " vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti4_a
	done

	# Valid values, boundaries included: the device has to report exactly
	# the MTU that was requested
	for v in ${min} 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		run_cmd ${ns_a} ip link del vti4_a

		[ "${mtu}" = "${v}" ] && continue
		err " vti MTU ${mtu} doesn't match configured value ${v}"
		fail=1
	done

	return ${fail}
}
1032
# Check MTU handling when a vti6 device is created with an explicit "mtu"
# argument: out-of-range values must be rejected or adjusted into the valid
# range, in-range values must be applied as given.
#
# Returns 0 on success, 1 on failure, 2 if namespaces can't be set up or vti6
# devices can't be created at all.
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe: create and remove a vti6 device without passing any MTU
	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
	[ $? -ne 0 ] && err " vti6 not supported" && return 2
	run_cmd ${ns_a} ip link del vti6_a

	fail=0

	min=68				# vti6 can carry IPv4 packets too
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		# This can fail, or MTU can be adjusted to a proper value
		[ $? -ne 0 ] && continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the device actually ended up with: that's
			# the value which was found to be out of range
			err " vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti6_a
	done

	# Now check valid values, range boundaries and IPv6 minimum MTU (1280)
	# included
	for v in ${min} 1280 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		run_cmd ${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err " vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
1070
# Check how the MTU of a vti6 device changes as its endpoints are moved
# between two dummy devices with different MTUs (1500 and 3000): an MTU passed
# explicitly has to be kept, otherwise the MTU is expected to be the one of
# the new underlying device minus 40 bytes of IPv6 header.
#
# Returns 0 on success, 1 on failure, 2 if namespaces can't be set up or dummy
# devices can't be created.
test_pmtu_vti6_link_change_mtu() {
	if ! setup namespaces; then
		return 2
	fi

	if ! run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
		err " dummy not supported" && return 2
	fi
	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
	run_cmd ${ns_a} ip link set dummy0 up
	run_cmd ${ns_a} ip link set dummy1 up

	run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
	run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1

	fail=0

	# Step 1: create vti6 with addresses from dummy0's prefix and an
	# explicit MTU, which should be applied as it is
	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ ${mtu} -ne 1300 ] && {
		err " vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	}

	# Step 2: switch to addresses from dummy1's prefix, no MTU given: it
	# should now be derived from dummy1
	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ ${mtu} -ne $((3000 - 40)) ] && {
		err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	}

	# Step 3: switch back to dummy0's prefix, this time with an explicit
	# MTU, which must not be overridden
	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ ${mtu} -ne 1280 ] && {
		err " vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	}

	return ${fail}
}
1112
# Check that a command needed by a test is available.
#
# $1:	name of the command to look for
#
# Returns 0 if the command is found, otherwise logs an error with err() and
# returns 1.
#
# The lookup uses the POSIX "command -v" builtin instead of "which": the
# latter is an external tool which is not necessarily installed, and without
# it any command would be reported as missing. Note that "command -v" also
# reports shell builtins and functions as available.
check_command() {
	cmd=${1}

	if ! command -v "${cmd}" > /dev/null 2>&1; then
		err " missing required command: '${cmd}'"
		return 1
	fi
	return 0
}
1122
# Check that a veth device can still be deleted promptly after PMTU route
# exceptions were created via a VXLAN tunnel from up to two different CPUs.
#
# $1:	IP version of the outer (transport) protocol, appended to "vxlan" to
#	form the setup step name
#
# Returns 0 on success, 1 if the device deletion is still running after one
# second, 2 if taskset is missing or the environment can't be set up.
test_cleanup_vxlanX_exception() {
	outer="${1}"
	encap="vxlan"
	ll_mtu=4000

	if ! check_command taskset; then
		return 2
	fi
	# Pick the first two "processor" entries at most
	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)

	if ! setup namespaces routing ${encap}${outer}; then
		return 2
	fi
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	# MTU between R1 and B is lower than on the A side and on the tunnel
	# devices, so that route exceptions can be created by exceeding it
	mtu "${ns_a}"  veth_A-R1 $((ll_mtu + 1000))
	mtu "${ns_r1}" veth_R1-A $((ll_mtu + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((ll_mtu + 1000))
	mtu "${ns_b}" ${encap}_b $((ll_mtu + 1000))

	# Fill exception cache for multiple CPUs (2): inner IPv4 traffic can
	# always be used for that, no matter what the outer protocol is
	for cpu in ${cpu_list}; do
		run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ll_mtu + 500)) ${tunnel4_b_addr}
	done

	# Delete the veth in the background and give it one second to finish
	${ns_a} ip link del dev veth_A-R1 &
	iplink_pid=$!
	sleep 1

	# /proc/PID/cmdline has NUL-separated arguments: once NULs are dropped,
	# a match against the joined command line means the deletion is still
	# in progress
	[ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ] || return 0

	err " can't delete veth device in a timely manner, PMTU dst likely leaked"
	return 1
}
1159
# Device cleanup test with exceptions created over VXLAN, outer protocol
# selector "6". Returns whatever test_cleanup_vxlanX_exception() returns.
test_cleanup_ipv6_exception() {
	test_cleanup_vxlanX_exception "6"
}
1163
# Device cleanup test with exceptions created over VXLAN, outer protocol
# selector "4". Returns whatever test_cleanup_vxlanX_exception() returns.
test_cleanup_ipv4_exception() {
	test_cleanup_vxlanX_exception "4"
}
1167
# Run a single test in a subshell and print its outcome.
#
# $1:	test name, the function test_<name> is called
# $2:	description, printed in the result line
#
# The test function is expected to return 0 (pass), 1 (fail) or 2 (skip).
# Any non-zero result sets the global exitcode to 1. With PAUSE_ON_FAIL=yes,
# a failure waits for enter to be pressed before going on.
#
# Returns the result of the test.
run_test() {
	(
	tname="$1"
	tdesc="$2"

	# Back to default field splitting for the test itself
	unset IFS

	[ "$VERBOSE" = "1" ] && printf "\n##########################################################################\n\n"

	eval test_${tname}
	ret=$?

	case ${ret} in
	0)
		printf "TEST: %-60s [ OK ]\n" "${tdesc}"
		;;
	1)
		printf "TEST: %-60s [FAIL]\n" "${tdesc}"
		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
			echo
			echo "Pausing. Hit enter to continue"
			read a
		fi
		err_flush
		exit 1
		;;
	2)
		printf "TEST: %-60s [SKIP]\n" "${tdesc}"
		err_flush
		;;
	esac

	return $ret
	)
	ret=$?
	if [ $ret -ne 0 ]; then
		exitcode=1
	fi

	return $ret
}
1205
# Run a test once more with USE_NH set to "yes", tagging the description with
# " - nexthop objects". USE_NH is set back to "no" once the test is done.
#
# $1:	test name, passed on to run_test()
# $2:	description, passed on to run_test() with the suffix appended
run_test_nh() {
	tname="${1}"
	tdesc="${2}"

	USE_NH=yes
	run_test "${tname}" "${tdesc} - nexthop objects"
	USE_NH=no
}
1214
# Check that cached IPv4 route exceptions can be listed and flushed: create
# 100 exceptions for the path via R1 and one for the path via R2, expect 101
# entries from "ip route list cache", then flush the cache and expect no
# exceptions to be left.
#
# Returns 0 on success, 1 on failure, 2 if the environment can't be set up.
test_list_flush_ipv4_exception() {
	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	dst_prefix1="${prefix4}.${b_r1}."
	dst2="${prefix4}.${b_r2}.1"

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1500
	mtu "${ns_b}"  veth_B-R1 1500

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	fail=0

	# Add 100 addresses for veth endpoint on B reached by default A route
	for i in $(seq 100 199); do
		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
	done

	# Create 100 cached route exceptions for path via R1, one via R2. Note
	# that with IPv4 we need to actually cause a route lookup that matches
	# the exception caused by ICMP, in order to actually have a cached
	# route, so we need to ping each destination twice
	for i in $(seq 100 199); do
		run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
	done
	run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"

	if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then
		err " can't list cached exceptions"
		fail=1
	fi

	run_cmd ${ns_a} ip route flush cache
	# Look up destinations that actually had exceptions before the flush:
	# the first address reached via R1, and the one reached via R2
	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}")"
	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
	   [ -n "$(${ns_a} ip route list cache)" ]; then
		err " can't flush cached exceptions"
		fail=1
	fi

	return ${fail}
}
1268
# Check that cached IPv6 route exceptions can be listed and flushed: create
# 100 exceptions for the path via R1 and one for the path via R2, expect 101
# entries from "ip -6 route list cache", then flush the cache and expect no
# exceptions to be left.
#
# Returns 0 on success, 1 on failure, 2 if the environment can't be set up.
test_list_flush_ipv6_exception() {
	if ! setup namespaces routing; then
		return 2
	fi
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	dst_prefix1="${prefix6}:${b_r1}::"
	dst2="${prefix6}:${b_r2}::1"

	# Initial MTU values: 2000 between A and routers, 1500 between routers
	# and B, on both paths
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1500
	mtu "${ns_b}"  veth_B-R1 1500

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	fail=0

	# 100 more addresses for the veth endpoint on B reached by the default
	# route of A
	for i in $(seq 100 199); do
		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
	done

	# 100 cached route exceptions for the path via R1, one via R2
	for i in $(seq 100 199); do
		run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
	done
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"

	# With -oneline, each cached entry takes exactly one line
	[ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ] && {
		err " can't list cached exceptions"
		fail=1
	}

	# After a flush, no exception should be found for either path, and the
	# cache listing should be empty
	run_cmd ${ns_a} ip -6 route flush cache
	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
	   [ -n "$(${ns_a} ip -6 route list cache)" ]; then
		err " can't flush cached exceptions"
		fail=1
	fi

	return ${fail}
}
1318
# Print a usage summary, followed by the list of available tests taken from
# the "tests" variable, then exit with status 1.
#
# The options described here are the ones handled by the getopts loop of this
# script (-p, -t, -v); long options such as "--trace" are not accepted by it.
usage() {
	echo
	echo "$0 [OPTIONS] [TEST]..."
	echo "If no TEST argument is given, all tests will be run."
	echo
	echo "Options"
	echo "  -p: pause on failure, wait for enter before continuing"
	echo "  -t: capture traffic to TEST_INTERFACE.pcap (needs tcpdump)"
	echo "  -v: verbose output"
	echo
	echo "Available tests${tests}"
	exit 1
}
1330
1331 ################################################################################
1332 #
# Overall exit status: set to 1 by run_test() as soon as a test doesn't pass
exitcode=0
desc=0

# Options: -p pause on failure, -v verbose, -t trace traffic with tcpdump.
# Anything else prints the usage message and exits.
while getopts :ptv o
do
	case $o in
	p) PAUSE_ON_FAIL=yes;;
	v) VERBOSE=1;;
	t) # "command -v" is used instead of "which", which might not be
	   # installed: same lookup as the one done on test names below
	   if command -v tcpdump > /dev/null 2>&1; then
		TRACING=1
	   else
		echo "=== tcpdump not available, tracing disabled"
	   fi
	   ;;
	*) usage;;
	esac
done
shift $(($OPTIND-1))

# From now on, split on newlines only: this is what the loop over ${tests}
# below relies on to take one line at a time
IFS="
"

for arg do
	# Check first that all requested tests are available before running any
	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
done

trap cleanup EXIT

# start clean
cleanup

# Nexthop objects are considered available if "ip nexthop ls" succeeds
HAVE_NH=no
ip nexthop ls >/dev/null 2>&1
[ $? -eq 0 ] && HAVE_NH=yes

# ${tests} is consumed three items at a time: test name, description, and a
# flag telling whether the test should be repeated with nexthop objects (taken
# into account only if nexthop objects are available)
name=""
desc=""
rerun_nh=0
for t in ${tests}; do
	[ "${name}" = "" ]	&& name="${t}"	&& continue
	[ "${desc}" = "" ]	&& desc="${t}"	&& continue

	if [ "${HAVE_NH}" = "yes" ]; then
		rerun_nh="${t}"
	fi

	# Run everything if no test was given on the command line, otherwise
	# only tests matching one of the arguments. Arguments starting with
	# "--" are not test names, skip them
	run_this=1
	for arg do
		[ "${arg}" != "${arg#--*}" ] && continue
		[ "${arg}" = "${name}" ] && run_this=1 && break
		run_this=0
	done
	if [ $run_this -eq 1 ]; then
		run_test "${name}" "${desc}"
		# if test was skipped no need to retry with nexthop objects
		[ $? -eq 2 ] && rerun_nh=0

		if [ "${rerun_nh}" = "1" ]; then
			run_test_nh "${name}" "${desc}"
		fi
	fi
	name=""
	desc=""
	rerun_nh=0
done

exit ${exitcode}