]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - tools/testing/selftests/net/pmtu.sh
selftests: pmtu: use -oneline for ip route list cache
[mirror_ubuntu-hirsute-kernel.git] / tools / testing / selftests / net / pmtu.sh
CommitLineData
d1f1b9cb
SB
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
a41c789b
SB
4# Check that route PMTU values match expectations, and that initial device MTU
5# values are assigned correctly
d1f1b9cb
SB
6#
7# Tests currently implemented:
8#
e44e428f
SD
9# - pmtu_ipv4
10# Set up two namespaces, A and B, with two paths between them over routers
11# R1 and R2 (also implemented with namespaces), with different MTUs:
12#
#	  segment a_r1    segment b_r1		a_r1: 2000
#	.--------------R1--------------.	b_r1: 1400
#	A                               B	a_r2: 2000
#	'--------------R2--------------'	b_r2: 1500
#	  segment a_r2    segment b_r2
18#
19# Check that PMTU exceptions with the correct PMTU are created. Then
20# decrease and increase the MTU of the local link for one of the paths,
21# A to R1, checking that route exception PMTU changes accordingly over
22# this path. Also check that locked exceptions are created when an ICMP
23# message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
24# received
25#
26# - pmtu_ipv6
27# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
28#
58288879
SB
29# - pmtu_ipv4_vxlan4_exception
30# Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
31# over IPv4 between A and B, routed via R1. On the link between R1 and B,
32# set a MTU lower than the VXLAN MTU and the MTU on the link between A and
33# R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
34# from A to B and check that the PMTU exception is created with the right
35# value on A
36#
37# - pmtu_ipv6_vxlan4_exception
38# Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
39#
40# - pmtu_ipv4_vxlan6_exception
41# Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
42#
43# - pmtu_ipv6_vxlan6_exception
44# Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
45#
ce733661
SB
46# - pmtu_ipv4_geneve4_exception
47# Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
48# VXLAN
49#
50# - pmtu_ipv6_geneve4_exception
51# Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
52# VXLAN
53#
54# - pmtu_ipv4_geneve6_exception
55# Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
56# VXLAN
57#
58# - pmtu_ipv6_geneve6_exception
59# Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
60# VXLAN
61#
56fd865f
SB
62# - pmtu_ipv{4,6}_fou{4,6}_exception
63# Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
64# (FoU) over IPv4/IPv6, instead of VXLAN
65#
# - pmtu_ipv{4,6}_gue{4,6}_exception
#	Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
#	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
69#
5e84430b
SB
70# - pmtu_vti4_exception
71# Set up vti tunnel on top of veth, with xfrm states and policies, in two
72# namespaces with matching endpoints. Check that route exception is not
73# created if link layer MTU is not exceeded, then exceed it and check that
74# exception is created with the expected PMTU. The approach described
75# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
76# changes alone won't affect PMTU
77#
36455bd1 78# - pmtu_vti6_exception
d1f1b9cb
SB
79# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
80# namespaces with matching endpoints. Check that route exception is
81# created by exceeding link layer MTU with ping to other endpoint. Then
82# decrease and increase MTU of tunnel, checking that route exception PMTU
83# changes accordingly
a41c789b
SB
84#
85# - pmtu_vti4_default_mtu
86# Set up vti4 tunnel on top of veth, in two namespaces with matching
87# endpoints. Check that MTU assigned to vti interface is the MTU of the
88# lower layer (veth) minus additional lower layer headers (zero, for veth)
89# minus IPv4 header length
35b49424
SB
90#
91# - pmtu_vti6_default_mtu
92# Same as above, for IPv6
719e1215
SB
93#
94# - pmtu_vti4_link_add_mtu
95# Set up vti4 interface passing MTU value at link creation, check MTU is
96# configured, and that link is not created with invalid MTU values
8b6022fc
SB
97#
98# - pmtu_vti6_link_add_mtu
99# Same as above, for IPv6
1fad59ea
SB
100#
101# - pmtu_vti6_link_change_mtu
102# Set up two dummy interfaces with different MTUs, create a vti6 tunnel
103# and check that configured MTU is used on link creation and changes, and
104# that MTU is properly calculated instead when MTU is not configured from
105# userspace
b3cc4f8a
PA
106#
107# - cleanup_ipv4_exception
108# Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
109# exceptions on multiple CPUs and check that the veth device tear-down
110# happens in a timely manner
111#
112# - cleanup_ipv6_exception
113# Same as above, but use IPv6 transport from A to B
e28799e5 114#
de755a85
SB
115# - list_flush_ipv4_exception
116# Using the same topology as in pmtu_ipv4, create exceptions, and check
117# they are shown when listing exception caches, gone after flushing them
118#
e28799e5
SB
119# - list_flush_ipv6_exception
120# Using the same topology as in pmtu_ipv6, create exceptions, and check
121# they are shown when listing exception caches, gone after flushing them
b3cc4f8a 122
d1f1b9cb 123
57aefc7c
SKSO
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Run-time flags, all disabled by default
PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0

# Some systems don't have a ping6 binary anymore: fall back to ping
if command -v ping6 > /dev/null 2>&1; then
	ping6=$(command -v ping6)
else
	ping6=$(command -v ping)
fi
133
# One test per line, fields separated by a single tab:
#	name	description	re-run with nh (1) or not (0)
# NOTE(review): descriptions contain spaces, so the consumer (not visible
# here) presumably splits on tabs and newlines only -- confirm
tests="
	pmtu_ipv4_exception	ipv4: PMTU exceptions	1
	pmtu_ipv6_exception	ipv6: PMTU exceptions	1
	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions	1
	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions	1
	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions	1
	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions	1
	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions	1
	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions	1
	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions	1
	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions	1
	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions	1
	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions	1
	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions	1
	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions	1
	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions	1
	pmtu_vti6_exception	vti6: PMTU exceptions	0
	pmtu_vti4_exception	vti4: PMTU exceptions	0
	pmtu_vti4_default_mtu	vti4: default MTU assignment	0
	pmtu_vti6_default_mtu	vti6: default MTU assignment	0
	pmtu_vti4_link_add_mtu	vti4: MTU setting on link creation	0
	pmtu_vti6_link_add_mtu	vti6: MTU setting on link creation	0
	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
	cleanup_ipv4_exception	ipv4: cleanup of cached exceptions	1
	cleanup_ipv6_exception	ipv6: cleanup of cached exceptions	1
	list_flush_ipv4_exception	ipv4: list and flush cached exceptions	1
	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"
36455bd1 165
a92a0a7b
DA
# Namespace names, and the command prefixes used to run commands inside them
NS_A="ns-A"
NS_B="ns-B"
NS_R1="ns-R1"
NS_R2="ns-R2"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"
174
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
# Addresses are:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
prefix4="10.0"
prefix6="fc00"
a_r1=1
a_r2=2
b_r1=3
b_r2=4
#	ns	peer	segment
routing_addrs="
	A	R1	${a_r1}
	A	R2	${a_r2}
	B	R1	${b_r1}
	B	R2	${b_r2}
"
# Traffic from A to B goes through R1 by default, and through R2, if destined to
# B's address on the b_r2 segment.
# Traffic from B to A goes through R1.
#	ns	destination		gateway
routes="
	A	default			${prefix4}.${a_r1}.2
	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
	B	default			${prefix4}.${b_r1}.2

	A	default			${prefix6}:${a_r1}::2
	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
	B	default			${prefix6}:${b_r1}::2
"
d1f1b9cb 207
438a9a85
DA
# Set to "yes" to configure routes by means of nexthop objects
USE_NH="no"
#	ns	family	nh id	destination		gateway
nexthops="
	A	4	41	${prefix4}.${a_r1}.2	veth_A-R1
	A	4	42	${prefix4}.${a_r2}.2	veth_A-R2
	B	4	41	${prefix4}.${b_r1}.2	veth_B-R1

	A	6	61	${prefix6}:${a_r1}::2	veth_A-R1
	A	6	62	${prefix6}:${a_r2}::2	veth_A-R2
	B	6	61	${prefix6}:${b_r1}::2	veth_B-R1
"

# nexthop id correlates to id in nexthops config above
#	ns	family	prefix			nh id
routes_nh="
	A	4	default			41
	A	4	${prefix4}.${b_r2}.1	42
	B	4	default			41

	A	6	default			61
	A	6	${prefix6}:${b_r2}::1	62
	B	6	default			61
"
231
a41c789b
SB
# Addresses of the directly connected veth pair used by vti tests
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
veth6_mask="64"

# Addresses assigned to tunnel endpoints
tunnel4_a_addr="192.168.2.1"
tunnel4_b_addr="192.168.2.2"
tunnel4_mask="24"
tunnel6_a_addr="fd00:2::a"
tunnel6_b_addr="fd00:2::b"
tunnel6_mask="64"

dummy6_0_prefix="fc00:1000::"
dummy6_1_prefix="fc00:1001::"
dummy6_mask="64"

# Pending error messages, and PIDs of running tcpdump processes
err_buf=
tcpdump_pids=
36455bd1
SB
252
# Append message $1, followed by a newline, to the pending error buffer
err() {
	err_buf="${err_buf}${1}
"
}
257
# Print the pending error buffer as is, then empty it
err_flush() {
	# printf, unlike echo -n, never interprets the buffer as options or
	# escape sequences
	printf '%s' "${err_buf}"
	err_buf=
}
262
56490b62
DA
# Run the command given as arguments, capturing stdout and stderr in $out
#
# With VERBOSE=1, the command and any non-empty output are printed.
# The command line is split again on whitespace before being run.
#
# Returns the exit status of the command
run_cmd() {
	cmd="$*"

	if [ "$VERBOSE" = "1" ]; then
		# Pass the command as data: it might contain % or backslashes
		printf ' COMMAND: %s\n' "$cmd"
	fi

	out="$($cmd 2>&1)"
	rc=$?
	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
		echo " $out"
		echo
	fi

	return $rc
}
279
e44e428f
SD
# Find the name for this namespace: print the value of NS_<$1>
nsname() {
	# Quote the expansion inside eval, so that the value is printed as is
	eval "echo \"\${NS_${1}}\""
}
284
56fd865f
SB
# Set up a FoU or GUE tunnel between the addresses of A and B on the segments
# towards R1, and assign tunnel addresses to both ends
#
# Arguments:
#	$1	outer (transport) IP version, 4 or 6
#	$2	inner (payload) IP version, 4 or 6
#	$3	encapsulation, "fou" or "gue", also used as device name prefix
#
# Returns 2 if the module can't be loaded, or if the receive port or the
# tunnel device can't be created in A, otherwise the status of the last command
setup_fou_or_gue() {
	outer="${1}"
	inner="${2}"
	encap="${3}"

	# Only ip6tnl needs a mode: don't inherit one from an earlier call
	mode=""

	if [ "${outer}" = "4" ]; then
		modprobe fou || return 2
		a_addr="${prefix4}.${a_r1}.1"
		b_addr="${prefix4}.${b_r1}.1"
		if [ "${inner}" = "4" ]; then
			type="ipip"
			ipproto="4"
		else
			type="sit"
			ipproto="41"
		fi
	else
		modprobe fou6 || return 2
		a_addr="${prefix6}:${a_r1}::1"
		b_addr="${prefix6}:${b_r1}::1"
		if [ "${inner}" = "4" ]; then
			type="ip6tnl"
			mode="mode ipip6"
			ipproto="4 -6"
		else
			type="ip6tnl"
			mode="mode ip6ip6"
			ipproto="41 -6"
		fi
	fi

	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2

	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

	if [ "${inner}" = "4" ]; then
		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
	else
		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
	fi

	run_cmd ${ns_a} ip link set ${encap}_a up
	run_cmd ${ns_b} ip link set ${encap}_b up
}
333
# setup_<encap><outer><inner>: shorthands for setup_fou_or_gue, named after
# the encapsulation type and the outer and inner IP versions

setup_fou44() {
	setup_fou_or_gue 4 4 fou
}

setup_fou46() {
	setup_fou_or_gue 4 6 fou
}

setup_fou64() {
	setup_fou_or_gue 6 4 fou
}

setup_fou66() {
	setup_fou_or_gue 6 6 fou
}

setup_gue44() {
	setup_fou_or_gue 4 4 gue
}

setup_gue46() {
	setup_fou_or_gue 4 6 gue
}

setup_gue64() {
	setup_fou_or_gue 6 4 gue
}

setup_gue66() {
	setup_fou_or_gue 6 6 gue
}
365
# Create namespaces A, B, R1 and R2
#
# Returns 1 as soon as a namespace can't be added
setup_namespaces() {
	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		ip netns add "${n}" || return 1

		# Disable DAD, so that we don't have to wait to use the
		# configured IPv6 addresses
		ip netns exec "${n}" sysctl -q net/ipv6/conf/default/accept_dad=0
	done
}
375
# Create a veth pair connecting A (veth_a) and B (veth_b) directly, assign
# IPv4 and IPv6 addresses to both ends and bring them up
#
# Returns 1 if the pair can't be created
setup_veth() {
	run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
	run_cmd ${ns_a} ip link set veth_b netns ${NS_B}

	run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b

	run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b

	run_cmd ${ns_a} ip link set veth_a up
	run_cmd ${ns_b} ip link set veth_b up
}
389
a41c789b
SB
# Set up a vti or vti6 tunnel between A and B
#
# Arguments:
#	$1	IP version, 4 or 6: selects "vti" or "vti6" as link type, and is
#		part of the device names (vti<version>_a, vti<version>_b)
#	$2, $3	local addresses of the tunnel in A and B
#	$4, $5	addresses assigned to the tunnel devices in A and B
#	$6	prefix length for the tunnel addresses
#
# Returns 1 if the device can't be created in A
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
	run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

	run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
	run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b

	run_cmd ${ns_a} ip link set vti${proto}_a up
	run_cmd ${ns_b} ip link set vti${proto}_b up
}
409
# Set up a vti tunnel over the IPv4 addresses of the veth pair
setup_vti4() {
	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
}

# Set up a vti6 tunnel over the IPv6 addresses of the veth pair
setup_vti6() {
	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}
417
ce733661
SB
# Set up a VXLAN or GENEVE tunnel between A and B, with IPv4 and IPv6
# tunnel addresses on both ends
#
# Arguments:
#	$1	link type, "vxlan" or "geneve", also used as device name prefix
#	$2, $3	tunnel endpoint addresses in A and B
#	$4	additional link options, optional
#
# Returns 1 if the device can't be created in A
setup_vxlan_or_geneve() {
	type="${1}"
	a_addr="${2}"
	b_addr="${3}"
	opts="${4}"

	if [ "${type}" = "vxlan" ]; then
		opts="${opts} ttl 64 dstport 4789"
		opts_a="local ${a_addr}"
		opts_b="local ${b_addr}"
	else
		opts_a=""
		opts_b=""
	fi

	run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

	run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b

	run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

	run_cmd ${ns_a} ip link set ${type}_a up
	run_cmd ${ns_b} ip link set ${type}_b up
}
445
ce733661
SB
# Tunnels over IPv4 are set up between the addresses of A and B on the
# segments towards R1, with the "df set" option

setup_geneve4() {
	setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set"
}

setup_vxlan4() {
	setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set"
}

# Same as above, over IPv6, without additional options

setup_geneve6() {
	setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}

setup_vxlan6() {
	setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}
461
# Add matching xfrm states and policies in A and B: ESP in tunnel mode with
# rfc4106(gcm(aes)), one state per direction, policies matching mark 10
#
# Arguments:
#	$1	IP version, 4 or 6
#	$2, $3	tunnel endpoint addresses in A and B
#
# Returns 1 if the first state can't be added in A
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
477
# Set up xfrm states and policies between the IPv4 addresses of the veth pair
setup_xfrm4() {
	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
}

# Set up xfrm states and policies between the IPv6 addresses of the veth pair
setup_xfrm6() {
	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
485
f4ca0c34
DA
# Add the routes listed in ${routes}, three words per entry: namespace
# identifier, destination, gateway
setup_routing_old() {
	# Words are assigned to the first empty field: start from a clean state
	ns=""; addr=""; gw=""

	for i in ${routes}; do
		[ "${ns}" = "" ]	&& ns="${i}"	&& continue
		[ "${addr}" = "" ]	&& addr="${i}"	&& continue
		[ "${gw}" = "" ]	&& gw="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} route add ${addr} via ${gw}

		ns=""; addr=""; gw=""
	done
}
499
438a9a85
DA
# Add the nexthop objects listed in ${nexthops} (five words per entry:
# namespace identifier, family, nexthop id, gateway, device), then the routes
# listed in ${routes_nh} (four words per entry: namespace identifier, family,
# destination, nexthop id)
setup_routing_new() {
	# Words are assigned to the first empty field: start from a clean
	# state, as some of these variables are also set elsewhere (dev is
	# set by mtu(), for instance)
	ns=""; fam=""; nhid=""; gw=""; dev=""

	for i in ${nexthops}; do
		[ "${ns}" = "" ]	&& ns="${i}"	&& continue
		[ "${fam}" = "" ]	&& fam="${i}"	&& continue
		[ "${nhid}" = "" ]	&& nhid="${i}"	&& continue
		[ "${gw}" = "" ]	&& gw="${i}"	&& continue
		[ "${dev}" = "" ]	&& dev="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}

		ns=""; fam=""; nhid=""; gw=""; dev=""
	done

	ns=""; fam=""; addr=""; nhid=""

	for i in ${routes_nh}; do
		[ "${ns}" = "" ]	&& ns="${i}"	&& continue
		[ "${fam}" = "" ]	&& fam="${i}"	&& continue
		[ "${addr}" = "" ]	&& addr="${i}"	&& continue
		[ "${nhid}" = "" ]	&& nhid="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}

		ns=""; fam=""; addr=""; nhid=""
	done
}
529
e44e428f
SD
# Enable forwarding on R1 and R2, create a veth link with IPv4 and IPv6
# addresses for each entry of ${routing_addrs} (three words per entry:
# namespace identifier, peer identifier, segment), then add routes, by means
# of nexthop objects if USE_NH is "yes"
#
# Returns 1 if a veth link can't be created, 0 otherwise
setup_routing() {
	for i in ${NS_R1} ${NS_R2}; do
		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
	done

	# Words are assigned to the first empty field: start from a clean state
	ns=""; peer=""; segment=""

	for i in ${routing_addrs}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
		[ "${segment}" = "" ]	&& segment="${i}"

		ns_name="$(nsname ${ns})"
		peer_name="$(nsname ${peer})"
		if="veth_${ns}-${peer}"
		ifpeer="veth_${peer}-${ns}"

		# Create veth links
		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
		ip -n ${peer_name} link set dev ${ifpeer} up

		# Add addresses: .1 and ::1 on the host, .2 and ::2 on the peer
		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}

		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}

		ns=""; peer=""; segment=""
	done

	if [ "$USE_NH" = "yes" ]; then
		setup_routing_new
	else
		setup_routing_old
	fi

	return 0
}
568
# Clean up, then call setup_<name> for each name given as argument, in order
#
# Returns $ksft_skip if not running as root, 1 as soon as a setup function
# fails
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo " need to run as root"
		return $ksft_skip
	fi

	cleanup
	for arg do
		if ! "setup_${arg}"; then
			echo " ${arg} not supported"
			return 1
		fi
	done
}
577
# If TRACING is enabled, start tcpdump in background for each pair of
# arguments: a command prefix to enter a namespace, and an interface name.
# Captures are written to ${name}_<interface>.pcap, and PIDs are appended to
# ${tcpdump_pids}
trace() {
	[ $TRACING -eq 0 ] && return

	# ns_cmd is also set by other functions: don't let a stale value shift
	# the pairing of arguments
	ns_cmd=
	for arg do
		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
		tcpdump_pids="${tcpdump_pids} $!"
		ns_cmd=
	done
	sleep 1
}
589
# Stop running tcpdump processes and delete all namespaces, ignoring the ones
# that don't exist
cleanup() {
	for pid in ${tcpdump_pids}; do
		kill ${pid}
	done
	tcpdump_pids=

	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		ip netns del ${n} 2> /dev/null
	done
}
600
# Set the MTU of a device
#
# Arguments:
#	$1	command prefix to enter the namespace
#	$2	device name
#	$3	MTU value
mtu() {
	ns_cmd="${1}"
	dev="${2}"
	mtu="${3}"

	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
}
608
f2c929fe
SB
# Print the MTU found in the text passed as $1: the word following "mtu",
# or "lock" and the following word, if the word following "mtu" is "lock".
# Nothing is printed if there's no "mtu" word followed by a value
mtu_parse() {
	input="${1}"

	# 0: looking for "mtu", 1: "mtu" seen, 2: "mtu lock" seen
	next=0
	for i in ${input}; do
		[ ${next} -eq 1 ] && [ "${i}" = "lock" ] && next=2 && continue
		[ ${next} -eq 1 ] && echo "${i}" && return
		[ ${next} -eq 2 ] && echo "lock ${i}" && return
		[ "${i}" = "mtu" ] && next=1
	done
}
620
a41c789b
SB
# Show link details for device $2, using $1 as command prefix to enter the
# namespace
link_get() {
	ns_cmd="${1}"
	name="${2}"

	${ns_cmd} ip link show dev "${name}"
}
627
# Print the MTU of device $2, using $1 as command prefix to enter the
# namespace
link_get_mtu() {
	ns_cmd="${1}"
	name="${2}"

	mtu_parse "$(link_get "${ns_cmd}" "${name}")"
}
634
# Show the route to destination $2, using $1 as command prefix to enter the
# namespace
route_get_dst_exception() {
	ns_cmd="${1}"
	dst="${2}"

	${ns_cmd} ip route get "${dst}"
}
641
# Print the MTU reported for the route to destination $2, if any, using $1 as
# command prefix to enter the namespace
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}"
	dst="${2}"

	mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}")"
}
648
1e0a7207
SB
# Compare a PMTU value against expectations, queueing an error on mismatch
#
# Arguments:
#	$1	expected value: "any" accepts any non-empty value, an empty
#		string means that no exception is expected
#	$2	value found
#	$3	description of the triggering event, for error messages
#
# Returns 0 on match, 1 otherwise
check_pmtu_value() {
	expected="${1}"
	value="${2}"
	event="${3}"

	[ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
	[ "${value}" = "${expected}" ] && return 0

	if [ -z "${value}" ]; then
		err " PMTU exception wasn't created after ${event}"
	elif [ -z "${expected}" ]; then
		err " PMTU exception shouldn't exist after ${event}"
	else
		err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
	fi
	return 1
}
661
e44e428f
SD
# Check PMTU exceptions between A and B, over the paths via R1 and R2
#
# Arguments:
#	$1	IP version, 4 or 6. Locked PMTU checks are run for IPv4 only
#
# Returns 2 if setup fails, 1 as soon as a check fails
test_pmtu_ipvX() {
	family=${1}

	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	# Create route exceptions
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}

	# Check that exceptions have been created with the correct PMTU
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
	mtu "${ns_a}"  veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Increase MTU, check for PMTU increase in route exception
	mtu "${ns_a}"  veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Skip PMTU locking tests for IPv6
	[ $family -eq 6 ] && return 0

	# Decrease remote MTU on path via R2, get new exception
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}"  veth_B-R2 400
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Decrease local MTU below PMTU
	mtu "${ns_a}"  veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Increase local MTU
	mtu "${ns_a}"  veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# Get new exception
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
747
# PMTU exception checks over IPv4, including locked exceptions
test_pmtu_ipv4_exception() {
	test_pmtu_ipvX 4
}

# PMTU exception checks over IPv6
test_pmtu_ipv6_exception() {
	test_pmtu_ipvX 6
}
755
ce733661
SB
# Check that a PMTU exception with the expected value is created on A for
# traffic sent over a VXLAN or GENEVE tunnel
#
# Arguments:
#	$1	tunnel type, "vxlan" or "geneve"
#	$2	IP version of traffic sent over the tunnel, 4 or 6
#	$3	IP version of the tunnel transport, 4 or 6
#
# Returns 2 if setup fails, otherwise the result of the PMTU check
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	if [ ${outer_family} -eq 4 ]; then
		setup namespaces routing ${type}4 || return 2
		# Subtract, in order: IPv4 header, UDP header,
		# VXLAN/GENEVE header, Ethernet header
		exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
	else
		setup namespaces routing ${type}6 || return 2
		# Subtract, in order: IPv6 header, UDP header,
		# VXLAN/GENEVE header, Ethernet header
		exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
	fi

	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
}
798
# test_pmtu_ipv<X>_<type><Y>_exception: check PMTU exceptions for IPv<X>
# traffic over a <type> tunnel with IPv<Y> transport

test_pmtu_ipv4_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 4
}

test_pmtu_ipv6_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 4
}

test_pmtu_ipv4_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4
}

test_pmtu_ipv6_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4
}

test_pmtu_ipv4_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 6
}

test_pmtu_ipv6_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 6
}

test_pmtu_ipv4_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6
}

test_pmtu_ipv6_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}
830
56fd865f
SB
# Check that a PMTU exception with the expected value is created on A for
# traffic sent over a FoU or GUE tunnel
#
# Arguments:
#	$1	IP version of traffic sent over the tunnel, 4 or 6
#	$2	IP version of the tunnel transport, 4 or 6
#	$3	encapsulation, "fou" or "gue"
#
# Returns 2 if setup fails, otherwise the result of the PMTU check
test_pmtu_ipvX_over_fouY_or_gueY() {
	inner_family=${1}
	outer_family=${2}
	encap=${3}
	ll_mtu=4000

	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${inner_family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# GUE adds four bytes on top of the UDP header, FoU adds nothing
	if [ "${encap}" = "gue" ]; then
		encap_overhead=4
	else
		encap_overhead=0
	fi

	if [ ${outer_family} -eq 4 ]; then
		# Subtract, in order: IPv4 header, UDP header
		exp_mtu=$((${ll_mtu} - 20 - 8 - ${encap_overhead}))
	else
		# Subtract, in order: IPv6 header, Option 4, UDP header
		exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - ${encap_overhead}))
	fi

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
}
878
# test_pmtu_ipv<X>_<encap><Y>_exception: check PMTU exceptions for IPv<X>
# traffic over a FoU or GUE tunnel with IPv<Y> transport

test_pmtu_ipv4_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou
}

test_pmtu_ipv6_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou
}

test_pmtu_ipv4_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou
}

test_pmtu_ipv6_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou
}

test_pmtu_ipv4_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue
}

test_pmtu_ipv6_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue
}

test_pmtu_ipv4_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue
}

test_pmtu_ipv6_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
}
910
5e84430b
SB
# Check that no PMTU exception is created on a vti tunnel if the link layer
# MTU is not exceeded, and that one with the expected value is created once
# it's exceeded by one byte
#
# Returns 2 if setup fails, 1 if the first check fails, otherwise the result
# of the second check
test_pmtu_vti4_exception() {
	setup namespaces veth vti4 xfrm4 || return 2
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti4_a    "${ns_b}" vti4_b

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	# Subtract, in order: SPI, SN, IV, ICV, pad length, next header
	esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
	# NOTE(review): 28 is presumably IPv4 header plus ICMP header -- confirm
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# Send DF packet without exceeding link layer MTU, check that no
	# exception is created
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1

	# Now exceed link layer MTU by one byte, check that exception is created
	# with the right PMTU value
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
940
# Check that a PMTU exception on a vti6 tunnel is created and then follows
# changes of the local tunnel MTU.
#
# Returns: 2 if setup fails (skip), 1 if no exception is created or if any of
#          the follow-up MTU checks fails, 0 otherwise.
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	trace "${ns_a}" veth_a "${ns_b}" veth_b \
	      "${ns_a}" vti6_a "${ns_b}" vti6_b
	fail=0

	# Tunnel MTU (5000) larger than the underlying veth MTU (4000), then
	# send an oversized packet to trigger a route exception
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}

	# Any PMTU value is accepted here; without an exception the remaining
	# checks are pointless, so bail out right away
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1

	# Decrease, then increase the tunnel MTU: the PMTU stored in the route
	# exception is expected to match the new tunnel MTU each time
	for step in "3000:decreasing" "9000:increasing"; do
		mtu "${ns_a}" vti6_a "${step%%:*}"
		pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
		check_pmtu_value "${step%%:*}" "${pmtu}" "${step#*:} tunnel MTU" || fail=1
	done

	return ${fail}
}
970
a41c789b
SB
# Check that the default MTU of a vti4 device is the veth MTU minus 20 bytes
# (IPv4 header length).
#
# Returns: 2 if setup fails (skip), 1 on mismatch, 0 otherwise.
test_pmtu_vti4_default_mtu() {
	setup namespaces veth vti4 || return 2

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"

	# Difference between the two MTUs must be the IPv4 header length
	if [ "$((veth_mtu - vti4_mtu))" -ne 20 ]; then
		err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
		return 1
	fi
}
982
35b49424
SB
# Check that the default MTU of a vti6 device is the veth MTU minus 40 bytes
# (IPv6 header length).
#
# Returns: 2 if setup fails (skip), 1 on mismatch, 0 otherwise.
test_pmtu_vti6_default_mtu() {
	setup namespaces veth vti6 || return 2

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"

	# Difference between the two MTUs must be the IPv6 header length
	if [ "$((veth_mtu - vti6_mtu))" -ne 40 ]; then
		err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
		return 1
	fi
}
994
719e1215
SB
# Check MTU handling when a vti (IPv4) link is created with an explicit MTU.
#
# Out-of-range values (min - 1, max + 1) must either be rejected or be adjusted
# to a value within [min, max]; in-range values must be applied verbatim.
#
# Returns: 2 if setup fails or vti links can't be created (skip), 1 if any
#          check fails, 0 otherwise.
test_pmtu_vti4_link_add_mtu() {
	setup namespaces || return 2

	# Probe: create and delete a plain vti link to see if vti is usable
	run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 \
		|| { err " vti not supported" && return 2; }
	run_cmd ${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65535 - 20))

	# Invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# Creation may fail, or the MTU may be adjusted to a proper value
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 \
			|| continue

		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			err " vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti4_a
	done

	# Valid values: the link must report exactly what was configured
	for v in ${min} 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		run_cmd ${ns_a} ip link del vti4_a

		if [ "${mtu}" != "${v}" ]; then
			err " vti MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
1032
8b6022fc
SB
# Check MTU handling when a vti6 link is created with an explicit MTU.
#
# Out-of-range values (min - 1, max + 1) must either be rejected or be adjusted
# to a value within [min, max]; in-range values must be applied verbatim.
#
# Returns: 2 if setup fails or vti6 links can't be created (skip), 1 if any
#          check fails, 0 otherwise.
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe: create and delete a plain vti6 link to see if vti6 is usable
	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
	[ $? -ne 0 ] && err " vti6 not supported" && return 2
	run_cmd ${ns_a} ip link del vti6_a

	fail=0

	min=68				# vti6 can carry IPv4 packets too
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		# This can fail, or MTU can be adjusted to a proper value
		[ $? -ne 0 ] && continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the link actually got, not the
			# requested one, same as the vti4 variant of this test
			err " vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti6_a
	done

	# Now check valid values: both bounds, plus 1280 and 1300
	for v in ${min} 1280 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		run_cmd ${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err " vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
1070
1fad59ea
SB
# Check how the MTU of a vti6 link behaves when its endpoints are moved between
# two dummy devices with different MTUs (dummy0: 1500, dummy1: 3000).
#
# Returns: 2 if setup fails or dummy links can't be created (skip), 1 if any
#          check fails, 0 otherwise.
test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	# Only the first dummy device creation is used as a support probe
	run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy \
		|| { err " dummy not supported" && return 2; }
	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
	run_cmd ${ns_a} ip link set dummy0 up
	run_cmd ${ns_a} ip link set dummy1 up

	run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
	run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1

	fail=0

	# Step 1: create the vti6 link with addresses from the dummy0 prefix
	# and an explicit MTU, which must be honoured
	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" -ne 1300 ]; then
		err " vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Step 2: switch to addresses from the dummy1 prefix without passing an
	# MTU: it must be adjusted to the dummy1 MTU minus the IPv6 header length
	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" -ne $((3000 - 40)) ]; then
		err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Step 3: switch back to the dummy0 prefix, this time passing an MTU,
	# which must not be overridden
	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" -ne 1280 ]; then
		err " vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}
1112
b3cc4f8a
PA
# Check that a command needed by a test is available.
#
# Arguments: $1 - name of the command to look up
# Returns:   0 if the command is found, 1 (after logging via err) otherwise
check_command() {
	cmd=${1}

	# 'command -v' is specified by POSIX, unlike 'which', which might be
	# missing on minimal systems; it's also what the argument check at the
	# bottom of this script uses
	if ! command -v "${cmd}" > /dev/null 2>&1; then
		err " missing required command: '${cmd}'"
		return 1
	fi
	return 0
}
1122
# Check that a veth device can still be deleted promptly after PMTU exceptions
# were created for traffic sent over a VXLAN tunnel from up to two CPUs.
#
# Arguments: $1 - suffix appended to "vxlan" in the setup step name (the two
#                 callers in this file pass 6 and 4)
# Returns:   2 if taskset is missing or setup fails (skip), 1 if the device
#            deletion is still running after one second, 0 otherwise.
test_cleanup_vxlanX_exception() {
	outer="${1}"
	encap="vxlan"
	ll_mtu=4000
	raised_mtu=$((ll_mtu + 1000))

	check_command taskset || return 2
	# Second space-separated field of the first two "processor" lines
	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)

	setup namespaces routing ${encap}${outer} || return 2
	trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
	      "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B

	# Link between R1 and B gets the lower MTU, so that traffic from A
	# exceeding it creates a route exception
	mtu "${ns_a}" veth_A-R1 ${raised_mtu}
	mtu "${ns_r1}" veth_R1-A ${raised_mtu}
	mtu "${ns_b}" veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a ${raised_mtu}
	mtu "${ns_b}" ${encap}_b ${raised_mtu}

	# Fill exception cache for multiple CPUs (2): inner IPv4 can always be
	# used for that
	for cpu in ${cpu_list}; do
		run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr}
	done

	# Delete the device in the background and give it one second
	${ns_a} ip link del dev veth_A-R1 &
	iplink_pid=$!
	sleep 1

	# If a process with that PID still has the 'ip link del' command line
	# (NUL separators stripped), the deletion didn't complete in time
	pending="$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')"
	if [ "${pending}" = "iplinkdeldevveth_A-R1" ]; then
		err " can't delete veth device in a timely manner, PMTU dst likely leaked"
		return 1
	fi
}
1159
# Device cleanup test after PMTU exceptions, using the "vxlan6" setup step
test_cleanup_ipv6_exception() {
	test_cleanup_vxlanX_exception "6"
}
1163
# Device cleanup test after PMTU exceptions, using the "vxlan4" setup step
test_cleanup_ipv4_exception() {
	test_cleanup_vxlanX_exception "4"
}
1167
243781db
DA
# Run a single test in a subshell and report its outcome.
#
# Arguments: $1 - test name: function test_<name> is invoked
#            $2 - description printed on the result line
# Globals:   VERBOSE, PAUSE_ON_FAIL (read); exitcode (set to 1 on any
#            non-zero result)
# Returns:   status of the test: 0 (OK), 1 (FAIL), 2 (SKIP)
run_test() {
	(
	tname="$1"
	tdesc="$2"

	# Back to default field splitting while the test itself runs
	unset IFS

	if [ "$VERBOSE" = "1" ]; then
		printf "\n##########################################################################\n\n"
	fi

	eval test_${tname}
	ret=$?

	case ${ret} in
	0)
		printf "TEST: %-60s [ OK ]\n" "${tdesc}"
		;;
	1)
		printf "TEST: %-60s [FAIL]\n" "${tdesc}"
		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
			echo
			echo "Pausing. Hit enter to continue"
			read a
		fi
		err_flush
		# Leaves the subshell only: the status is picked up below
		exit 1
		;;
	2)
		printf "TEST: %-60s [SKIP]\n" "${tdesc}"
		err_flush
		;;
	esac

	return $ret
	)
	ret=$?
	if [ $ret -ne 0 ]; then
		exitcode=1
	fi

	return $ret
}
1205
# Run a test once more with USE_NH set to "yes", tagging its description with
# "nexthop objects". USE_NH is set back to "no" afterwards.
#
# Arguments: $1 - test name, $2 - test description
run_test_nh() {
	USE_NH=yes

	tname="$1"
	tdesc="$2"
	run_test "${tname}" "${tdesc} - nexthop objects"

	USE_NH=no
}
1214
de755a85
SB
# Check that cached IPv4 route exceptions can be listed and flushed.
#
# 100 exceptions are created for destinations reached via R1 and one for a
# destination reached via R2. 'ip route list cache' must then report 101
# entries, and nothing must be left after 'ip route flush cache'.
#
# Returns: 2 if setup fails (skip), 1 if listing or flushing doesn't behave as
#          expected, 0 otherwise.
test_list_flush_ipv4_exception() {
	setup namespaces routing || return 2
	trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
	      "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2

	dst_prefix1="${prefix4}.${b_r1}."
	dst2="${prefix4}.${b_r2}.1"

	# Set up initial MTU values
	mtu "${ns_a}" veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1500
	mtu "${ns_b}" veth_B-R1 1500

	mtu "${ns_a}" veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}" veth_B-R2 1500

	fail=0

	# Add 100 addresses for veth endpoint on B reached by default A route
	for i in $(seq 100 199); do
		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
	done

	# Create 100 cached route exceptions for path via R1, one via R2. Note
	# that with IPv4 we need to actually cause a route lookup that matches
	# the exception caused by ICMP, in order to actually have a cached
	# route, so we need to ping each destination twice
	for i in $(seq 100 199); do
		run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
	done
	run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"

	if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then
		err " can't list cached exceptions"
		fail=1
	fi

	run_cmd ${ns_a} ip route flush cache
	# Look up destinations that actually had exceptions before the flush:
	# the first address via R1 and the one via R2, as done in the IPv6
	# variant of this test
	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}")"
	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
	   [ -n "$(${ns_a} ip route list cache)" ]; then
		err " can't flush cached exceptions"
		fail=1
	fi

	return ${fail}
}
1268
e28799e5
SB
# Check that cached IPv6 route exceptions can be listed and flushed.
#
# 100 exceptions are created for destinations reached via R1 and one for a
# destination reached via R2. 'ip -6 route list cache' must then report 101
# entries, and nothing must be left after 'ip -6 route flush cache'.
#
# Returns: 2 if setup fails (skip), 1 if listing or flushing doesn't behave as
#          expected, 0 otherwise.
test_list_flush_ipv6_exception() {
	setup namespaces routing || return 2
	trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
	      "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2

	dst_prefix1="${prefix6}:${b_r1}::"
	dst2="${prefix6}:${b_r2}::1"

	# Initial MTUs: 2000 on the links from A, 1500 on the links to B
	mtu "${ns_a}" veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1500
	mtu "${ns_b}" veth_B-R1 1500

	mtu "${ns_a}" veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}" veth_B-R2 1500

	fail=0

	# 100 extra addresses (suffixes 100 to 199) on B's veth towards R1
	i=100
	while [ ${i} -le 199 ]; do
		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
		i=$((i + 1))
	done

	# One cached exception per address via R1, plus one via R2
	i=100
	while [ ${i} -le 199 ]; do
		run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
		i=$((i + 1))
	done
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"

	# With -oneline, each cached route is printed on a single line
	if [ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ]; then
		err " can't list cached exceptions"
		fail=1
	fi

	# After flushing, neither lookup nor listing may return anything
	run_cmd ${ns_a} ip -6 route flush cache
	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
	   [ -n "$(${ns_a} ip -6 route list cache)" ]; then
		err " can't flush cached exceptions"
		fail=1
	fi

	return ${fail}
}
1318
55bbc8ff
SB
# Print a usage summary, including the list of available tests, and exit with
# status 1.
#
# Globals: tests (read) - appended verbatim after "Available tests"
usage() {
	echo
	echo "$0 [OPTIONS] [TEST]..."
	echo "If no TEST argument is given, all tests will be run."
	echo
	# Keep this list in sync with the getopts string used for option
	# parsing (":ptv")
	echo "Options"
	echo "  -p: pause on failure"
	echo "  -v: verbose mode"
	echo "  -t: capture traffic to TEST_INTERFACE.pcap (requires tcpdump)"
	echo
	echo "Available tests${tests}"
	exit 1
}
1330
56490b62
DA
1331################################################################################
1332#
bb059fb2
SB
exitcode=0
desc=0

# Option parsing:
#   -p  set PAUSE_ON_FAIL=yes
#   -v  set VERBOSE=1
#   -t  set TRACING=1, but only if tcpdump is available
# Anything else prints the usage message, which exits the script.
while getopts :ptv o
do
	case $o in
	p) PAUSE_ON_FAIL=yes;;
	v) VERBOSE=1;;
	# 'command -v' is specified by POSIX, unlike 'which', and is already
	# used below to validate test names
	t) if command -v tcpdump > /dev/null 2>&1; then
		TRACING=1
	   else
		echo "=== tcpdump not available, tracing disabled"
	   fi
	   ;;
	*) usage;;
	esac
done
# Drop parsed options: remaining arguments are test names
shift $(($OPTIND-1))
1351
bb059fb2
SB
1352IFS="
1353"
1354
# Check first that all requested tests are available before running any: a
# test is available if a test_<name> command (function) exists
for arg in "$@"; do
	if ! command -v "test_${arg}" > /dev/null; then
		echo "=== Test ${arg} not found"
		usage
	fi
done

# Run cleanup on any exit path from here on
trap cleanup EXIT

# start clean
cleanup

# Nexthop objects are considered available if 'ip nexthop ls' succeeds
if ip nexthop ls >/dev/null 2>&1; then
	HAVE_NH=yes
else
	HAVE_NH=no
fi
1368
# Walk the test list. ${tests} is consumed three fields at a time: test name,
# description, and a flag telling whether the test should be run again with
# nexthop objects (honoured only if HAVE_NH is "yes").
name=""
desc=""
rerun_nh=0
for t in ${tests}; do
	# First and second field of the current entry
	if [ -z "${name}" ]; then
		name="${t}"
		continue
	fi
	if [ -z "${desc}" ]; then
		desc="${t}"
		continue
	fi

	# Third field: nexthop re-run flag
	if [ "${HAVE_NH}" = "yes" ]; then
		rerun_nh="${t}"
	fi

	# Without test names on the command line, everything is run.
	# Otherwise, run this test only if one of the arguments names it;
	# arguments starting with "--" are ignored
	run_this=1
	for arg in "$@"; do
		case "${arg}" in
		--*)
			continue
			;;
		"${name}")
			run_this=1
			break
			;;
		*)
			run_this=0
			;;
		esac
	done

	if [ $run_this -eq 1 ]; then
		run_test "${name}" "${desc}"
		# if test was skipped no need to retry with nexthop objects
		if [ $? -eq 2 ]; then
			rerun_nh=0
		fi

		if [ "${rerun_nh}" = "1" ]; then
			run_test_nh "${name}" "${desc}"
		fi
	fi

	# Start collecting fields for the next entry
	name=""
	desc=""
	rerun_nh=0
done

exit ${exitcode}