#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch to MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# configured.
#
# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
# at full speed. That makes it impossible to use iperf3 to simply measure the
# throughput, because many packets (that reach $h3) don't get to the kernel at
# all even in UDP mode (the situation is even worse in TCP mode, where one can't
# hope to see more than a couple Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
# each gets a different priority and we can use per-prio ethtool counters to
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
# thus TC 0).
#
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +---------------------------+           +----------------------------------+
# | H1                        |           |                               H2 |
# |                           |           |  unicast -->  + $h2.111          |
# |                 multicast |           |  traffic      | 192.0.2.129/28   |
# |                 traffic   |           |               | e-qos-map 0:1    |
# |           $h1 + <-----    |           |               |                  |
# | 192.0.2.65/28 |           |           |               + $h2              |
# +---------------|-----------+           +---------------|------------------+
#                 |                                       |
# +---------------|---------------------------------------|------------------+
# |         $swp1 +                                       + $swp2            |
# |        >1Gbps |                                       | >1Gbps           |
# | +-------------|------+                     +----------|----------------+ |
# | |     $swp1.1 +      |                     |          + $swp2.111      | |
# | |         BR1        |         SW          |       BR111               | |
# | |     $swp3.1 +      |                     |          + $swp3.111      | |
# | +-------------|------+                     +----------|----------------+ |
# |               \_______________________________________/                  |
# |                                   |                                      |
# |                                   + $swp3                                |
# |                                   | 1Gbps bottleneck                     |
# |                                   | prio qdisc: {0..7} -> 7              |
# +-----------------------------------|--------------------------------------+
#                                     |
#                  +------------------|-+
#                  | H3             $h3 +                                    
#                  |      192.0.2.66/28 |                                    
#                  |                    |                                    
#                  |           $h3.111 +                                     
#                  |    192.0.2.130/28  |                                    
#                  +--------------------+

# Tests run by the forwarding-selftest harness (lib.sh tests_run).
ALL_TESTS="
	ping_ipv4
	test_mc_aware
	test_uc_aware
"

# Location of the shared forwarding-selftest helpers, relative to this script.
# Quote the expansions so the script still works from a path with spaces.
lib_dir=$(dirname "$0")/../../../net/forwarding

NUM_NETIFS=6
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
source qos_lib.sh
72 | |
# H1: the multicast sender. Plain interface, jumbo MTU so mausezahn can
# saturate the link with 8K frames.
h1_create()
{
	simple_if_init $h1 192.0.2.65/28
	mtu_set $h1 10000
}
78 | ||
# Tear down H1 in reverse order of h1_create.
h1_destroy()
{
	mtu_restore $h1
	simple_if_fini $h1 192.0.2.65/28
}
84 | ||
# H2: the unicast sender. Traffic goes out via VLAN 111 with an egress
# QoS map that tags it with PCP 1, so it lands in a different per-prio
# ethtool counter than the untagged MC stream.
h2_create()
{
	simple_if_init $h2
	mtu_set $h2 10000

	vlan_create $h2 111 v$h2 192.0.2.129/28
	ip link set dev $h2.111 type vlan egress-qos-map 0:1
}
93 | ||
# Tear down H2 in reverse order of h2_create.
h2_destroy()
{
	vlan_destroy $h2 111

	mtu_restore $h2
	simple_if_fini $h2
}
101 | ||
# H3: the receiver. Terminates the MC stream on the plain interface and
# the UC stream on VLAN 111; throughput is read from its per-prio RX
# ethtool counters.
h3_create()
{
	simple_if_init $h3 192.0.2.66/28
	mtu_set $h3 10000

	vlan_create $h3 111 v$h3 192.0.2.130/28
}
109 | ||
# Tear down H3 in reverse order of h3_create.
h3_destroy()
{
	vlan_destroy $h3 111

	mtu_restore $h3
	simple_if_fini $h3 192.0.2.66/28
}
117 | ||
# Build the switch side of the topology: two bridges (BR1 for the MC
# stream, BR111 for the VLAN-111 UC stream) that share $swp3 as a 1Gbps
# bottleneck egress, with a prio qdisc squashing all priorities into one
# band so UC is not prioritized by the qdisc itself.
switch_create()
{
	ip link set dev $swp1 up
	mtu_set $swp1 10000

	ip link set dev $swp2 up
	mtu_set $swp2 10000

	ip link set dev $swp3 up
	mtu_set $swp3 10000

	vlan_create $swp2 111
	vlan_create $swp3 111

	# Force the bottleneck: pin $swp3 to 1Gbps while the ingress links
	# run faster, and map every priority to band 7 (and thus TC 0).
	ethtool -s $swp3 speed 1000 autoneg off
	tc qdisc replace dev $swp3 root handle 3: \
	   prio bands 8 priomap 7 7 7 7 7 7 7 7

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev br1 up
	ip link set dev $swp1 master br1
	ip link set dev $swp3 master br1

	ip link add name br111 type bridge vlan_filtering 0
	ip link set dev br111 up
	ip link set dev $swp2.111 master br111
	ip link set dev $swp3.111 master br111

	# Make sure that ingress quotas are smaller than egress so that there is
	# room for both streams of traffic to be admitted to shared buffer.
	devlink_port_pool_th_set $swp1 0 5
	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5

	devlink_port_pool_th_set $swp2 0 5
	devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5

	devlink_port_pool_th_set $swp3 4 12
}
156 | ||
# Undo switch_create, strictly in reverse order: shared-buffer quotas,
# bridges, qdisc/speed, VLANs, then MTU and link state per port.
switch_destroy()
{
	devlink_port_pool_th_restore $swp3 4

	devlink_tc_bind_pool_th_restore $swp2 1 ingress
	devlink_port_pool_th_restore $swp2 0

	devlink_tc_bind_pool_th_restore $swp1 0 ingress
	devlink_port_pool_th_restore $swp1 0

	ip link del dev br111
	ip link del dev br1

	tc qdisc del dev $swp3 root handle 3:
	ethtool -s $swp3 autoneg on

	vlan_destroy $swp3 111
	vlan_destroy $swp2 111

	mtu_restore $swp3
	ip link set dev $swp3 down

	mtu_restore $swp2
	ip link set dev $swp2 down

	mtu_restore $swp1
	ip link set dev $swp1 down
}
185 | ||
# Map the NETIFS array (provided by lib.sh) onto the topology roles,
# cache $h3's MAC for the directed UC stream, then bring up hosts and
# switch.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	h3mac=$(mac_get $h3)

	vrf_prepare

	h1_create
	h2_create
	h3_create
	switch_create
}
206 | ||
# EXIT-trap handler: tear everything down in reverse creation order.
cleanup()
{
	pre_cleanup

	switch_destroy
	h3_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}
218 | ||
# Sanity check: $h2 can reach $h3.111 across the switch.
ping_ipv4()
{
	ping_test $h2 192.0.2.130
}
223 | ||
b5638d46 PM |
# Measure UC throughput alone, then again while $h1 floods broadcast MC,
# and check that the degradation stays in the window expected from the
# MC minimum shaper (i.e. that MC-aware mode is active and tuned right).
test_mc_aware()
{
	RET=0

	# Baseline: UC stream only.
	local -a uc_rate
	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
	uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	stop_traffic
	local ucth1=${uc_rate[1]}

	# Start the broadcast MC flood from $h1.
	start_traffic $h1 192.0.2.65 bc bc

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# Re-measure UC while the MC flood is running.
	local -a uc_rate_2
	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
	uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	stop_traffic
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# Percent degradation of UC throughput, clamped at 0.
	local deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		")

	# Minimum shaper of 200Mbps on MC TCs should cause about 20% of
	# degradation on 1Gbps link.
	check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect"
	check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much"

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Stop the MC flood.
	stop_traffic

	log_test "UC performance under MC overload"

	echo "UC-only throughput  $(humanize $ucth1)"
	echo "UC+MC throughput    $(humanize $ucth2)"
	echo "Degradation         $deg %"
	echo
	echo "Full report:"
	echo "  UC only:"
	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
	echo "  UC+MC:"
	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
	echo "    ingress MC throughput $(humanize $mc_ir)"
	echo "    egress MC throughput  $(humanize $mc_er)"
	echo
}
286 | ||
# The converse check: while $h2 floods UC at full rate, broadcast ARPs
# from $h1 must still get through — every one of the 50 attempts should
# receive a response.
test_uc_aware()
{
	RET=0

	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
	sleep 1

	local attempts=50
	local passes=0
	local i

	for ((i = 0; i < attempts; ++i)); do
		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
			((passes++))
		fi

		sleep 0.1
	done

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

	local interval=$((d1 - d0))
	local uc_ir=$(rate $u0 $u1 $interval)
	local uc_er=$(rate $t0 $t1 $interval)

	# Fail unless every single ARP got an answer.
	((attempts == passes))
	check_err $?

	stop_traffic

	log_test "MC performance under UC overload"
	echo "    ingress UC throughput $(humanize ${uc_ir})"
	echo "    egress UC throughput  $(humanize ${uc_er})"
	echo "    sent $attempts BC ARPs, got $passes responses"
}
328 | ||
# Entry point: register cleanup, build the topology, and run ALL_TESTS.
trap cleanup EXIT

setup_prepare
setup_wait

tests_run

exit $EXIT_STATUS