#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and loads a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
  echo "             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
  echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
  echo "             [-q=<qdisc>] [-R] [-s=<server>|--server=<server>]"
  echo "             [-S|--stats] [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo " Where:"
  echo "  out             egress (default)"
  echo "  -b or --bpf     BPF program filename to load and attach."
  echo "                  Default is hbm_out_kern.o for egress."
  echo "  -c or --cc      TCP congestion control (cubic or dctcp)"
  echo "  --debug         print BPF trace buffer"
  echo "  -d or --delay   add a delay in ms using netem"
  echo "  -D              In addition to the goodput in Mbps, it also outputs"
  echo "                  other detailed information. This information is"
  echo "                  test dependent (i.e. iperf3 or netperf)."
  echo "  -E              enable ECN (not required for dctcp)"
  echo "  --edt           use fq's Earliest Departure Time (requires fq)"
  echo "  -f or --flows   number of concurrent flows (default=1)"
  echo "  -i or --id      cgroup id (an integer, default is 1)"
  echo "  -N              use netperf instead of iperf3"
  echo "  --no_cn         Do not return CN notifications"
  echo "  -l              do not limit flows using loopback"
  echo "  -h              Help"
  echo "  -p or --port    iperf3 port (default is 5201)"
  echo "  -P              use an iperf3 instance for each flow"
  echo "  -q              use the specified qdisc"
  echo "  -r or --rate    rate in Mbps (default is 1Gbps)"
  echo "  -R              Use TCP_RR for netperf. 1st flow has req"
  echo "                  size of 10KB, rest of 1MB. Reply in all"
  echo "                  cases is 1 byte."
  echo "                  More detailed output for each flow can be found"
  echo "                  in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                  cgroup id as specified with the -i flag, and <flow>"
  echo "                  is the flow id starting at 1 and increasing by 1 for"
  echo "                  each flow (as specified by -f)."
  echo "  -s or --server  hostname of netperf server. Used to create netperf"
  echo "                  test traffic between two hosts (default is within host)."
  echo "                  netserver must be running on the server host."
  echo "  -S or --stats   update hbm stats (default is no)."
  echo "  -t or --time    duration of iperf3 in seconds (default=5)"
  echo "  -w              Work conserving flag. cgroup can increase its"
  echo "                  bandwidth beyond the rate limit specified"
  echo "                  while there is available bandwidth. Current"
  echo "                  implementation assumes there is only one NIC"
  echo "                  (eth0), but can be extended to support multiple"
  echo "                  NICs."
  echo "  cubic or dctcp  specify which TCP CC to use"
  echo " "
  exit
}
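# Example invocations (illustrative only; the rates, durations and flow
# counts below are arbitrary, and the script must run as root since it
# uses mount, sysctl and tc):
#   ./do_hbm_test.sh out -r=100 -t=10 -f=4 dctcp   # 4 iperf3 flows, 100 Mbps limit
#   ./do_hbm_test.sh -N -R --stats -r=500          # netperf TCP_RR, hbm stats enabled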

#set -x

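# Defaults; processArgs() below overrides these from the command line.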
debug_flag=0
args="$@"
name="$0"
netem=0
cc=x
dir="-o"
dir_name="out"
dur=5
flows=1
id=1
prog=""
port=5201
rate=1000
multi_iperf=0
flow_cnt=1
use_netperf=0
rr=0
ecn=0
details=0
server=""
qdisc=""
flags=""
do_stats=0

BPFFS=/sys/fs/bpf
function config_bpffs () {
  if mount | grep $BPFFS > /dev/null; then
    echo "bpffs already mounted"
  else
    echo "bpffs not mounted. Mounting..."
    mount -t bpf none $BPFFS
  fi
}
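# Note: the hbm program is expected to pin BPF objects under $BPFFS/hbm*;
# the cleanup at the end of this script removes anything left there.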

function start_hbm () {
  rm -f hbm.out
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
  # the background hbm's PID is this function's only stdout, so callers
  # can capture it with $(start_hbm)
  echo $!
}

processArgs () {
  for i in $args ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)          # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    --edt)
      flags="$flags --edt"
      qdisc="fq"
      ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg: $i"
      Usage
      ;;
    esac
  done
}

processArgs
config_bpffs

if [ $debug_flag -eq 1 ] ; then
  rm -f hbm_out.log
fi

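# Launch hbm in the background; it creates the test cgroup
# (cgroup-test-work-dir/hbm$id), attaches the BPF program, and exits on
# its own after $dur seconds.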
hbm_pid=$(start_hbm)
usleep 100000   # give hbm time to finish setting up

host=`hostname`
cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

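# Move this shell into the test cgroup so that all the test traffic it
# spawns is subject to the BPF program hbm attached to the cgroup.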
echo $$ >> $cg_dir/cgroup.procs

ulimit -l unlimited   # BPF maps count against the locked-memory rlimit

rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

if [ $use_netperf -eq 0 ] ; then
  # save the current congestion control so it can be restored at the end
  cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  fi
fi

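# A -d delay is emulated with netem on the loopback device (test traffic
# stays within the host by default) and overrides any -q qdisc, which
# would otherwise be installed on eth0.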
if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q option because -d option was used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

n=0
m=$[$dur * 5]   # number of pings: 5 per second for the test duration
hn="::1"
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

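# Measure latency under load in the background; ping.out is parsed for
# PING AVG DELAY when -D is used.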
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &

if [ $use_netperf -ne 0 ] ; then
  begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
                   awk '{ print $1 }'`
  if [ "$begNetserverPid" == "" ] ; then
    if [ "$server" == "" ] ; then
      ( ./netserver > /dev/null 2>&1) &
      usleep 100000
    fi
  fi
  flow_cnt=1
  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi
  replySize=1
  while [ $flow_cnt -le $flows ] ; do
    if [ $rr -ne 0 ] ; then
      reqSize=1M
      if [ $flow_cnt -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
    else
      if [ "$dir" == "-i" ] ; then
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      else
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      fi
    fi
    flow_cnt=$[flow_cnt+1]
  done

  # sleep for duration of test (plus some buffer)
  n=$[dur+2]
  sleep $n

  # force graceful termination of netperf
  pids=`pgrep netperf`
  for p in $pids ; do
    kill -SIGALRM $p
  done

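  # Sum the per-flow goodput reported in the netperf.<cg>.<flow> files.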
  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi
  while [ $flow_cnt -le $flows ] ; do
    if [ "$dir" == "-i" ] ; then
      r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    else
      r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$[rate+r]
    if [ $details -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    echo ""
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
elif [ $multi_iperf -eq 0 ] ; then
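  # Single iperf3 server/client pair; -1 makes the server exit after
  # serving one client, and with -P > 1 the last "receiver" value parsed
  # below is the SUM over all flows.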
  (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  usleep 100000
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
  rate=`echo $rates | grep -o "[0-9]*$"`

  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
else
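  # One iperf3 server/client pair per flow (-P), each on its own port;
  # per-flow goodput is written to iperf3.<cg>.<flow>.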
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    (iperf3 -s -p $port -1 > /dev/null 2>&1) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$[port+1]
    flow_cnt=$[flow_cnt+1]
  done
  n=$[dur+1]
  sleep $n
  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi

  while [ $flow_cnt -le $flows ] ; do
    r=`cat iperf3.$id.$flow_cnt`
    # echo "rate for flow $flow_cnt: $r"
    if [ $details -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    rate=$[rate+r]
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
fi

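# Restore the settings that were changed for the test (CC, ECN, qdiscs).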
if [ $use_netperf -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi
if [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

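# hbm normally exits on its own after $dur seconds; kill it only if the
# PID captured at startup is still running.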
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
  kill $hbm_pid
fi

sleep 1

# Detach any pinned BPF programs that may have lingered
rm -rf $BPFFS/hbm*

# Kill netserver only if this script started it (no remote server was used
# and none was already running when the test began).
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" == "" ] ; then
    if [ "$begNetserverPid" == "" ] ; then
      netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
      if [ "$netserverPid" != "" ] ; then
        kill $netserverPid
      fi
    fi
  fi
fi
exit