#!/usr/bin/env bash

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/vhost/common.sh

vhost_num="0"
vm_memory=2048
vm_sar_enable=false
host_sar_enable=false
sar_delay="0"
sar_interval="1"
sar_count="10"
vm_throttle=""
ctrl_type="spdk_vhost_scsi"
use_split=false
kernel_cpus=""
run_precondition=false
lvol_stores=()
lvol_bdevs=()
split_bdevs=()
used_vms=""
wwpn_prefix="naa.5001405bc6498"
packed_ring=false

fio_iterations=1
fio_gtod=""
precond_fio_bin=$CONFIG_FIO_SOURCE_DIR/fio
disk_map=""

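# Per-disk configuration parsed from the --disk-map file: PCI BDF, SPDK bdev name,
# split count and the list of VM indexes to attach, one array entry per disk.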
disk_cfg_bdfs=()
disk_cfg_spdk_names=()
disk_cfg_splits=()
disk_cfg_vms=()
disk_cfg_kernel_names=()

function usage() {
	[[ -n $2 ]] && (
		echo "$2"
		echo ""
	)
	echo "Shortcut script for doing automated vhost performance test"
	echo "Usage: $(basename $1) [OPTIONS]"
	echo
	echo "-h, --help                  Print help and exit"
	echo "    --fio-bin=PATH          Path to FIO binary on host."
	echo "                            Binary will be copied to VM, static compilation"
	echo "                            of binary is recommended."
	echo "    --fio-jobs=PATH         Comma separated list of fio config files to use for test."
	echo "    --fio-iterations=INT    Number of times to run specified workload."
	echo "    --fio-gtod-reduce       Enable fio gtod_reduce option in test."
	echo "    --vm-memory=INT         Amount of RAM memory (in MB) to pass to a single VM."
	echo "                            Default: 2048 MB"
	echo "    --vm-image=PATH         OS image to use for running the VMs."
	echo "                            Default: \$HOME/vhost_vm_image.qcow2"
	echo "    --vm-sar-enable         Measure CPU utilization in guest VMs using sar."
	echo "    --host-sar-enable       Measure CPU utilization on host using sar."
	echo "    --sar-delay=INT         Wait for X seconds before starting SAR measurement. Default: 0."
	echo "    --sar-interval=INT      Interval (seconds) argument for SAR. Default: 1s."
	echo "    --sar-count=INT         Count argument for SAR. Default: 10."
	echo "    --vm-throttle-iops=INT  I/O throttle rate in IOPS for each device on the VMs."
	echo "    --ctrl-type=TYPE        Controller type to use for test:"
	echo "                            spdk_vhost_scsi - use spdk vhost scsi"
	echo "                            spdk_vhost_blk - use spdk vhost block"
	echo "                            kernel_vhost - use kernel vhost scsi"
	echo "                            Default: spdk_vhost_scsi"
	echo "    --packed-ring           Use packed ring support. Requires Qemu 4.2.0 or greater. Default: disabled."
	echo "    --use-split             Use split vbdevs instead of Logical Volumes"
	echo "    --limit-kernel-vhost=INT  Limit kernel vhost to run only on a number of CPU cores."
	echo "    --run-precondition      Precondition lvols after creating. Default: false."
	echo "    --precond-fio-bin       FIO binary used for SPDK fio plugin precondition. Default: $CONFIG_FIO_SOURCE_DIR/fio."
	echo "    --custom-cpu-cfg=PATH   Custom CPU config for test."
	echo "                            Default: spdk/test/vhost/common/autotest.config"
	echo "    --disk-map              Disk map for given test. Specify which disks to use, their SPDK name,"
	echo "                            how many times to split them and which VMs should be attached to created bdevs."
	echo "                            Example:"
	echo "                            NVME PCI BDF,Spdk Bdev Name,Split Count,VM List"
	echo "                            0000:1a:00.0,Nvme0,2,0 1"
	echo "                            0000:1b:00.0,Nvme1,2,2 3"
	echo "-x                          set -x for script debug"
	exit 0
}

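# Example invocation (illustrative only - the paths and job file below are
# placeholders, not files shipped with this repository):
#   ./vhost_perf.sh --fio-bin=/usr/src/fio/fio --fio-jobs=/tmp/randread.job \
#       --ctrl-type=spdk_vhost_blk --disk-map=/tmp/disk_map.csv --vm-sar-enable
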
function cleanup_lvol_cfg() {
	notice "Removing lvol bdevs"
	for lvol_bdev in "${lvol_bdevs[@]}"; do
		$rpc_py bdev_lvol_delete $lvol_bdev
		notice "lvol bdev $lvol_bdev removed"
	done

	notice "Removing lvol stores"
	for lvol_store in "${lvol_stores[@]}"; do
		$rpc_py bdev_lvol_delete_lvstore -u $lvol_store
		notice "lvol store $lvol_store removed"
	done
}

function cleanup_split_cfg() {
	notice "Removing split vbdevs"
	for disk in "${disk_cfg_spdk_names[@]}"; do
		$rpc_py bdev_split_delete ${disk}n1
	done
}

function cleanup_parted_config() {
	notice "Removing parted disk configuration"
	for disk in "${disk_cfg_kernel_names[@]}"; do
		parted -s /dev/${disk}n1 rm 1
	done
}

function cleanup_kernel_vhost() {
	notice "Cleaning kernel vhost configuration"
	targetcli clearconfig confirm=True
	cleanup_parted_config
}

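# Build and run the vm_setup command for a single VM. For kernel vhost the guest
# disk is addressed by a WWPN derived from the VM number; otherwise the VM gets
# the vhost device 0 created for it by create_spdk_controller().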
function create_vm() {
	vm_num=$1
	setup_cmd="vm_setup --disk-type=$ctrl_type --force=$vm_num --memory=$vm_memory --os=$VM_IMAGE"
	if [[ "$ctrl_type" == "kernel_vhost" ]]; then
		x=$(printf %03d $vm_num)
		setup_cmd+=" --disks=${wwpn_prefix}${x}"
	else
		setup_cmd+=" --disks=0"
	fi

	if $packed_ring; then
		setup_cmd+=" --packed"
	fi

	$setup_cmd
	used_vms+=" $vm_num"
	echo "Added to used vms"
	echo $used_vms
}

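# Create a vhost-user controller named naa.0.<vm_num> for the given VM and attach
# the bdev to it: as LUN 0 of a vhost-scsi controller, or directly as a vhost-blk
# controller (optionally with packed ring support).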
function create_spdk_controller() {
	vm_num=$1
	bdev=$2

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		$rpc_py vhost_create_scsi_controller naa.0.$vm_num
		notice "Created vhost scsi controller naa.0.$vm_num"
		$rpc_py vhost_scsi_controller_add_target naa.0.$vm_num 0 $bdev
		notice "Added LUN 0/$bdev to controller naa.0.$vm_num"
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		if $packed_ring; then
			p_opt="-p"
		fi

		$rpc_py vhost_create_blk_controller naa.0.$vm_num $bdev $p_opt
		notice "Created vhost blk controller naa.0.$vm_num $bdev"
	fi
}

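# Parse command line arguments. Long options are handled via getopts' "-" pseudo-option.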
while getopts 'xh-:' optchar; do
	case "$optchar" in
		-)
			case "$OPTARG" in
				help) usage $0 ;;
				fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
				fio-jobs=*) fio_jobs="${OPTARG#*=}" ;;
				fio-iterations=*) fio_iterations="${OPTARG#*=}" ;;
				fio-gtod-reduce) fio_gtod="--gtod-reduce" ;;
				vm-memory=*) vm_memory="${OPTARG#*=}" ;;
				vm-image=*) VM_IMAGE="${OPTARG#*=}" ;;
				vm-sar-enable) vm_sar_enable=true ;;
				host-sar-enable) host_sar_enable=true ;;
				sar-delay=*) sar_delay="${OPTARG#*=}" ;;
				sar-interval=*) sar_interval="${OPTARG#*=}" ;;
				sar-count=*) sar_count="${OPTARG#*=}" ;;
				vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
				ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
				packed-ring) packed_ring=true ;;
				use-split) use_split=true ;;
				run-precondition) run_precondition=true ;;
				precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
				limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
				custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
				disk-map=*) disk_map="${OPTARG#*=}" ;;
				*) usage $0 "Invalid argument '$OPTARG'" ;;
			esac
			;;
		h) usage $0 ;;
		x)
			set -x
			x="-x"
			;;
		*) usage $0 "Invalid argument '$OPTARG'" ;;
	esac
done

rpc_py="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"

if [[ -n $custom_cpu_cfg ]]; then
	source $custom_cpu_cfg
	vhost_reactor_mask="vhost_${vhost_num}_reactor_mask"
	vhost_reactor_mask="${!vhost_reactor_mask}"
	vhost_master_core="vhost_${vhost_num}_master_core"
	vhost_master_core="${!vhost_master_core}"
fi

if [[ -z $fio_jobs ]]; then
	error "No FIO job specified!"
fi

trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

if [[ -z $disk_map ]]; then
	fail "No disk map provided for test. Exiting."
fi

# ===== Precondition NVMes if specified =====
if [[ $run_precondition == true ]]; then
	# The same preconditioning routine can be used for lvols thanks to the
	# --clear-method option. Lvols should not UNMAP on creation.
	json_cfg=$rootdir/nvme.json
	cat <<- JSON > "$json_cfg"
		{"subsystems":[
			$("$rootdir/scripts/gen_nvme.sh" --json)
		]}
	JSON
	mapfile -t nvmes < <(grep -oP "Nvme\d+" "$json_cfg")
	fio_filename=$(printf ":%sn1" "${nvmes[@]}")
	fio_filename=${fio_filename:1}
	$precond_fio_bin --name="precondition" \
		--ioengine="${rootdir}/build/fio/spdk_bdev" \
		--rw="write" --spdk_json_conf="$json_cfg" --thread="1" \
		--group_reporting --direct="1" --size="100%" --loops="2" --bs="256k" \
		--iodepth=32 --filename="${fio_filename}" || true
fi

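# Parse the disk map file. Each non-empty line describes one NVMe drive:
#   <PCI BDF>,<SPDK bdev name>,<split count>,<space-separated VM list>
# e.g. (hypothetical BDF): 0000:1a:00.0,Nvme0,2,0 1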
set +x
readarray disk_cfg < $disk_map
for line in "${disk_cfg[@]}"; do
	echo $line
	IFS=","
	s=($line)
	disk_cfg_bdfs+=(${s[0]})
	disk_cfg_spdk_names+=(${s[1]})
	disk_cfg_splits+=(${s[2]})
	disk_cfg_vms+=("${s[3]}")

	# Find kernel nvme names
	if [[ "$ctrl_type" == "kernel_vhost" ]]; then
		tmp=$(find /sys/devices/pci* -name ${s[0]} -print0 | xargs sh -c 'ls $0/nvme')
		disk_cfg_kernel_names+=($tmp)
		IFS=" "
	fi
done
unset IFS
set -x

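# Configure the vhost target. For kernel vhost: partition each NVMe with parted and
# export the partitions as LIO block backstores, one vhost (WWPN) target per VM.
# For SPDK vhost: start the vhost app and carve each NVMe into split bdevs or lvol
# bdevs, then expose one bdev per VM through a vhost controller.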
if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	notice "Configuring kernel vhost..."
	trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

	# Split disks using parted for kernel vhost
	newline=$'\n'
	backstores=()
	for ((i = 0; i < ${#disk_cfg_kernel_names[@]}; i++)); do
		nvme=${disk_cfg_kernel_names[$i]}
		splits=${disk_cfg_splits[$i]}
		notice "  Creating extended partition on disk /dev/${nvme}n1"
		parted -s /dev/${nvme}n1 mklabel msdos
		parted -s /dev/${nvme}n1 mkpart extended 2048s 100%

		part_size=$((100 / ${disk_cfg_splits[$i]})) # Split 100% of disk into roughly even parts
		echo "  Creating ${splits} partitions of relative disk size ${part_size}"
		for p in $(seq 0 $((splits - 1))); do
			p_start=$((p * part_size))
			p_end=$((p_start + part_size))
			parted -s /dev/${nvme}n1 mkpart logical ${p_start}% ${p_end}%
			sleep 3
		done

		# Prepare kernel vhost configuration
		# Below grep: match only NVMe partitions which are not "Extended" type.
		# For example: will match nvme0n1p15 but not nvme0n1p1
		partitions=$(find /dev -name "${nvme}n1*" | sort --version-sort | grep -P 'p(?!1$)\d+')
		# Create block backstores for vhost kernel process
		for p in $partitions; do
			backstore_name=$(basename $p)
			backstores+=("$backstore_name")
			targetcli backstores/block create $backstore_name $p
		done
		partitions=($partitions)

		# Create kernel vhost controllers and add LUNs
		# Setup VM configurations
		vms_to_run=(${disk_cfg_vms[i]})
		for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
			# The WWPN prefix is 3 characters short of a full WWPN;
			# complete it with the zero-padded VM number.
			x=$(printf %03d ${vms_to_run[$j]})
			wwpn="${wwpn_prefix}${x}"
			targetcli vhost/ create $wwpn
			targetcli vhost/$wwpn/tpg1/luns create /backstores/block/$(basename ${partitions[$j]})
			create_vm ${vms_to_run[j]}
			sleep 1
		done
	done
	targetcli ls
else
	notice "Configuring SPDK vhost..."
	vhost_run "${vhost_num}" "--no-gen-nvme" "-p ${vhost_master_core}" "-m ${vhost_reactor_mask}"
	notice "..."

	if [[ $use_split == true ]]; then
		notice "Configuring split bdevs..."
		trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
			nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
			notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

			splits=$($rpc_py bdev_split_create $nvme_bdev ${disk_cfg_splits[$i]})
			splits=($splits)
			notice "Created splits: ${splits[*]} on Bdev ${nvme_bdev}"
			for s in "${splits[@]}"; do
				split_bdevs+=($s)
			done

			vms_to_run=(${disk_cfg_vms[i]})
			for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
				notice "Setting up VM ${vms_to_run[j]}"
				create_spdk_controller "${vms_to_run[j]}" ${splits[j]}
				create_vm ${vms_to_run[j]}
			done
			echo " "
		done
		bdevs=("${split_bdevs[@]}")
	else
		notice "Configuring LVOLs..."
		trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
			nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
			notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

			ls_guid=$($rpc_py bdev_lvol_create_lvstore $nvme_bdev lvs_$i --clear-method none)
			lvol_stores+=("$ls_guid")
			notice "Created Lvol Store: $ls_guid on Bdev $nvme_bdev"

			vms_to_run=(${disk_cfg_vms[i]})
			for ((j = 0; j < ${disk_cfg_splits[$i]}; j++)); do
				free_mb=$(get_lvs_free_mb "$ls_guid")
				size=$((free_mb / (${disk_cfg_splits[$i]} - j)))
				lb_name=$($rpc_py bdev_lvol_create -u $ls_guid lbd_$j $size --clear-method none)
				lvol_bdevs+=("$lb_name")
				notice "Created LVOL Bdev $lb_name on Lvol Store $ls_guid on Bdev $nvme_bdev"

				notice "Setting up VM ${vms_to_run[j]}"
				create_spdk_controller "${vms_to_run[j]}" ${lb_name}
				create_vm ${vms_to_run[j]}
			done
			echo " "
		done
		$rpc_py bdev_lvol_get_lvstores
	fi
	$rpc_py bdev_get_bdevs
	$rpc_py vhost_get_controllers
fi

# Start VMs and wait for them to boot
vm_run $used_vms
vm_wait_for_boot 300 $used_vms

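# When --limit-kernel-vhost is used, confine kernel vhost worker threads to the
# CPUs listed in vhost_0_reactor_mask by moving their PIDs into a dedicated
# cpuset cgroup (cgroup v1 layout assumed).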
if [[ -n "$kernel_cpus" ]]; then
	mkdir -p /sys/fs/cgroup/cpuset/spdk
	kernel_mask=$vhost_0_reactor_mask
	kernel_mask=${kernel_mask#"["}
	kernel_mask=${kernel_mask%"]"}

	echo "$kernel_mask" >> /sys/fs/cgroup/cpuset/spdk/cpuset.cpus
	echo "0-1" >> /sys/fs/cgroup/cpuset/spdk/cpuset.mems

	kernel_vhost_pids=$(pgrep "vhost" -U root)
	for kpid in $kernel_vhost_pids; do
		echo "Limiting kernel vhost pid ${kpid}"
		echo "${kpid}" >> /sys/fs/cgroup/cpuset/spdk/tasks
	done
fi

# Prepare VMs for FIO: start fio servers, locate the test disks and
# optionally apply an IOPS throttle to each of them.
fio_disks=""
for vm_num in $used_vms; do
	host_name="VM-$vm_num"
	vm_exec $vm_num "hostname $host_name"
	vm_start_fio_server $fio_bin $vm_num

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		vm_check_scsi_location $vm_num
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		vm_check_blk_location $vm_num
	elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
		vm_check_scsi_location $vm_num
	fi

	if [[ -n "$vm_throttle" ]]; then
		block=$(printf '%s' $SCSI_DISK)
		major_minor=$(vm_exec "$vm_num" "cat /sys/block/$block/dev")
		vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.read_iops_device"
		vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.write_iops_device"
	fi

	fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)"
done

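# A minimal fio job file that could be passed via --fio-jobs (illustrative values
# only; the actual job files are provided by the caller):
#   [global]
#   ioengine=libaio
#   direct=1
#   rw=randread
#   bs=4k
#   time_based=1
#   runtime=60
#   [job0]
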
# Run FIO traffic
for fio_job in ${fio_jobs//,/ }; do
	fio_job_fname=$(basename $fio_job)
	fio_log_fname="${fio_job_fname%%.*}.log"
	for i in $(seq 1 $fio_iterations); do
		echo "Running FIO iteration $i for $fio_job_fname"
		run_fio $fio_bin --hide-results --job-file="$fio_job" --out="$VHOST_DIR/fio_results" --json $fio_disks $fio_gtod &
		fio_pid=$!

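		# Optionally sample CPU utilization with sar on the host and/or inside each
		# VM while fio is running; results end up under fio_results/sar_stats.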
		if $host_sar_enable || $vm_sar_enable; then
			pids=""
			mkdir -p $VHOST_DIR/fio_results/sar_stats
			sleep $sar_delay
		fi

		if $host_sar_enable; then
			sar -P ALL $sar_interval $sar_count > "$VHOST_DIR/fio_results/sar_stats/sar_stats_host.txt" &
			pids+=" $!"
		fi

		if $vm_sar_enable; then
			for vm_num in $used_vms; do
				vm_exec "$vm_num" "mkdir -p /root/sar; sar -P ALL $sar_interval $sar_count >> /root/sar/sar_stats_VM${vm_num}_run${i}.txt" &
				pids+=" $!"
			done
		fi

		for j in $pids; do
			wait $j
		done

		if $vm_sar_enable; then
			for vm_num in $used_vms; do
				vm_scp "$vm_num" "root@127.0.0.1:/root/sar/sar_stats_VM${vm_num}_run${i}.txt" "$VHOST_DIR/fio_results/sar_stats"
			done
		fi

		wait $fio_pid
		mv $VHOST_DIR/fio_results/$fio_log_fname $VHOST_DIR/fio_results/$fio_log_fname.$i
		sleep 1
	done

	parse_fio_results "$VHOST_DIR/fio_results" "$fio_log_fname"
done

notice "Shutting down virtual machines..."
vm_shutdown_all

if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	cleanup_kernel_vhost || true
else
	notice "Shutting down SPDK vhost app..."
	if [[ $use_split == true ]]; then
		cleanup_split_cfg
	else
		cleanup_lvol_cfg
	fi
	vhost_kill "${vhost_num}"
fi

if [[ -n "$kernel_cpus" ]]; then
	rmdir /sys/fs/cgroup/cpuset/spdk
fi