#!/usr/bin/env bash

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/vhost/common.sh

vhost_num="0"
vm_memory=2048
vm_sar_enable=false
host_sar_enable=false
sar_delay="0"
sar_interval="1"
sar_count="10"
vm_throttle=""
ctrl_type="spdk_vhost_scsi"
use_split=false
kernel_cpus=""
run_precondition=false
lvol_stores=()
lvol_bdevs=()
split_bdevs=()
used_vms=""
wwpn_prefix="naa.5001405bc6498"
packed_ring=false

fio_iterations=1
fio_gtod=""
precond_fio_bin=$CONFIG_FIO_SOURCE_DIR/fio
disk_map=""

disk_cfg_bdfs=()
disk_cfg_spdk_names=()
disk_cfg_splits=()
disk_cfg_vms=()
disk_cfg_kernel_names=()

function usage() {
    [[ -n $2 ]] && (
        echo "$2"
        echo ""
    )
    echo "Shortcut script for running automated vhost performance tests"
    echo "Usage: $(basename $1) [OPTIONS]"
    echo
    echo "-h, --help                Print help and exit"
    echo "    --fio-bin=PATH        Path to FIO binary on host."
    echo "                          Binary will be copied to VM, static compilation"
    echo "                          of binary is recommended."
    echo "    --fio-jobs=PATH       Comma separated list of fio config files to use for test."
    echo "    --fio-iterations=INT  Number of times to run specified workload."
    echo "    --fio-gtod-reduce     Enable fio gtod_reduce option in test."
    echo "    --vm-memory=INT       Amount of RAM memory (in MB) to pass to a single VM."
    echo "                          Default: 2048 MB"
    echo "    --vm-image=PATH       OS image to use for running the VMs."
    echo "                          Default: \$HOME/vhost_vm_image.qcow2"
    echo "    --vm-sar-enable       Measure CPU utilization in guest VMs using sar."
    echo "    --host-sar-enable     Measure CPU utilization on host using sar."
    echo "    --sar-delay=INT       Wait for X seconds before starting SAR measurement. Default: 0."
    echo "    --sar-interval=INT    Interval (seconds) argument for SAR. Default: 1s."
    echo "    --sar-count=INT       Count argument for SAR. Default: 10."
    echo "    --vm-throttle-iops=INT  I/O throttle rate in IOPS for each device on the VMs."
    echo "    --ctrl-type=TYPE      Controller type to use for test:"
    echo "                          spdk_vhost_scsi - use spdk vhost scsi"
    echo "                          spdk_vhost_blk - use spdk vhost block"
    echo "                          kernel_vhost - use kernel vhost scsi"
    echo "                          Default: spdk_vhost_scsi"
    echo "    --packed-ring         Use packed ring support. Requires Qemu 4.2.0 or greater. Default: disabled."
    echo "    --use-split           Use split vbdevs instead of Logical Volumes"
    echo "    --limit-kernel-vhost=INT  Limit kernel vhost to run only on a number of CPU cores."
    echo "    --run-precondition    Precondition lvols after creation. Default: false."
    echo "    --precond-fio-bin     FIO binary used for SPDK fio plugin precondition. Default: $CONFIG_FIO_SOURCE_DIR/fio."
    echo "    --custom-cpu-cfg=PATH Custom CPU config for test."
    echo "                          Default: spdk/test/vhost/common/autotest.config"
    echo "    --disk-map            Disk map for given test. Specify which disks to use, their SPDK name,"
    echo "                          how many times to split them and which VMs should be attached to created bdevs."
    echo "                          Example:"
    echo "                          NVME PCI BDF,Spdk Bdev Name,Split Count,VM List"
    echo "                          0000:1a:00.0,Nvme0,2,0 1"
    echo "                          0000:1b:00.0,Nvme1,2,2 3"
    echo "-x                        Enable set -x for script debug"
    exit 0
}

function cleanup_lvol_cfg() {
    notice "Removing lvol bdevs"
    for lvol_bdev in "${lvol_bdevs[@]}"; do
        $rpc_py bdev_lvol_delete $lvol_bdev
        notice "lvol bdev $lvol_bdev removed"
    done

    notice "Removing lvol stores"
    for lvol_store in "${lvol_stores[@]}"; do
        $rpc_py bdev_lvol_delete_lvstore -u $lvol_store
        notice "lvol store $lvol_store removed"
    done
}

function cleanup_split_cfg() {
    notice "Removing split vbdevs"
    for disk in "${disk_cfg_spdk_names[@]}"; do
        $rpc_py bdev_split_delete ${disk}n1
    done
}

function cleanup_parted_config() {
    notice "Removing parted disk configuration"
    for disk in "${disk_cfg_kernel_names[@]}"; do
        parted -s /dev/${disk}n1 rm 1
    done
}

function cleanup_kernel_vhost() {
    notice "Cleaning kernel vhost configuration"
    targetcli clearconfig confirm=True
    cleanup_parted_config
}

function create_vm() {
    vm_num=$1
    setup_cmd="vm_setup --disk-type=$ctrl_type --force=$vm_num --memory=$vm_memory --os=$VM_IMAGE"
    if [[ "$ctrl_type" == "kernel_vhost" ]]; then
        x=$(printf %03d $vm_num)
        setup_cmd+=" --disks=${wwpn_prefix}${x}"
    else
        setup_cmd+=" --disks=0"
    fi

    if $packed_ring; then
        setup_cmd+=" --packed"
    fi

    $setup_cmd
    used_vms+=" $vm_num"
    echo "Added to used vms"
    echo $used_vms
}
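# Example: "create_vm 2" runs vm_setup for VM number 2 and appends "2" to
# $used_vms; for kernel_vhost the disk argument becomes the completed WWPN
# (e.g. naa.5001405bc6498002) instead of a vhost socket index.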

function create_spdk_controller() {
    vm_num=$1
    bdev=$2

    if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
        $rpc_py vhost_create_scsi_controller naa.0.$vm_num
        notice "Created vhost scsi controller naa.0.$vm_num"
        $rpc_py vhost_scsi_controller_add_target naa.0.$vm_num 0 $bdev
        notice "Added LUN 0/$bdev to controller naa.0.$vm_num"
    elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
        if $packed_ring; then
            p_opt="-p"
        fi

        $rpc_py vhost_create_blk_controller naa.0.$vm_num $bdev $p_opt
        notice "Created vhost blk controller naa.0.$vm_num $bdev"
    fi
}
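# Example: "create_spdk_controller 0 Nvme0n1p0" creates controller naa.0.0
# and exposes bdev Nvme0n1p0 through it, either as SCSI LUN 0 or as a block
# device, depending on --ctrl-type.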

while getopts 'xh-:' optchar; do
    case "$optchar" in
        -)
            case "$OPTARG" in
                help) usage $0 ;;
                fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
                fio-jobs=*) fio_jobs="${OPTARG#*=}" ;;
                fio-iterations=*) fio_iterations="${OPTARG#*=}" ;;
                fio-gtod-reduce) fio_gtod="--gtod-reduce" ;;
                vm-memory=*) vm_memory="${OPTARG#*=}" ;;
                vm-image=*) VM_IMAGE="${OPTARG#*=}" ;;
                vm-sar-enable) vm_sar_enable=true ;;
                host-sar-enable) host_sar_enable=true ;;
                sar-delay=*) sar_delay="${OPTARG#*=}" ;;
                sar-interval=*) sar_interval="${OPTARG#*=}" ;;
                sar-count=*) sar_count="${OPTARG#*=}" ;;
                vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
                ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
                packed-ring) packed_ring=true ;;
                use-split) use_split=true ;;
                run-precondition) run_precondition=true ;;
                precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
                limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
                custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
                disk-map=*) disk_map="${OPTARG#*=}" ;;
                *) usage $0 "Invalid argument '$OPTARG'" ;;
            esac
            ;;
        h) usage $0 ;;
        x)
            set -x
            x="-x"
            ;;
        *) usage $0 "Invalid argument '$OPTARG'" ;;
    esac
done
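# Example invocation (hypothetical paths):
#   ./vhost_perf.sh --fio-jobs=/path/to/randread.job --disk-map=/path/to/disk_map.csv \
#       --ctrl-type=spdk_vhost_blk --fio-iterations=3 --vm-sar-enable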

rpc_py="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"

if [[ -n $custom_cpu_cfg ]]; then
    source $custom_cpu_cfg
    vhost_reactor_mask="vhost_${vhost_num}_reactor_mask"
    vhost_reactor_mask="${!vhost_reactor_mask}"
    vhost_master_core="vhost_${vhost_num}_master_core"
    vhost_master_core="${!vhost_master_core}"
fi
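# ${!var} above is bash indirect expansion: it resolves e.g.
# vhost_0_reactor_mask and vhost_0_master_core from the sourced CPU config.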

if [[ -z $fio_jobs ]]; then
    error "No FIO job specified!"
fi

trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

if [[ -z $disk_map ]]; then
    fail "No disk map provided for test. Exiting."
fi

# ===== Precondition NVMes if specified =====
if [[ $run_precondition == true ]]; then
    # The same preconditioning routine can be used for lvols thanks to
    # the --clear-method option: lvols should not UNMAP on creation.
    json_cfg=$rootdir/nvme.json
    cat << JSON > "$json_cfg"
{"subsystems":[
    $("$rootdir/scripts/gen_nvme.sh" --json)
]}
JSON
    mapfile -t nvmes < <(grep -oP "Nvme\d+" "$json_cfg")
    fio_filename=$(printf ":%sn1" "${nvmes[@]}")
    fio_filename=${fio_filename:1}
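    # fio_filename is now a colon-separated list of bdev namespaces,
    # e.g. "Nvme0n1:Nvme1n1" (printf adds a leading colon, stripped above).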
    $precond_fio_bin --name="precondition" \
        --ioengine="${rootdir}/build/fio/spdk_bdev" \
        --rw="write" --spdk_json_conf="$json_cfg" --thread="1" \
        --group_reporting --direct="1" --size="100%" --loops="2" --bs="256k" \
        --iodepth=32 --filename="${fio_filename}" || true
fi

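# Parse the disk map file. Each line has the format described in usage():
#   NVME PCI BDF,Spdk Bdev Name,Split Count,VM List
# e.g. "0000:1a:00.0,Nvme0,2,0 1" splits Nvme0 in two and attaches VMs 0 and 1.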
set +x
readarray disk_cfg < $disk_map
for line in "${disk_cfg[@]}"; do
    echo $line
    IFS=","
    s=($line)
    disk_cfg_bdfs+=(${s[0]})
    disk_cfg_spdk_names+=(${s[1]})
    disk_cfg_splits+=(${s[2]})
    disk_cfg_vms+=("${s[3]}")

    # Find kernel nvme names
    if [[ "$ctrl_type" == "kernel_vhost" ]]; then
        tmp=$(find /sys/devices/pci* -name ${s[0]} -print0 | xargs sh -c 'ls $0/nvme')
        disk_cfg_kernel_names+=($tmp)
        IFS=" "
    fi
done
unset IFS
set -x

if [[ "$ctrl_type" == "kernel_vhost" ]]; then
    notice "Configuring kernel vhost..."
    trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

    # Split disks using parted for kernel vhost
    newline=$'\n'
    backstores=()
    for ((i = 0; i < ${#disk_cfg_kernel_names[@]}; i++)); do
        nvme=${disk_cfg_kernel_names[$i]}
        splits=${disk_cfg_splits[$i]}
        notice "  Creating extended partition on disk /dev/${nvme}n1"
        parted -s /dev/${nvme}n1 mklabel msdos
        parted -s /dev/${nvme}n1 mkpart extended 2048s 100%

        part_size=$((100 / ${disk_cfg_splits[$i]})) # Split 100% of disk into roughly even parts
        echo "  Creating ${splits} partitions of relative disk size ${part_size}"
        for p in $(seq 0 $((splits - 1))); do
            p_start=$((p * part_size))
            p_end=$((p_start + part_size))
            parted -s /dev/${nvme}n1 mkpart logical ${p_start}% ${p_end}%
            sleep 3
        done

        # Prepare kernel vhost configuration
        # Below grep: match only NVMe partitions which are not "Extended" type.
        # For example: will match nvme0n1p15 but not nvme0n1p1
        partitions=$(find /dev -name "${nvme}n1*" | sort --version-sort | grep -P 'p(?!1$)\d+')
        # Create block backstores for vhost kernel process
        for p in $partitions; do
            backstore_name=$(basename $p)
            backstores+=("$backstore_name")
            targetcli backstores/block create $backstore_name $p
        done
        partitions=($partitions)

        # Create kernel vhost controllers and add LUNs
        # Setup VM configurations
        vms_to_run=(${disk_cfg_vms[i]})
        for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
            # The WWPN prefix is 3 characters short; complete it
            # with the zero-padded block backstore number.
            x=$(printf %03d ${vms_to_run[$j]})
            wwpn="${wwpn_prefix}${x}"
            targetcli vhost/ create $wwpn
            targetcli vhost/$wwpn/tpg1/luns create /backstores/block/$(basename ${partitions[$j]})
            create_vm ${vms_to_run[j]}
            sleep 1
        done
    done
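    # Print the resulting targetcli tree so the created backstores,
    # vhost targets and LUNs can be verified in the test log.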
    targetcli ls
else
    notice "Configuring SPDK vhost..."
    vhost_run "${vhost_num}" "--no-gen-nvme" "-p ${vhost_master_core}" "-m ${vhost_reactor_mask}"
    notice "..."

    if [[ $use_split == true ]]; then
        notice "Configuring split bdevs..."
        trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
        for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
            nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
            notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

            splits=$($rpc_py bdev_split_create $nvme_bdev ${disk_cfg_splits[$i]})
            splits=($splits)
            notice "Created splits: ${splits[*]} on Bdev ${nvme_bdev}"
            for s in "${splits[@]}"; do
                split_bdevs+=($s)
            done

            vms_to_run=(${disk_cfg_vms[i]})
            for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
                notice "Setting up VM ${vms_to_run[j]}"
                create_spdk_controller "${vms_to_run[j]}" ${splits[j]}
                create_vm ${vms_to_run[j]}
            done
            echo " "
        done
        bdevs=("${split_bdevs[@]}")
    else
        notice "Configuring LVOLs..."
        trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
        for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
            nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
            notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

            ls_guid=$($rpc_py bdev_lvol_create_lvstore $nvme_bdev lvs_$i --clear-method none)
            lvol_stores+=("$ls_guid")
            notice "Created Lvol Store: $ls_guid on Bdev $nvme_bdev"

            vms_to_run=(${disk_cfg_vms[i]})
            for ((j = 0; j < ${disk_cfg_splits[$i]}; j++)); do
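                # Divide the remaining free space evenly among the
                # lvol bdevs that are still to be created on this store.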
                free_mb=$(get_lvs_free_mb "$ls_guid")
                size=$((free_mb / (${disk_cfg_splits[$i]} - j)))
                lb_name=$($rpc_py bdev_lvol_create -u $ls_guid lbd_$j $size --clear-method none)
                lvol_bdevs+=("$lb_name")
                notice "Created LVOL Bdev $lb_name on Lvol Store $ls_guid on Bdev $nvme_bdev"

                notice "Setting up VM ${vms_to_run[j]}"
                create_spdk_controller "${vms_to_run[j]}" ${lb_name}
                create_vm ${vms_to_run[j]}
            done
            echo " "
        done
        $rpc_py bdev_lvol_get_lvstores
    fi
    $rpc_py bdev_get_bdevs
    $rpc_py vhost_get_controllers
fi

# Start VMs and wait for them to boot
vm_run $used_vms
vm_wait_for_boot 300 $used_vms

if [[ -n "$kernel_cpus" ]]; then
    mkdir -p /sys/fs/cgroup/cpuset/spdk
    kernel_mask=$vhost_0_reactor_mask
    kernel_mask=${kernel_mask#"["}
    kernel_mask=${kernel_mask%"]"}

    echo "$kernel_mask" >> /sys/fs/cgroup/cpuset/spdk/cpuset.cpus
    echo "0-1" >> /sys/fs/cgroup/cpuset/spdk/cpuset.mems

    kernel_vhost_pids=$(pgrep "vhost" -U root)
    for kpid in $kernel_vhost_pids; do
        echo "Limiting kernel vhost pid ${kpid}"
        echo "${kpid}" >> /sys/fs/cgroup/cpuset/spdk/tasks
    done
fi
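# Note: the block above relies on the legacy (v1) cpuset cgroup hierarchy
# being mounted at /sys/fs/cgroup/cpuset; it strips the surrounding brackets
# from a mask like "[0-3]" before writing it to cpuset.cpus.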

# Run FIO
fio_disks=""
for vm_num in $used_vms; do
    host_name="VM-$vm_num"
    vm_exec $vm_num "hostname $host_name"
    vm_start_fio_server $fio_bin $vm_num

    if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
        vm_check_scsi_location $vm_num
    elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
        vm_check_blk_location $vm_num
    elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
        vm_check_scsi_location $vm_num
    fi

    if [[ -n "$vm_throttle" ]]; then
        block=$(printf '%s' $SCSI_DISK)
        major_minor=$(vm_exec "$vm_num" "cat /sys/block/$block/dev")
        vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.read_iops_device"
        vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.write_iops_device"
    fi

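    # Build the --vm argument for run_fio, one entry per disk found in the VM,
    # e.g. " --vm=0:/dev/sdb" (vhost-scsi) or " --vm=0:/dev/vda" (vhost-blk).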
    fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)"
done

# Run FIO traffic
for fio_job in ${fio_jobs//,/ }; do
    fio_job_fname=$(basename $fio_job)
    fio_log_fname="${fio_job_fname%%.*}.log"
    for i in $(seq 1 $fio_iterations); do
        echo "Running FIO iteration $i for $fio_job_fname"
        run_fio $fio_bin --hide-results --job-file="$fio_job" --out="$VHOST_DIR/fio_results" --json $fio_disks $fio_gtod &
        fio_pid=$!
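        # fio runs in the background so that sar can sample host and/or
        # guest CPU utilization while the workload is in flight.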

        if $host_sar_enable || $vm_sar_enable; then
            pids=""
            mkdir -p $VHOST_DIR/fio_results/sar_stats
            sleep $sar_delay
        fi

        if $host_sar_enable; then
            sar -P ALL $sar_interval $sar_count > "$VHOST_DIR/fio_results/sar_stats/sar_stats_host.txt" &
            pids+=" $!"
        fi

        if $vm_sar_enable; then
            for vm_num in $used_vms; do
                vm_exec "$vm_num" "mkdir -p /root/sar; sar -P ALL $sar_interval $sar_count >> /root/sar/sar_stats_VM${vm_num}_run${i}.txt" &
                pids+=" $!"
            done
        fi

        for j in $pids; do
            wait $j
        done

        if $vm_sar_enable; then
            for vm_num in $used_vms; do
                vm_scp "$vm_num" "root@127.0.0.1:/root/sar/sar_stats_VM${vm_num}_run${i}.txt" "$VHOST_DIR/fio_results/sar_stats"
            done
        fi

        wait $fio_pid
        mv $VHOST_DIR/fio_results/$fio_log_fname $VHOST_DIR/fio_results/$fio_log_fname.$i
        sleep 1
    done

    parse_fio_results "$VHOST_DIR/fio_results" "$fio_log_fname"
done

notice "Shutting down virtual machines..."
vm_shutdown_all

if [[ "$ctrl_type" == "kernel_vhost" ]]; then
    cleanup_kernel_vhost || true
else
    notice "Shutting down SPDK vhost app..."
    if [[ $use_split == true ]]; then
        cleanup_split_cfg
    else
        cleanup_lvol_cfg
    fi
    vhost_kill "${vhost_num}"
fi

if [[ -n "$kernel_cpus" ]]; then
    rmdir /sys/fs/cgroup/cpuset/spdk
fi