#!/usr/bin/env bash
set -e

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/vhost/common.sh

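# Default test parameters; most can be overridden with the command line options parsed below.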
vm_count=1
vm_memory=2048
vm_image="/home/sys_sgsw/vhost_vm_image.qcow2"
vm_sar_enable=false
vm_sar_delay="0"
vm_sar_interval="1"
vm_sar_count="10"
vm_throttle=""
max_disks=""
ctrl_type="spdk_vhost_scsi"
use_split=false
kernel_cpus=""
run_precondition=false
lvol_stores=()
lvol_bdevs=()
used_vms=""
wwpn_prefix="naa.5001405bc6498"

fio_bin="--fio-bin=/home/sys_sgsw/fio_ubuntu"
fio_iterations=1
precond_fio_bin="/usr/src/fio/fio"

function usage()
{
	[[ ! -z $2 ]] && ( echo "$2"; echo ""; )
	echo "Shortcut script for running automated vhost performance test"
	echo "Usage: $(basename $1) [OPTIONS]"
	echo
	echo "-h, --help                Print help and exit"
	echo "    --fio-bin=PATH        Path to FIO binary on host."
	echo "                          Binary will be copied to VM, static compilation"
	echo "                          of binary is recommended."
	echo "    --fio-job=PATH        Fio config to use for test."
	echo "    --fio-iterations=INT  Number of times to run specified workload."
	echo "    --vm-count=INT        Total number of virtual machines to launch in this test;"
	echo "                          Each VM will get one bdev (lvol or split vbdev)"
	echo "                          to run FIO test."
	echo "                          Default: 1"
	echo "    --vm-memory=INT       Amount of RAM memory (in MB) to pass to a single VM."
	echo "                          Default: 2048 MB"
	echo "    --vm-image=PATH       OS image to use for running the VMs."
	echo "                          Default: /home/sys_sgsw/vhost_vm_image.qcow2"
	echo "    --vm-sar-enable       Measure CPU utilization on VM using sar."
	echo "    --vm-sar-delay=INT    Wait for X seconds before starting SAR measurement on VMs. Default: 0."
	echo "    --vm-sar-interval=INT Interval (seconds) argument for SAR. Default: 1s."
	echo "    --vm-sar-count=INT    Count argument for SAR. Default: 10."
	echo "    --vm-throttle-iops=INT I/Os throttle rate in IOPS for each device on the VMs."
	echo "    --max-disks=INT       Maximum number of NVMe drives to use in test."
	echo "                          Default: will use all available NVMes."
	echo "    --ctrl-type=TYPE      Controller type to use for test:"
	echo "                          spdk_vhost_scsi - use spdk vhost scsi"
	echo "                          spdk_vhost_blk - use spdk vhost block"
	echo "                          kernel_vhost - use kernel vhost scsi"
	echo "                          Default: spdk_vhost_scsi"
	echo "    --use-split           Use split vbdevs instead of Logical Volumes"
	echo "    --limit-kernel-vhost=INT  Limit kernel vhost to run only on a number of CPU cores."
	echo "    --run-precondition    Precondition lvols after creating them. Default: false."
	echo "    --precond-fio-bin=PATH FIO binary used for SPDK fio plugin precondition. Default: /usr/src/fio/fio."
	echo "    --custom-cpu-cfg=PATH Custom CPU config for test."
	echo "                          Default: spdk/test/vhost/common/autotest.config"
	echo "-x                        set -x for script debug"
	exit 0
}

function cleanup_lvol_cfg()
{
	notice "Removing lvol bdevs"
	for lvol_bdev in "${lvol_bdevs[@]}"; do
		$rpc_py destroy_lvol_bdev $lvol_bdev
		notice "lvol bdev $lvol_bdev removed"
	done

	notice "Removing lvol stores"
	for lvol_store in "${lvol_stores[@]}"; do
		$rpc_py destroy_lvol_store -u $lvol_store
		notice "lvol store $lvol_store removed"
	done
}

function cleanup_split_cfg()
{
	notice "Removing split vbdevs"
	for (( i=0; i<$max_disks; i++ )); do
		$rpc_py destruct_split_vbdev Nvme${i}n1
	done
}

function cleanup_parted_config()
{
	local disks=$(ls /dev/nvme*n1 | sort --version-sort)
	for disk in $disks; do
		parted -s $disk rm 1
	done
}

function cleanup_kernel_vhost()
{
	notice "Cleaning kernel vhost configuration"
	targetcli clearconfig confirm=True
	cleanup_parted_config
}

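# Parse command line arguments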
while getopts 'xh-:' optchar; do
	case "$optchar" in
		-)
		case "$OPTARG" in
			help) usage $0 ;;
			fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
			fio-job=*) fio_job="${OPTARG#*=}" ;;
			fio-iterations=*) fio_iterations="${OPTARG#*=}" ;;
			vm-count=*) vm_count="${OPTARG#*=}" ;;
			vm-memory=*) vm_memory="${OPTARG#*=}" ;;
			vm-image=*) vm_image="${OPTARG#*=}" ;;
			vm-sar-enable) vm_sar_enable=true ;;
			vm-sar-delay=*) vm_sar_delay="${OPTARG#*=}" ;;
			vm-sar-interval=*) vm_sar_interval="${OPTARG#*=}" ;;
			vm-sar-count=*) vm_sar_count="${OPTARG#*=}" ;;
			vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
			max-disks=*) max_disks="${OPTARG#*=}" ;;
			ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
			use-split) use_split=true ;;
			run-precondition) run_precondition=true ;;
			precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
			limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
			custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
			*) usage $0 "Invalid argument '$OPTARG'" ;;
		esac
		;;
		h) usage $0 ;;
		x) set -x
			x="-x" ;;
		*) usage $0 "Invalid argument '$OPTARG'"
	esac
done

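# All SPDK configuration (controllers, lvols, split vbdevs) goes through the vhost target's RPC socket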
rpc_py="$rootdir/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock"

if [[ -n $custom_cpu_cfg ]]; then
	source $custom_cpu_cfg
fi

if [[ -z $fio_job ]]; then
	warning "No FIO job specified! Will use default from common directory."
	fio_job="$rootdir/test/vhost/common/fio_jobs/default_integrity.job"
fi

trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
notice "Get NVMe disks:"
nvmes=($(iter_pci_class_code 01 08 02))

if [[ -z $max_disks ]]; then
	max_disks=${#nvmes[@]}
fi

if [[ ${#nvmes[@]} -lt max_disks ]]; then
	fail "Number of NVMe drives (${#nvmes[@]}) is lower than number of requested disks for test ($max_disks)"
fi

# Calculate number of needed splits per NVMe
# so that each VM gets its own bdev during test.
splits=()

if [[ $vm_count -le $max_disks ]]; then
	for i in $(seq 0 $((max_disks - 1))); do
		splits+=("1")
	done
else
	# Calculate the minimum number of splits on each disk
	for i in $(seq 0 $((max_disks - 1))); do
		splits+=( $((vm_count / max_disks)) )
	done
	# Split up the remainder
	for i in $(seq 0 $((vm_count % max_disks - 1))); do
		(( splits[i]++ ))
	done
fi
notice "Preparing NVMe setup..."
notice "Using $max_disks physical NVMe drives"
notice "Nvme split list: ${splits[@]}"

# ===== Precondition NVMes if specified =====
if [[ $run_precondition == true ]]; then
	# The same precondition routine can be used for lvols thanks to
	# the --clear-method option. Lvols should not UNMAP on creation.
	$rootdir/scripts/gen_nvme.sh > $rootdir/nvme.cfg
	nvmes=$(cat $rootdir/nvme.cfg | grep -oP "Nvme\d+")
	nvmes=($nvmes)
	fio_filename=$(printf ":%sn1" "${nvmes[@]}")
	fio_filename=${fio_filename:1}
	$precond_fio_bin --name="precondition" \
		--ioengine="${rootdir}/examples/bdev/fio_plugin/fio_plugin" \
		--rw="write" --spdk_conf="${rootdir}/nvme.cfg" --thread="1" \
		--group_reporting --direct="1" --size="100%" --loops="2" --bs="256k" \
		--iodepth=32 --filename="${fio_filename}" || true
fi

# ===== Prepare NVMe splits & run vhost process =====
if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
	# Split disks using parted for kernel vhost
	newline=$'\n'
	for (( i=0; i<$max_disks; i++ )); do
		parted -s /dev/nvme${i}n1 mklabel msdos
		parted -s /dev/nvme${i}n1 mkpart extended 2048s 100%
		part_size=$((100/${splits[$i]})) # Split 100% of disk into roughly even parts
		echo "  Creating ${splits[$i]} partitions of relative disk size ${part_size}"

		for p in $(seq 0 $((${splits[$i]} - 1))); do
			p_start=$(($p*$part_size))
			p_end=$(($p_start+$part_size))
			parted -s /dev/nvme${i}n1 mkpart logical ${p_start}% ${p_end}%
		done
	done
	sleep 1

	# Prepare kernel vhost configuration
	# Below grep: match only NVMe partitions which are not "Extended" type.
	# For example: will match nvme0n1p15 but not nvme0n1p1
	partitions=$(ls -1 /dev/nvme* | sort --version-sort | grep -P 'p(?!1$)\d+')
	backstores=()

	# Create block backstores for vhost kernel process
	for p in $partitions; do
		backstore_name=$(basename $p)
		backstores+=("$backstore_name")
		targetcli backstores/block create $backstore_name $p
	done

	# Create kernel vhost controllers and add LUNs
	for ((i=0; i<${#backstores[*]}; i++)); do
		# WWPN prefix is missing 3 characters. Complete it
		# using the block backstore number.
		x=$(printf %03d $i)
		wwpn="${wwpn_prefix}${x}"
		targetcli vhost/ create $wwpn
		targetcli vhost/$wwpn/tpg1/luns create /backstores/block/${backstores[$i]}
	done
else
	# Run vhost process and prepare split vbdevs or lvol bdevs
	notice "running SPDK vhost"
	vhost_run
	notice "..."

	if [[ $use_split == true ]]; then
		notice "Using split vbdevs"
		trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		split_bdevs=()
		for (( i=0; i<$max_disks; i++ )); do
			out=$($rpc_py construct_split_vbdev Nvme${i}n1 ${splits[$i]})
			for s in $(seq 0 $((${splits[$i]}-1))); do
				split_bdevs+=("Nvme${i}n1p${s}")
			done
		done
		bdevs=("${split_bdevs[@]}")
	else
		notice "Using logical volumes"
		trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		for (( i=0; i<$max_disks; i++ )); do
			ls_guid=$($rpc_py construct_lvol_store Nvme${i}n1 lvs_$i --clear-method none)
			lvol_stores+=("$ls_guid")
			for (( j=0; j<${splits[$i]}; j++ )); do
				free_mb=$(get_lvs_free_mb "$ls_guid")
				size=$((free_mb / (${splits[$i]}-j) ))
				lb_name=$($rpc_py construct_lvol_bdev -u $ls_guid lbd_$j $size --clear-method none)
				lvol_bdevs+=("$lb_name")
			done
		done
		bdevs=("${lvol_bdevs[@]}")
	fi
fi

# Prepare VMs and controllers
for (( i=0; i<$vm_count; i++ )); do
	vm="vm_$i"

	setup_cmd="vm_setup --disk-type=$ctrl_type --force=$i --memory=$vm_memory"
	setup_cmd+=" --os=$vm_image"

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		$rpc_py construct_vhost_scsi_controller naa.0.$i
		$rpc_py add_vhost_scsi_lun naa.0.$i 0 ${bdevs[$i]}
		setup_cmd+=" --disks=0"
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		$rpc_py construct_vhost_blk_controller naa.$i.$i ${bdevs[$i]}
		setup_cmd+=" --disks=$i"
	elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
		x=$(printf %03d $i)
		setup_cmd+=" --disks=${wwpn_prefix}${x}"
	fi
	$setup_cmd
	used_vms+=" $i"
done

# Run VMs and wait for boot
vm_run $used_vms
vm_wait_for_boot 300 $used_vms

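# If requested, constrain kernel vhost worker threads to the vhost reactor CPU cores using a cpuset cgroup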
if [[ -n "$kernel_cpus" ]]; then
	mkdir -p /sys/fs/cgroup/cpuset/spdk
	kernel_mask=$vhost_0_reactor_mask
	kernel_mask=${kernel_mask#"["}
	kernel_mask=${kernel_mask%"]"}

	echo "$kernel_mask" >> /sys/fs/cgroup/cpuset/spdk/cpuset.cpus
	echo "0-1" >> /sys/fs/cgroup/cpuset/spdk/cpuset.mems

	kernel_vhost_pids=$(ps aux | grep -Po "^root\s+\K(\d+)(?=.*\[vhost-\d+\])")
	for kpid in $kernel_vhost_pids; do
		echo "Limiting kernel vhost pid ${kpid}"
		echo "${kpid}" >> /sys/fs/cgroup/cpuset/spdk/tasks
	done
fi

# Prepare VMs for running FIO: start fio server and detect test disks
fio_disks=""
for vm_num in $used_vms; do
	vm_dir=$VM_BASE_DIR/$vm_num
	host_name="VM-$vm_num"
	vm_ssh $vm_num "hostname $host_name"
	vm_start_fio_server $fio_bin $vm_num

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		vm_check_scsi_location $vm_num
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		vm_check_blk_location $vm_num
	elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
		vm_check_scsi_location $vm_num
	fi

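	# Limit read and write IOPS on each test device via the VM's blkio cgroup controller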
	if [[ -n "$vm_throttle" ]]; then
		block=$(printf '%s' $SCSI_DISK)
		major_minor=$(vm_ssh "$vm_num" "cat /sys/block/$block/dev")
		vm_ssh "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.read_iops_device"
		vm_ssh "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.write_iops_device"
	fi

	fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)"
done

# Run FIO traffic
fio_job_fname=$(basename $fio_job)
fio_log_fname="${fio_job_fname%%.*}.log"
for i in $(seq 1 $fio_iterations); do
	echo "Running FIO iteration $i"
	run_fio $fio_bin --job-file="$fio_job" --out="$TEST_DIR/fio_results" --json $fio_disks &
	fio_pid=$!

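	# While fio runs, optionally collect per-CPU utilization on each VM with sar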
	if $vm_sar_enable; then
		sleep $vm_sar_delay
		mkdir -p $TEST_DIR/fio_results/sar_stats
		pids=""
		for vm_num in $used_vms; do
			vm_ssh "$vm_num" "mkdir -p /root/sar; sar -P ALL $vm_sar_interval $vm_sar_count >> /root/sar/sar_stats_VM${vm_num}_run${i}.txt" &
			pids+=" $!"
		done
		for j in $pids; do
			wait $j
		done
		for vm_num in $used_vms; do
			vm_scp "$vm_num" "root@127.0.0.1:/root/sar/sar_stats_VM${vm_num}_run${i}.txt" "$TEST_DIR/fio_results/sar_stats"
		done
	fi

	wait $fio_pid
	mv $TEST_DIR/fio_results/$fio_log_fname $TEST_DIR/fio_results/$fio_log_fname.$i
	sleep 1
done

notice "Shutting down virtual machines..."
vm_shutdown_all

if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	cleanup_kernel_vhost || true
else
	notice "Shutting down SPDK vhost app..."
	if [[ $use_split == true ]]; then
		cleanup_split_cfg
	else
		cleanup_lvol_cfg
	fi
	vhost_kill
fi

if [[ -n "$kernel_cpus" ]]; then
	rmdir /sys/fs/cgroup/cpuset/spdk
fi