]> git.proxmox.com Git - ceph.git/blob - ceph/src/zstd/tests/test-zstd-speed.py
bump version to 15.2.11-pve1
[ceph.git] / ceph / src / zstd / tests / test-zstd-speed.py
1 #! /usr/bin/env python3
2
3 # ################################################################
4 # Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
5 # All rights reserved.
6 #
7 # This source code is licensed under both the BSD-style license (found in the
8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
9 # in the COPYING file in the root directory of this source tree).
10 # ##########################################################################
11
12 # Limitations:
13 # - doesn't support filenames with spaces
14 # - dir1/zstd and dir2/zstd will be merged in a single results file
15
16 import argparse
17 import os # getloadavg
18 import string
19 import subprocess
20 import time # strftime
21 import traceback
22 import hashlib
23 import platform # system
24
# Version string reported in the "started"/"stopped" notification e-mails.
script_version = 'v1.1.2 (2017-03-26)'
# Repository cloned when --repoURL is not given on the command line.
default_repo_url = 'https://github.com/facebook/zstd.git'
# The clone, result files, logs and per-branch commit markers all live
# below <current directory>/speedTest.
working_dir_name = 'speedTest'
working_path = '/'.join((os.getcwd(), working_dir_name))  # /path/to/zstd/tests/speedTest
clone_path = '/'.join((working_path, 'zstd'))  # /path/to/zstd/tests/speedTest/zstd
# Prefix of every e-mail subject, followed by the pid of this process.
email_header = 'ZSTD_speedTest'
pid = str(os.getpid())
# Placeholders; the __main__ section overwrites them after parsing the
# command line and probing the compilers.
verbose = False
clang_version = "unknown"
gcc_version = "unknown"
args = None
36
37
def hashfile(hasher, fname, blocksize=65536):
    """Return the hex digest of the file *fname* computed with *hasher*.

    hasher    -- a hashlib-style object providing update() and hexdigest()
    fname     -- path of the file to read (opened in binary mode)
    blocksize -- number of bytes requested per read, so the whole file is
                 never held in memory at once
    """
    with open(fname, "rb") as stream:
        while True:
            data = stream.read(blocksize)
            if data == b"":  # end of file
                break
            hasher.update(data)
    return hasher.hexdigest()
43
44
def log(text):
    """Print *text* on stdout, prefixed with the current local date and time."""
    timestamp = time.strftime("%Y/%m/%d %H:%M:%S")
    print(timestamp + ' - ' + text)
47
48
def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
    """Run *command* and return its output as a list of lines.

    command       -- command to run (a shell command line when param_shell is true)
    print_command -- log the command before running it
    print_output  -- print the captured stdout and stderr once the command ends
    print_error   -- on failure, print stderr (unless print_output already did)
    param_shell   -- passed to subprocess.Popen as ``shell``

    The command runs in the directory stored in ``execute.cwd`` (None means
    the current directory) and is limited to ``args.timeout`` seconds; no
    limit applies while the command line has not been parsed yet.

    Returns stdout followed by stderr, split into lines.
    Raises RuntimeError (message: stdout + stderr) on a non-zero exit status,
    and subprocess.TimeoutExpired when the time limit is exceeded.
    """
    if print_command:
        log("> " + command)
    # The module-level `args` stays None until the command line is parsed;
    # treat that as "no timeout" instead of failing with AttributeError.
    timeout = getattr(args, "timeout", None)
    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd)
    try:
        stdout_lines, stderr_lines = popen.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # communicate() does not stop the child when the timeout expires.
        # Kill and reap it so a stuck build or benchmark does not keep
        # running in the background and disturb later measurements.
        popen.kill()
        try:
            # Bounded wait: with shell=True a surviving grandchild may still
            # hold the pipes open, and we must not block on it forever.
            popen.communicate(timeout=5)
        except subprocess.TimeoutExpired:
            pass
        raise
    # Tool output is not guaranteed to be valid UTF-8; replace undecodable
    # bytes rather than aborting the whole run on a stray byte.
    stderr_lines = stderr_lines.decode("utf-8", errors="replace")
    stdout_lines = stdout_lines.decode("utf-8", errors="replace")
    if print_output:
        if stdout_lines:
            print(stdout_lines)
        if stderr_lines:
            print(stderr_lines)
    if popen.returncode is not None and popen.returncode != 0:
        if stderr_lines and not print_output and print_error:
            print(stderr_lines)
        raise RuntimeError(stdout_lines + stderr_lines)
    return (stdout_lines + stderr_lines).splitlines()
# Working directory used for every command; set by the __main__ section.
execute.cwd = None
67
68
def does_command_exist(command):
    """Return True when *command* runs successfully, False otherwise.

    The command is run through execute() with output and error printing
    disabled; any exception it raises (non-zero exit status, timeout, ...)
    counts as "does not exist".
    """
    try:
        execute(command, verbose, False, False)
        return True
    except Exception:
        return False
75
76
def send_email(emails, topic, text, have_mutt, have_mail):
    """Send *text* to *emails* with subject *topic*.

    The body is first written to <working_path>/tmpEmailContent and then fed
    to ``mutt`` (preferred) or ``mail`` on stdin. When neither program is
    available only a log message is produced.
    """
    logFileName = working_path + '/' + 'tmpEmailContent'
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)
    if not (have_mutt or have_mail):
        log("e-mail cannot be sent (mail or mutt not found)")
        return
    mailer = 'mutt' if have_mutt else 'mail'
    execute(mailer + ' -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
88
89
def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
                                logFileName, have_mutt, have_mail):
    """Send a regression warning for *branch*:*commit*.

    *text* is written to *logFileName*, which becomes the message body. The
    subject carries the branch, the commit, the previous commit and the
    configured speed/ratio limits. With ``mutt`` the files listed in
    *results_files* (space-separated paths) are attached; with ``mail`` only
    the body is sent. When neither program is available only a log message
    is produced.
    """
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)
    email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \
                  % (email_header, pid, branch, commit, last_commit,
                     args.lowerLimit, args.ratioLimit)
    if have_mutt:
        command = ('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files
                   + ' < ' + logFileName)
    elif have_mail:
        command = 'mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
        return
    execute(command)
105
106
def git_get_branches():
    """Fetch from the remote and return the list of remote branch names.

    Entries mentioning HEAD, coverity_scan or gh-pages are left out.
    """
    execute('git fetch -p', verbose)
    excluded = ("HEAD", "coverity_scan", "gh-pages")
    return [entry.strip()
            for entry in execute('git branch -rl', verbose)
            if not any(marker in entry for marker in excluded)]
115
116
def git_get_changes(branch, commit, last_commit):
    """Return a printable summary of the commits new in *branch*.

    When *last_commit* is None the last 10 commits leading to *commit* are
    listed; otherwise the range last_commit..commit is listed.
    """
    fmt = '--format="%h: (%an) %s, %ar"'
    if last_commit is not None:
        command = 'git --no-pager log %s %s..%s' % (fmt, last_commit, commit)
    else:
        command = 'git log -n 10 %s %s' % (fmt, commit)
    commits = execute(command)
    header = 'Changes in %s since %s:\n' % (branch, last_commit)
    return header + '\n'.join(commits)
124
125
def get_last_results(resultsFileName):
    """Read the most recent benchmark run recorded in *resultsFileName*.

    The file is a sequence of runs. Each run starts with a header line of up
    to four words ("branch commit compilerVersion md5=...") followed by one
    result line per compression level (8 words for a single file, 9 words
    for "XX files").

    Returns (commit, csize, cspeed, dspeed) for the last run in the file:
    the commit from its header, and the compressed sizes (int), compression
    speeds and decompression speeds (float) in level order. Returns
    (None, None, None, None) when the file does not exist.
    """
    if not os.path.isfile(resultsFileName):
        return None, None, None, None
    commit = None
    csize, cspeed, dspeed = [], [], []
    with open(resultsFileName, 'r') as f:
        for line in f:
            words = line.split()
            # A header needs at least "branch commit"; blank or one-word
            # lines are skipped instead of failing on words[1].
            if 2 <= len(words) <= 4:  # branch + commit + compilerVer + md5
                commit = words[1]
                # A new header starts a new run: forget the previous one.
                csize, cspeed, dspeed = [], [], []
            elif len(words) in (8, 9):  # results: "filename" or "XX files"
                csize.append(int(words[1]))
                cspeed.append(float(words[3]))
                dspeed.append(float(words[5]))
    return commit, csize, cspeed, dspeed
146
147
def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName,
                          testFilePath, fileName, last_csize, last_cspeed, last_dspeed):
    """Benchmark programs/<executableName> on *testFilePath* and compare with the previous run.

    The function waits until the 1-minute load average is at most
    args.maxLoadAvg, runs the benchmark (pinned to CPU 0 through taskset on
    Linux, with args.dictionary when set), checks that args.lastCLevel + 1
    output lines came back, and appends them to *resultsFileName* below a
    "branch commit compilerVersion md5" header.

    last_csize / last_cspeed / last_dspeed are the per-level figures of the
    previous run, or None when there is none.

    Returns "" when there is nothing to compare against or nothing regressed.
    Otherwise returns a report made of args.message, the load averages and
    one WARNING line per level whose compression speed or decompression
    speed ratio fell below args.lowerLimit, or whose size ratio fell below
    args.ratioLimit.
    Raises RuntimeError when the benchmark prints an unexpected number of lines.
    """
    sleepTime = 30
    # Do not measure on a busy machine.
    while os.getloadavg()[0] > args.maxLoadAvg:
        log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds"
            % (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
        time.sleep(sleepTime)
    start_load = str(os.getloadavg())

    cpuSelector = "taskset --cpu-list 0" if platform.system() == 'Linux' else ""
    benchCommand = '%s programs/%s -rqi5b1e%s' % (cpuSelector, executableName, args.lastCLevel)
    if args.dictionary:
        benchCommand += ' -D %s' % (args.dictionary,)
    benchCommand += ' %s' % (testFilePath,)
    result = execute(benchCommand, print_output=True)
    end_load = str(os.getloadavg())

    linesExpected = args.lastCLevel + 1
    if len(result) != linesExpected:
        raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result)))

    with open(resultsFileName, "a") as myfile:
        myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum))
        myfile.write('\n'.join(result) + '\n')

    if last_cspeed is None:
        log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName))
        return ""

    # Read back the run that was just appended so both sides of the
    # comparison go through the same parser.
    commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    warnings = []
    for i in range(min(len(cspeed), len(last_cspeed))):
        level = i + 1
        cDiff = cspeed[i]/last_cspeed[i]
        dDiff = dspeed[i]/last_dspeed[i]
        ratioDiff = float(last_csize[i])/csize[i]
        print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, level, cspeed[i], last_cspeed[i], cDiff, dspeed[i], last_dspeed[i], dDiff, ratioDiff, fileName))
        if cDiff < args.lowerLimit:
            warnings.append("WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, level, cspeed[i], last_cspeed[i], cDiff, fileName))
        if dDiff < args.lowerLimit:
            warnings.append("WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, level, dspeed[i], last_dspeed[i], dDiff, fileName))
        if ratioDiff < args.ratioLimit:
            warnings.append("WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, level, csize[i], last_csize[i], ratioDiff, fileName))

    if not warnings:
        return ""
    summary = "\nmaxLoadAvg=%s load average at start=%s end=%s\n%s last_commit=%s md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)
    return args.message + summary + "".join(warnings)
189
190
def update_config_file(branch, commit):
    """Record *commit* as the latest commit seen on *branch*.

    The marker is kept in <working_path>/commit_<branch>.txt, with "/" in
    the branch name replaced by "_". Returns the previously recorded commit,
    or None when no marker file existed.
    """
    commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt"
    if os.path.isfile(commitFileName):
        with open(commitFileName, 'r') as infile:
            last_commit = infile.read()
    else:
        last_commit = None
    with open(commitFileName, 'w') as outfile:
        outfile.write(commit)
    return last_commit
200
201
def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName):
    """Benchmark one executable and re-run once before reporting a regression.

    The previous run recorded in *resultsFileName* is the baseline. If the
    first benchmark yields a warning report, the benchmark is repeated
    against the same baseline and the report of the second run is returned,
    so a single noisy measurement does not raise an alarm.

    Returns the warning text, or "" when nothing regressed or when
    args.dry_run is set (no benchmark is run in that mode).
    """
    if args.dry_run:
        # Nothing is measured in dry-run mode, so there is nothing to report.
        return ""
    last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
    if text:
        log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
        text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
    return text
210
211
def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
    """Build *commit* of *branch*, benchmark every test file, and mail any warnings.

    Unless args.dry_run is set, three executables are built in the clone:
    programs/zstd_clang (clang, with -Werror), programs/zstd and
    programs/zstd32 (default compiler). Each of them is then run through
    double_check() for every path in *testFilePaths*. Results go to one file
    per executable, branch and test file below working_path. All warning
    reports are sent in a single e-mail, together with the result files
    that triggered them.
    """
    local_branch = branch.split('/')[1]
    version = local_branch.rpartition('-')[2] + '_' + commit
    if not args.dry_run:
        execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version +
                'mv programs/zstd programs/zstd_clang && ' +
                'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version)
    md5_zstd = hashfile(hashlib.md5(), clone_path + '/programs/zstd')
    md5_zstd32 = hashfile(hashlib.md5(), clone_path + '/programs/zstd32')
    md5_zstd_clang = hashfile(hashlib.md5(), clone_path + '/programs/zstd_clang')
    print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang))
    print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version))

    branchTag = branch.replace("/", "_")
    logFileName = working_path + "/log_" + branchTag + ".txt"
    dictName = args.dictionary.rpartition('/')[2] if args.dictionary else None
    gccLabel = 'gcc_version=' + gcc_version
    clangLabel = 'clang_version=' + clang_version
    text_to_send = []
    results_files = ""

    for filePath in testFilePaths:
        fileName = filePath.rpartition('/')[2]
        suffix = "_" + branchTag + "_" + fileName.replace(".", "_") + ".txt"
        # Only the 64-bit gcc results file is named after the dictionary.
        if dictName:
            mainPrefix = "/" + dictName.replace(".", "_")
        else:
            mainPrefix = "/results"
        # (executable, md5 of the binary, compiler label, results file prefix)
        builds = (
            ('zstd', md5_zstd, gccLabel, mainPrefix),
            ('zstd32', md5_zstd32, gccLabel, "/results32"),
            ('zstd_clang', md5_zstd_clang, clangLabel, "/resultsClang"),
        )
        for executableName, md5sum, compilerLabel, prefix in builds:
            resultsFileName = working_path + prefix + suffix
            text = double_check(branch, commit, args, executableName, md5sum, compilerLabel, resultsFileName, filePath, fileName)
            if text:
                text_to_send.append(text)
                results_files += resultsFileName + " "

    if text_to_send:
        send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail)
255
256
if __name__ == '__main__':
    # ---- command line ----
    parser = argparse.ArgumentParser()
    parser.add_argument('testFileNames', help='file or directory names list for speed benchmark')
    parser.add_argument('emails', help='list of e-mail addresses to send warnings')
    parser.add_argument('--dictionary', '-D', help='path to the dictionary')
    parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="")
    parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url)
    parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98)
    parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999)
    parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
    parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
    parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300)
    parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800)
    parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
    parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False)
    args = parser.parse_args()
    verbose = args.verbose

    # ---- every test file or directory must exist; keep absolute paths ----
    testFileNames = args.testFileNames.split()
    testFilePaths = []
    for fileName in testFileNames:
        fileName = os.path.expanduser(fileName)
        if not (os.path.isfile(fileName) or os.path.isdir(fileName)):
            log("ERROR: File/directory not found: " + fileName)
            exit(1)
        testFilePaths.append(os.path.abspath(fileName))

    # ---- the dictionary, when given, must be a regular file ----
    if args.dictionary:
        args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary))
        if not os.path.isfile(args.dictionary):
            log("ERROR: Dictionary not found: " + args.dictionary)
            exit(1)

    # ---- at least one e-mail sender is required ----
    have_mutt = does_command_exist("mutt -h")
    have_mail = does_command_exist("mail -V")
    if not (have_mutt or have_mail):
        log("ERROR: e-mail senders 'mail' or 'mutt' not found")
        exit(1)

    # ---- compiler versions, recorded next to every result ----
    clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0]
    gcc_version = execute("gcc -dumpversion", verbose)[0]

    if verbose:
        print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
        print("working_path=%s" % working_path)
        print("clone_path=%s" % clone_path)
        print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
        print("message=%s" % args.message)
        print("emails=%s" % args.emails)
        print("dictionary=%s" % args.dictionary)
        print("maxLoadAvg=%s" % args.maxLoadAvg)
        print("lowerLimit=%s" % args.lowerLimit)
        print("ratioLimit=%s" % args.ratioLimit)
        print("lastCLevel=%s" % args.lastCLevel)
        print("sleepTime=%s" % args.sleepTime)
        print("timeout=%s" % args.timeout)
        print("dry_run=%s" % args.dry_run)
        print("verbose=%s" % args.verbose)
        print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))

    # ---- clone the ZSTD repository if needed ----
    if not os.path.isdir(working_path):
        os.mkdir(working_path)
    if not os.path.isdir(clone_path):
        execute.cwd = working_path
        execute('git clone ' + args.repoURL)
    if not os.path.isdir(clone_path):
        log("ERROR: ZSTD clone not found: " + clone_path)
        exit(1)
    # From here on every command runs inside the clone.
    execute.cwd = clone_path

    # ---- refuse to start when speedTest.pid already exists ----
    pidfile = "./speedTest.pid"
    if os.path.isfile(pidfile):
        log("ERROR: %s already exists, exiting" % pidfile)
        exit(1)

    send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
    with open(pidfile, 'w') as the_file:
        the_file.write(pid)

    # ---- main loop: poll the repository and benchmark every new branch head ----
    # branch/commit are kept up to date so the error e-mail can name the
    # branch and commit being processed when an exception occurred.
    branch = ""
    commit = ""
    first_time = True
    while True:
        try:
            if first_time:
                first_time = False
            else:
                time.sleep(args.sleepTime)
            loadavg = os.getloadavg()[0]
            if loadavg > args.maxLoadAvg:
                log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
            else:
                branches = git_get_branches()
                for branch in branches:
                    commit = execute('git show -s --format=%h ' + branch, verbose)[0]
                    last_commit = update_config_file(branch, commit)
                    if commit == last_commit:
                        log("skipping branch %s: head %s already processed" % (branch, commit))
                        continue
                    log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit))
                    execute('git checkout -- . && git checkout ' + branch)
                    print(git_get_changes(branch, commit, last_commit))
                    test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail)
            if verbose:
                log("sleep for %s seconds" % args.sleepTime)
        except Exception:
            # Report the failure by e-mail and keep polling.
            stack = traceback.format_exc()
            email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit)
            send_email(args.emails, email_topic, stack, have_mutt, have_mail)
            print(stack)
        except KeyboardInterrupt:
            # Ctrl-C: remove the pid file, announce the stop and exit.
            os.unlink(pidfile)
            send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
            exit(0)