]>
git.proxmox.com Git - ceph.git/blob - ceph/src/zstd/tests/test-zstd-speed.py
1 #! /usr/bin/env python3
3 # ################################################################
4 # Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
7 # This source code is licensed under both the BSD-style license (found in the
8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
9 # in the COPYING file in the root directory of this source tree).
10 # ##########################################################################
13 # - doesn't support filenames with spaces
14 # - dir1/zstd and dir2/zstd will be merged in a single results file
17 import os
# getloadavg
20 import time
# strftime
23 import platform
# system
25 script_version
= 'v1.1.2 (2017-03-26)'
26 default_repo_url
= 'https://github.com/facebook/zstd.git'
27 working_dir_name
= 'speedTest'
28 working_path
= os
. getcwd () + '/' + working_dir_name
# /path/to/zstd/tests/speedTest
29 clone_path
= working_path
+ '/' + 'zstd' # /path/to/zstd/tests/speedTest/zstd
30 email_header
= 'ZSTD_speedTest'
31 pid
= str ( os
. getpid ())
33 clang_version
= "unknown"
34 gcc_version
= "unknown"
def hashfile(hasher, fname, blocksize=65536):
    """Return the hex digest of file *fname* computed with *hasher*.

    The file is opened in binary mode and read in pieces of at most
    *blocksize* bytes, so it is never loaded into memory all at once.

    Args:
        hasher: a hash object exposing ``update(bytes)`` and ``hexdigest()``
            (callers in this file pass ``hashlib.md5()``).
        fname: path of the file to hash.
        blocksize: maximum number of bytes read per iteration.

    Returns:
        The string returned by ``hasher.hexdigest()`` after all chunks
        have been fed to the hasher.
    """
    with open(fname, "rb") as f:
        # iter() with a sentinel stops on the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(blocksize), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
46 print ( time
. strftime ( "%Y/%m/ %d %H:%M:%S" ) + ' - ' + text
)
49 def execute ( command
, print_command
= True , print_output
= False , print_error
= True , param_shell
= True ):
52 popen
= subprocess
. Popen ( command
, stdout
= subprocess
. PIPE
, stderr
= subprocess
. PIPE
, shell
= param_shell
, cwd
= execute
. cwd
)
53 stdout_lines
, stderr_lines
= popen
. communicate ( timeout
= args
. timeout
)
54 stderr_lines
= stderr_lines
. decode ( "utf-8" )
55 stdout_lines
= stdout_lines
. decode ( "utf-8" )
61 if popen
. returncode
is not None and popen
. returncode
!= 0 :
62 if stderr_lines
and not print_output
and print_error
:
64 raise RuntimeError ( stdout_lines
+ stderr_lines
)
65 return ( stdout_lines
+ stderr_lines
). splitlines ()
69 def does_command_exist ( command
):
71 execute ( command
, verbose
, False , False )
def send_email(emails, topic, text, have_mutt, have_mail):
    """Send *text* to *emails* with subject *topic* using mutt or mail.

    The message body is first written to ``<working_path>/tmpEmailContent``
    and then redirected into the mail program through the shell.

    Args:
        emails: recipient address string, appended verbatim to the command.
        topic: subject line (wrapped in double quotes on the command line).
        text: message body, written with ``writelines``.
        have_mutt: truthy when the ``mutt`` command is available; preferred.
        have_mail: truthy when the ``mail`` command is available; used only
            when mutt is not.

    If neither sender is available, a message is logged and nothing is sent.
    """
    logFileName = working_path + '/' + 'tmpEmailContent'
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)

    # Exactly one sender must run: without this dispatch the e-mail would be
    # sent twice and the "cannot be sent" message logged every time.
    if have_mutt:
        execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
    elif have_mail:
        execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
                                logFileName, have_mutt, have_mail):
    """Send a warning e-mail for *branch*/*commit*, attaching result files.

    *text* is written to *logFileName*, which is then redirected into the
    mail program as the message body.

    Args:
        branch, commit, last_commit: values interpolated into the subject.
        args: object providing ``lowerLimit``, ``ratioLimit`` (subject) and
            ``emails`` (recipients).
        text: message body, written with ``writelines``.
        results_files: string placed after mutt's ``-a`` option; only mutt
            receives the attachments.
        logFileName: path of the file that receives the message body.
        have_mutt: truthy when ``mutt`` is available; preferred.
        have_mail: truthy when ``mail`` is available; used only when mutt is
            not, and sends the body without attachments.

    If neither sender is available, a message is logged and nothing is sent.
    """
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)

    email_topic = '[ %s : %s ] Warning for %s : %s last_commit= %s speed< %s ratio< %s ' \
                  % (email_header, pid, branch, commit, last_commit,
                     args.lowerLimit, args.ratioLimit)

    # Exactly one sender must run; mutt is preferred because it can attach.
    if have_mutt:
        execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files
                + ' < ' + logFileName)
    elif have_mail:
        execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName)
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
def git_get_branches():
    """Return the names of the remote branches that should be tested.

    Runs ``git fetch -p`` and then ``git branch -rl`` through ``execute``
    and keeps every output line, stripped of surrounding whitespace, that
    does not contain ``HEAD``, ``coverity_scan`` or ``gh-pages``.

    Returns:
        A list of branch-name strings (empty when no line qualifies).
    """
    execute('git fetch -p', verbose)
    branches = execute('git branch -rl', verbose)
    excluded = ("HEAD", "coverity_scan", "gh-pages")
    # The result must be built and returned: the caller iterates over it.
    return [line.strip() for line in branches
            if not any(marker in line for marker in excluded)]
def git_get_changes(branch, commit, last_commit):
    """Return a text summary of the commits leading up to *commit*.

    Args:
        branch: branch name, used only in the heading of the summary.
        commit: the commit being tested.
        last_commit: the previously processed commit, or ``None`` when there
            is none; in that case the last 10 commits up to *commit* are
            listed instead of a range.

    Returns:
        A heading line followed by the ``git log`` output lines.
    """
    fmt = '--format="%h: (%an) %s , %ar"'
    if last_commit is None:
        commits = execute('git log -n 10 %s %s ' % (fmt, commit))
    else:
        # The range must be a single "<old>..<new>" token; with spaces
        # around ".." git does not see a revision range.
        commits = execute('git --no-pager log %s %s..%s' % (fmt, last_commit, commit))
    return 'Changes in %s since %s : \n ' % (branch, last_commit) + ' \n '.join(commits)
126 def get_last_results ( resultsFileName
):
127 if not os
. path
. isfile ( resultsFileName
):
128 return None , None , None , None
133 with
open ( resultsFileName
, 'r' ) as f
:
136 if len ( words
) <= 4 : # branch + commit + compilerVer + md5
141 if ( len ( words
) == 8 ) or ( len ( words
) == 9 ): # results: "filename" or "XX files"
142 csize
. append ( int ( words
[ 1 ]))
143 cspeed
. append ( float ( words
[ 3 ]))
144 dspeed
. append ( float ( words
[ 5 ]))
145 return commit
, csize
, cspeed
, dspeed
148 def benchmark_and_compare ( branch
, commit
, last_commit
, args
, executableName
, md5sum
, compilerVersion
, resultsFileName
,
149 testFilePath
, fileName
, last_csize
, last_cspeed
, last_dspeed
):
151 while os
. getloadavg ()[ 0 ] > args
. maxLoadAvg
:
152 log ( "WARNING: bench loadavg=%.2f is higher than %s , sleeping for %s seconds"
153 % ( os
. getloadavg ()[ 0 ], args
. maxLoadAvg
, sleepTime
))
154 time
. sleep ( sleepTime
)
155 start_load
= str ( os
. getloadavg ())
156 osType
= platform
. system ()
157 if osType
== 'Linux' :
158 cpuSelector
= "taskset --cpu-list 0"
162 result
= execute ( ' %s programs/ %s -rqi5b1e %s -D %s %s ' % ( cpuSelector
, executableName
, args
. lastCLevel
, args
. dictionary
, testFilePath
), print_output
= True )
164 result
= execute ( ' %s programs/ %s -rqi5b1e %s %s ' % ( cpuSelector
, executableName
, args
. lastCLevel
, testFilePath
), print_output
= True )
165 end_load
= str ( os
. getloadavg ())
166 linesExpected
= args
. lastCLevel
+ 1
167 if len ( result
) != linesExpected
:
168 raise RuntimeError ( "ERROR: number of result lines= %d is different that expected %d \n %s " % ( len ( result
), linesExpected
, ' \n ' . join ( result
)))
169 with
open ( resultsFileName
, "a" ) as myfile
:
170 myfile
. write ( ' %s %s %s md5= %s \n ' % ( branch
, commit
, compilerVersion
, md5sum
))
171 myfile
. write ( ' \n ' . join ( result
) + ' \n ' )
173 if ( last_cspeed
== None ):
174 log ( "WARNING: No data for comparison for branch= %s file= %s " % ( branch
, fileName
))
176 commit
, csize
, cspeed
, dspeed
= get_last_results ( resultsFileName
)
178 for i
in range ( 0 , min ( len ( cspeed
), len ( last_cspeed
))):
179 print ( " %s : %s - %d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s " % ( branch
, commit
, i
+ 1 , cspeed
[ i
], last_cspeed
[ i
], cspeed
[ i
]/ last_cspeed
[ i
], dspeed
[ i
], last_dspeed
[ i
], dspeed
[ i
]/ last_dspeed
[ i
], float ( last_csize
[ i
])/ csize
[ i
], fileName
))
180 if ( cspeed
[ i
]/ last_cspeed
[ i
] < args
. lowerLimit
):
181 text
+= "WARNING: %s - %d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s \n " % ( executableName
, i
+ 1 , cspeed
[ i
], last_cspeed
[ i
], cspeed
[ i
]/ last_cspeed
[ i
], fileName
)
182 if ( dspeed
[ i
]/ last_dspeed
[ i
] < args
. lowerLimit
):
183 text
+= "WARNING: %s - %d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s \n " % ( executableName
, i
+ 1 , dspeed
[ i
], last_dspeed
[ i
], dspeed
[ i
]/ last_dspeed
[ i
], fileName
)
184 if ( float ( last_csize
[ i
])/ csize
[ i
] < args
. ratioLimit
):
185 text
+= "WARNING: %s - %d cSize= %d last_cSize= %d diff=%.4f %s \n " % ( executableName
, i
+ 1 , csize
[ i
], last_csize
[ i
], float ( last_csize
[ i
])/ csize
[ i
], fileName
)
187 text
= args
. message
+ ( " \n maxLoadAvg= %s load average at start= %s end= %s \n %s last_commit= %s md5= %s \n " % ( args
. maxLoadAvg
, start_load
, end_load
, compilerVersion
, last_commit
, md5sum
)) + text
def update_config_file(branch, commit):
    """Record *commit* as the latest commit seen for *branch*.

    The per-branch state lives in ``<working_path>/commit_<branch>.txt``,
    where ``/`` in the branch name is replaced by ``_``.

    Args:
        branch: branch name.
        commit: commit identifier string to store.

    Returns:
        The previous content of the state file, or ``None`` when the file
        did not exist yet. The caller compares this with *commit* to decide
        whether the branch head was already processed.
    """
    # Must be bound even when no state file exists yet.
    last_commit = None
    commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt"
    if os.path.isfile(commitFileName):
        with open(commitFileName, 'r') as infile:
            last_commit = infile.read()
    with open(commitFileName, 'w') as outfile:
        outfile.write(commit)
    return last_commit
202 def double_check ( branch
, commit
, args
, executableName
, md5sum
, compilerVersion
, resultsFileName
, filePath
, fileName
):
203 last_commit
, csize
, cspeed
, dspeed
= get_last_results ( resultsFileName
)
205 text
= benchmark_and_compare ( branch
, commit
, last_commit
, args
, executableName
, md5sum
, compilerVersion
, resultsFileName
, filePath
, fileName
, csize
, cspeed
, dspeed
)
207 log ( "WARNING: redoing tests for branch %s : commit %s " % ( branch
, commit
))
208 text
= benchmark_and_compare ( branch
, commit
, last_commit
, args
, executableName
, md5sum
, compilerVersion
, resultsFileName
, filePath
, fileName
, csize
, cspeed
, dspeed
)
212 def test_commit ( branch
, commit
, last_commit
, args
, testFilePaths
, have_mutt
, have_mail
):
213 local_branch
= branch
. split ( '/' )[ 1 ]
214 version
= local_branch
. rpartition ( '-' )[ 2 ] + '_' + commit
216 execute ( 'make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT= %s " && ' % version
+
217 'mv programs/zstd programs/zstd_clang && ' +
218 'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT= %s "' % version
)
219 md5_zstd
= hashfile ( hashlib
. md5 (), clone_path
+ '/programs/zstd' )
220 md5_zstd32
= hashfile ( hashlib
. md5 (), clone_path
+ '/programs/zstd32' )
221 md5_zstd_clang
= hashfile ( hashlib
. md5 (), clone_path
+ '/programs/zstd_clang' )
222 print ( "md5(zstd)= %s \n md5(zstd32)= %s \n md5(zstd_clang)= %s " % ( md5_zstd
, md5_zstd32
, md5_zstd_clang
))
223 print ( "gcc_version= %s clang_version= %s " % ( gcc_version
, clang_version
))
225 logFileName
= working_path
+ "/log_" + branch
. replace ( "/" , "_" ) + ".txt"
229 dictName
= args
. dictionary
. rpartition ( '/' )[ 2 ]
233 for filePath
in testFilePaths
:
234 fileName
= filePath
. rpartition ( '/' )[ 2 ]
236 resultsFileName
= working_path
+ "/" + dictName
. replace ( "." , "_" ) + "_" + branch
. replace ( "/" , "_" ) + "_" + fileName
. replace ( "." , "_" ) + ".txt"
238 resultsFileName
= working_path
+ "/results_" + branch
. replace ( "/" , "_" ) + "_" + fileName
. replace ( "." , "_" ) + ".txt"
239 text
= double_check ( branch
, commit
, args
, 'zstd' , md5_zstd
, 'gcc_version=' + gcc_version
, resultsFileName
, filePath
, fileName
)
241 text_to_send
. append ( text
)
242 results_files
+= resultsFileName
+ " "
243 resultsFileName
= working_path
+ "/results32_" + branch
. replace ( "/" , "_" ) + "_" + fileName
. replace ( "." , "_" ) + ".txt"
244 text
= double_check ( branch
, commit
, args
, 'zstd32' , md5_zstd32
, 'gcc_version=' + gcc_version
, resultsFileName
, filePath
, fileName
)
246 text_to_send
. append ( text
)
247 results_files
+= resultsFileName
+ " "
248 resultsFileName
= working_path
+ "/resultsClang_" + branch
. replace ( "/" , "_" ) + "_" + fileName
. replace ( "." , "_" ) + ".txt"
249 text
= double_check ( branch
, commit
, args
, 'zstd_clang' , md5_zstd_clang
, 'clang_version=' + clang_version
, resultsFileName
, filePath
, fileName
)
251 text_to_send
. append ( text
)
252 results_files
+= resultsFileName
+ " "
254 send_email_with_attachments ( branch
, commit
, last_commit
, args
, text_to_send
, results_files
, logFileName
, have_mutt
, have_mail
)
257 if __name__
== '__main__' :
258 parser
= argparse
. ArgumentParser ()
259 parser
. add_argument ( 'testFileNames' , help = 'file or directory names list for speed benchmark' )
260 parser
. add_argument ( 'emails' , help = 'list of e-mail addresses to send warnings' )
261 parser
. add_argument ( '--dictionary' , '-D' , help = 'path to the dictionary' )
262 parser
. add_argument ( '--message' , '-m' , help = 'attach an additional message to e-mail' , default
= "" )
263 parser
. add_argument ( '--repoURL' , help = 'changes default repository URL' , default
= default_repo_url
)
264 parser
. add_argument ( '--lowerLimit' , '-l' , type = float , help = 'send email if speed is lower than given limit' , default
= 0.98 )
265 parser
. add_argument ( '--ratioLimit' , '-r' , type = float , help = 'send email if ratio is lower than given limit' , default
= 0.999 )
266 parser
. add_argument ( '--maxLoadAvg' , type = float , help = 'maximum load average to start testing' , default
= 0.75 )
267 parser
. add_argument ( '--lastCLevel' , type = int , help = 'last compression level for testing' , default
= 5 )
268 parser
. add_argument ( '--sleepTime' , '-s' , type = int , help = 'frequency of repository checking in seconds' , default
= 300 )
269 parser
. add_argument ( '--timeout' , '-t' , type = int , help = 'timeout for executing shell commands' , default
= 1800 )
270 parser
. add_argument ( '--dry-run' , dest
= 'dry_run' , action
= 'store_true' , help = 'not build' , default
= False )
271 parser
. add_argument ( '--verbose' , '-v' , action
= 'store_true' , help = 'more verbose logs' , default
= False )
272 args
= parser
. parse_args ()
273 verbose
= args
. verbose
275 # check if test files are accessible
276 testFileNames
= args
. testFileNames
. split ()
278 for fileName
in testFileNames
:
279 fileName
= os
. path
. expanduser ( fileName
)
280 if os
. path
. isfile ( fileName
) or os
. path
. isdir ( fileName
):
281 testFilePaths
. append ( os
. path
. abspath ( fileName
))
283 log ( "ERROR: File/directory not found: " + fileName
)
286 # check if dictionary is accessible
288 args
. dictionary
= os
. path
. abspath ( os
. path
. expanduser ( args
. dictionary
))
289 if not os
. path
. isfile ( args
. dictionary
):
290 log ( "ERROR: Dictionary not found: " + args
. dictionary
)
293 # check availability of e-mail senders
294 have_mutt
= does_command_exist ( "mutt -h" )
295 have_mail
= does_command_exist ( "mail -V" )
296 if not have_mutt
and not have_mail
:
297 log ( "ERROR: e-mail senders 'mail' or 'mutt' not found" )
300 clang_version
= execute ( "clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\ ([0-9.]* \\ ).*: \\ 1:' -e 's: \\ . \\ ([0-9][0-9] \\ ): \\ 1:g'" , verbose
)[ 0 ];
301 gcc_version
= execute ( "gcc -dumpversion" , verbose
)[ 0 ];
304 print ( "PARAMETERS: \n repoURL= %s " % args
. repoURL
)
305 print ( "working_path= %s " % working_path
)
306 print ( "clone_path= %s " % clone_path
)
307 print ( "testFilePath( %s )= %s " % ( len ( testFilePaths
), testFilePaths
))
308 print ( "message= %s " % args
. message
)
309 print ( "emails= %s " % args
. emails
)
310 print ( "dictionary= %s " % args
. dictionary
)
311 print ( "maxLoadAvg= %s " % args
. maxLoadAvg
)
312 print ( "lowerLimit= %s " % args
. lowerLimit
)
313 print ( "ratioLimit= %s " % args
. ratioLimit
)
314 print ( "lastCLevel= %s " % args
. lastCLevel
)
315 print ( "sleepTime= %s " % args
. sleepTime
)
316 print ( "timeout= %s " % args
. timeout
)
317 print ( "dry_run= %s " % args
. dry_run
)
318 print ( "verbose= %s " % args
. verbose
)
319 print ( "have_mutt= %s have_mail= %s " % ( have_mutt
, have_mail
))
321 # clone ZSTD repo if needed
322 if not os
. path
. isdir ( working_path
):
323 os
. mkdir ( working_path
)
324 if not os
. path
. isdir ( clone_path
):
325 execute
. cwd
= working_path
326 execute ( 'git clone ' + args
. repoURL
)
327 if not os
. path
. isdir ( clone_path
):
328 log ( "ERROR: ZSTD clone not found: " + clone_path
)
330 execute
. cwd
= clone_path
332 # check if speedTest.pid already exists
333 pidfile
= "./speedTest.pid"
334 if os
. path
. isfile ( pidfile
):
335 log ( "ERROR: %s already exists, exiting" % pidfile
)
338 send_email ( args
. emails
, '[ %s : %s ] test-zstd-speed.py %s has been started' % ( email_header
, pid
, script_version
), args
. message
, have_mutt
, have_mail
)
339 with
open ( pidfile
, 'w' ) as the_file
:
350 time
. sleep ( args
. sleepTime
)
351 loadavg
= os
. getloadavg ()[ 0 ]
352 if ( loadavg
<= args
. maxLoadAvg
):
353 branches
= git_get_branches ()
354 for branch
in branches
:
355 commit
= execute ( 'git show -s --format=%h ' + branch
, verbose
)[ 0 ]
356 last_commit
= update_config_file ( branch
, commit
)
357 if commit
== last_commit
:
358 log ( "skipping branch %s : head %s already processed" % ( branch
, commit
))
360 log ( "build branch %s : head %s is different from prev %s " % ( branch
, commit
, last_commit
))
361 execute ( 'git checkout -- . && git checkout ' + branch
)
362 print ( git_get_changes ( branch
, commit
, last_commit
))
363 test_commit ( branch
, commit
, last_commit
, args
, testFilePaths
, have_mutt
, have_mail
)
365 log ( "WARNING: main loadavg=%.2f is higher than %s " % ( loadavg
, args
. maxLoadAvg
))
367 log ( "sleep for %s seconds" % args
. sleepTime
)
368 except Exception as e
:
369 stack
= traceback
. format_exc ()
370 email_topic
= '[ %s : %s ] ERROR in %s : %s ' % ( email_header
, pid
, branch
, commit
)
371 send_email ( args
. emails
, email_topic
, stack
, have_mutt
, have_mail
)
373 except KeyboardInterrupt :
375 send_email ( args
. emails
, '[ %s : %s ] test-zstd-speed.py %s has been stopped' % ( email_header
, pid
, script_version
), args
. message
, have_mutt
, have_mail
)