Diffstat (limited to 'tests')
-rwxr-xr-x  tests/runner.py             5
-rw-r--r--  tests/test_benchmark.py   208
2 files changed, 116 insertions, 97 deletions
diff --git a/tests/runner.py b/tests/runner.py
index 8a5e1129..37e307e9 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -328,7 +328,10 @@ process(sys.argv[1])
       os.makedirs(ret)
     return ret
 
-  def get_library(self, name, generated_libs, configure=['sh', './configure'], configure_args=[], make=['make'], make_args=['-j', '2'], cache=True, env_init={}, cache_name_extra='', native=False):
+  def get_library(self, name, generated_libs, configure=['sh', './configure'], configure_args=[], make=['make'], make_args='help', cache=True, env_init={}, cache_name_extra='', native=False):
+    if make_args == 'help':
+      make_args = ['-j', str(multiprocessing.cpu_count())]
+
     build_dir = self.get_build_dir()
     output_dir = self.get_dir()
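The new make_args='help' default in get_library is a sentinel value: the real default, one make job per CPU core, has to be computed at call time rather than in the signature, so a placeholder marks "no value given". A minimal sketch of the same pattern, using a unique object as the sentinel instead of the string 'help' (the helper name is hypothetical, not part of the patch):

import multiprocessing

_NO_VALUE = object()  # hypothetical sentinel; the patch uses the string 'help'

def resolve_make_args(make_args=_NO_VALUE):
  # The default is computed when the function runs, not when it is
  # defined, so it reflects the machine actually doing the build.
  if make_args is _NO_VALUE:
    make_args = ['-j', str(multiprocessing.cpu_count())]
  return make_args

print(resolve_make_args())             # e.g. ['-j', '8'] on an 8-core machine
print(resolve_make_args(['-j', '2']))  # an explicit value passes through unchanged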
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 63e0041f..2f4d26fd 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -14,6 +14,109 @@ DEFAULT_ARG = '4'
 
 TEST_REPS = 2
 
+CORE_BENCHMARKS = True # core benchmarks vs full regression suite
+
+class Benchmarker:
+  def __init__(self, name):
+    self.name = name
+
+  def bench(self, args, output_parser=None):
+    self.times = []
+    for i in range(TEST_REPS):
+      start = time.time()
+      output = self.run(args)
+      if not output_parser:
+        curr = time.time()-start
+      else:
+        curr = output_parser(output)
+      self.times.append(curr)
+
+  def display(self, baseline=None):
+    if baseline == self: baseline = None
+    mean = sum(self.times)/len(self.times)
+    squared_times = map(lambda x: x*x, self.times)
+    mean_of_squared = sum(squared_times)/len(self.times)
+    std = math.sqrt(mean_of_squared - mean*mean)
+    sorted_times = self.times[:]
+    sorted_times.sort()
+    median = sum(sorted_times[len(sorted_times)/2 - 1:len(sorted_times)/2 + 1])/2
+
+    print ' %10s: mean: %4.3f (+-%4.3f) secs  median: %4.3f  range: %4.3f-%4.3f  (noise: %4.3f%%)  (%d runs)' % (self.name, mean, std, median, min(self.times), max(self.times), 100*std/mean, TEST_REPS),
+
+    if baseline:
+      mean_baseline = sum(baseline.times)/len(baseline.times)
+      final = mean / mean_baseline
+      print '  Relative: %.2f X slower' % final
+    else:
+      print
+
+class NativeBenchmarker(Benchmarker):
+  def __init__(self, name, cc, cxx):
+    self.name = name
+    self.cc = cc
+    self.cxx = cxx
+
+  def build(self, parent, filename, args, shared_args, emcc_args, native_args, native_exec):
+    self.parent = parent
+    if not native_exec:
+      compiler = self.cxx if filename.endswith('cpp') else self.cc
+      process = Popen([compiler, '-O2', '-fno-math-errno', filename, '-o', filename+'.native'] + shared_args + native_args, stdout=PIPE, stderr=parent.stderr_redirect)
+      output = process.communicate()
+      if process.returncode is not 0:
+        print >> sys.stderr, "Building native executable with command '%s' failed with a return code %d!" % (' '.join([compiler, '-O2', filename, '-o', filename+'.native']), process.returncode)
+        print "Output: " + output[0]
+    else:
+      print '(using clang)'
+      shutil.copyfile(native_exec, filename + '.native')
+      shutil.copymode(native_exec, filename + '.native')
+    self.filename = filename
+
+  def run(self, args):
+    process = Popen([self.filename+'.native'] + args, stdout=PIPE, stderr=PIPE)
+    return process.communicate()[0]
+
+class JSBenchmarker(Benchmarker):
+  def __init__(self, name, engine, extra_args=[]):
+    self.name = name
+    self.engine = engine
+    self.extra_args = extra_args
+
+  def build(self, parent, filename, args, shared_args, emcc_args, native_args, native_exec):
+    self.filename = filename
+
+    open('hardcode.py', 'w').write('''
+def process(filename):
+  js = open(filename).read()
+  replaced = js.replace("run();", "run(%s.concat(Module[\\"arguments\\"]));")
+  assert js != replaced
+  open(filename, 'w').write(replaced)
+import sys
+process(sys.argv[1])
+''' % str(args[:-1]) # do not hardcode in the last argument, the default arg
+)
+
+    try_delete(filename + '.js')
+    output = Popen([PYTHON, EMCC, filename, #'-O3',
+                    '-O2', '-s', 'DOUBLE_MODE=0', '-s', 'PRECISE_I64_MATH=0',
+                    '--memory-init-file', '0', '--js-transform', 'python hardcode.py',
+                    '-s', 'TOTAL_MEMORY=128*1024*1024',
+                    #'--closure', '1',
+                    #'-g',
+                    '-o', filename + '.js'] + shared_args + emcc_args + self.extra_args, stdout=PIPE, stderr=PIPE).communicate()
+    assert os.path.exists(filename + '.js'), 'Failed to compile file: ' + output[0]
+
+  def run(self, args):
+    return run_js(self.filename + '.js', engine=self.engine, args=args, stderr=PIPE, full_output=True)
+
+# Benchmarkers
+benchmarkers = [
+  NativeBenchmarker('clang', CLANG_CC, CLANG),
+  NativeBenchmarker('gcc', 'gcc', 'g++'),
+  JSBenchmarker('sm-f32', SPIDERMONKEY_ENGINE, ['-s', 'PRECISE_F32=2']),
+  JSBenchmarker('sm', SPIDERMONKEY_ENGINE),
+  JSBenchmarker('v8', V8_ENGINE)
+]
+
 class benchmark(RunnerCore):
   save_dir = True
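Benchmarker is a small template-method split: bench() and display() in the base class handle timing, statistics, and reporting, while each subclass supplies only build() and run(). do_benchmark (further below) passes benchmarkers[0] as the baseline, so every other entry is reported relative to clang. A minimal sketch of what a hypothetical extra entry would need to provide (NullBenchmarker is not part of the patch):

class NullBenchmarker(Benchmarker):
  # Hypothetical subclass that measures pure harness overhead:
  # build() prepares nothing and run() returns immediately.
  def build(self, parent, filename, args, shared_args, emcc_args, native_args, native_exec):
    pass

  def run(self, args):
    return ''  # bench() times this call; display() prints its stats

# It would then just be appended to the list the harness iterates over:
# benchmarkers.append(NullBenchmarker('null'))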
@@ -54,41 +157,6 @@ class benchmark(RunnerCore):
     JS_ENGINE = Building.JS_ENGINE_OVERRIDE if Building.JS_ENGINE_OVERRIDE is not None else JS_ENGINES[0]
     print 'Benchmarking JS engine: %s' % JS_ENGINE
 
-  def print_stats(self, times, native_times, last=False, reps=TEST_REPS):
-    if reps == 0:
-      print '(no reps)'
-      return
-    mean = sum(times)/len(times)
-    squared_times = map(lambda x: x*x, times)
-    mean_of_squared = sum(squared_times)/len(times)
-    std = math.sqrt(mean_of_squared - mean*mean)
-    sorted_times = times[:]
-    sorted_times.sort()
-    median = sum(sorted_times[len(sorted_times)/2 - 1:len(sorted_times)/2 + 1])/2
-
-    mean_native = sum(native_times)/len(native_times)
-    squared_native_times = map(lambda x: x*x, native_times)
-    mean_of_squared_native = sum(squared_native_times)/len(native_times)
-    std_native = math.sqrt(mean_of_squared_native - mean_native*mean_native)
-    sorted_native_times = native_times[:]
-    sorted_native_times.sort()
-    median_native = sum(sorted_native_times[len(sorted_native_times)/2 - 1:len(sorted_native_times)/2 + 1])/2
-
-    final = mean / mean_native
-
-    if last:
-      norm = 0
-      for i in range(len(times)):
-        norm += times[i]/native_times[i]
-      norm /= len(times)
-      print
-      print '   JavaScript: %.3f    Native: %.3f   Ratio: %.3f   Normalized ratio: %.3f' % (mean, mean_native, final, norm)
-      return
-
-    print
-    print '   JavaScript: mean: %.3f (+-%.3f) secs  median: %.3f  range: %.3f-%.3f  (noise: %3.3f%%)  (%d runs)' % (mean, std, median, min(times), max(times), 100*std/mean, reps)
-    print '   Native    : mean: %.3f (+-%.3f) secs  median: %.3f  range: %.3f-%.3f  (noise: %3.3f%%)  JS is %.2f X slower' % (mean_native, std_native, median_native, min(native_times), max(native_times), 100*std_native/mean_native, final)
-
   def do_benchmark(self, name, src, expected_output='FAIL', args=[], emcc_args=[], native_args=[], shared_args=[], force_c=False, reps=TEST_REPS, native_exec=None, output_parser=None, args_processor=None):
     args = args or [DEFAULT_ARG]
     if args_processor: args = args_processor(args)
@@ -98,68 +166,12 @@ class benchmark(RunnerCore):
     f = open(filename, 'w')
     f.write(src)
     f.close()
-    final_filename = os.path.join(dirname, name + '.js')
-
-    open('hardcode.py', 'w').write('''
-def process(filename):
-  js = open(filename).read()
-  replaced = js.replace("run();", "run(%s.concat(Module[\\"arguments\\"]));")
-  assert js != replaced
-  open(filename, 'w').write(replaced)
-import sys
-process(sys.argv[1])
-''' % str(args[:-1]) # do not hardcode in the last argument, the default arg
-)
-
-    try_delete(final_filename)
-    output = Popen([PYTHON, EMCC, filename, #'-O3',
-                    '-O2', '-s', 'DOUBLE_MODE=0', '-s', 'PRECISE_I64_MATH=0',
-                    '--memory-init-file', '0', '--js-transform', 'python hardcode.py',
-                    '-s', 'TOTAL_MEMORY=128*1024*1024',
-                    '--closure', '1',
-                    #'-s', 'PRECISE_F32=1',
-                    #'-g',
-                    '-o', final_filename] + shared_args + emcc_args, stdout=PIPE, stderr=self.stderr_redirect).communicate()
-    assert os.path.exists(final_filename), 'Failed to compile file: ' + output[0]
-
-    # Run JS
-    times = []
-    for i in range(reps):
-      start = time.time()
-      js_output = run_js(final_filename, engine=JS_ENGINE, args=args, stderr=PIPE, full_output=True)
-
-      if i == 0 and 'uccessfully compiled asm.js code' in js_output:
-        if 'asm.js link error' not in js_output:
-          print "[%s was asm.js'ified]" % name
-      if not output_parser:
-        curr = time.time()-start
-      else:
-        curr = output_parser(js_output)
-      times.append(curr)
-      if i == 0:
-        # Sanity check on output
-        self.assertContained(expected_output, js_output)
-
-    # Run natively
-    if not native_exec:
-      self.build_native(filename, shared_args + native_args)
-    else:
-      shutil.copyfile(native_exec, filename + '.native')
-      shutil.copymode(native_exec, filename + '.native')
-    native_times = []
-    for i in range(reps):
-      start = time.time()
-      native_output = self.run_native(filename, args)
-      if i == 0:
-        # Sanity check on output
-        self.assertContained(expected_output, native_output)
-      if not output_parser:
-        curr = time.time()-start
-      else:
-        curr = output_parser(native_output)
-      native_times.append(curr)
-
-    self.print_stats(times, native_times, reps=reps)
+
+    print
+    for b in benchmarkers:
+      b.build(self, filename, args, shared_args, emcc_args, native_args, native_exec)
+      b.bench(args, output_parser)
+      b.display(benchmarkers[0])
 
   def test_primes(self):
     src = r'''
@@ -402,9 +414,11 @@ process(sys.argv[1])
     self.fasta('fasta_float', 'float')
 
   def test_fasta_double(self):
+    if CORE_BENCHMARKS: return
     self.fasta('fasta_double', 'double')
 
   def test_fasta_double_full(self):
+    if CORE_BENCHMARKS: return
     self.fasta('fasta_double_full', 'double', emcc_args=['-s', 'DOUBLE_MODE=1'])
 
   def test_skinning(self):
@@ -412,10 +426,12 @@ process(sys.argv[1])
     self.do_benchmark('skinning', src, 'blah=0.000000')
 
   def test_life(self):
+    if CORE_BENCHMARKS: return
     src = open(path_from_root('tests', 'life.c'), 'r').read()
     self.do_benchmark('life', src, '''--------------------------------''', shared_args=['-std=c99'], force_c=True)
 
   def test_linpack_double(self):
+    if CORE_BENCHMARKS: return
     def output_parser(output):
       return 100.0/float(re.search('Unrolled Double Precision +([\d\.]+) Mflops', output).group(1))
     self.do_benchmark('linpack_double', open(path_from_root('tests', 'linpack.c')).read(), '''Unrolled Double Precision''', force_c=True, output_parser=output_parser)
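A note on the statistics shared by display() and the removed print_stats: the standard deviation uses the identity Var(X) = E[X^2] - E[X]^2, and the median is the average of the two values straddling the midpoint, which is the true median for even run counts such as the default TEST_REPS = 2 (for odd counts it would average the wrong pair, e.g. the first two of three). A sketch checking the same arithmetic against Python 3's statistics module (the suite itself is Python 2):

import math
import statistics

times = [2.13, 1.87]  # example timings for TEST_REPS = 2
mean = sum(times) / len(times)
std = math.sqrt(sum(t * t for t in times) / len(times) - mean * mean)
median = sum(sorted(times)[0:2]) / 2.0  # the two middle values of a 2-run sample

assert abs(mean - statistics.mean(times)) < 1e-9
assert abs(std - statistics.pstdev(times)) < 1e-9  # population std dev, same identity
assert median == statistics.median(times)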