2 files changed, 12 insertions, 6 deletions
diff --git a/emscripten.py b/emscripten.py
index bf049ee5..e64e11e7 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -43,7 +43,7 @@ def scan(ll, settings):
   if len(blockaddrs) > 0:
     settings['NECESSARY_BLOCKADDRS'] = blockaddrs
 
-NUM_CHUNKS_PER_CORE = 2
+NUM_CHUNKS_PER_CORE = 1.25 # may be fractional: the intended chunk count is int(round(cores * NUM_CHUNKS_PER_CORE))
 MIN_CHUNK_SIZE = 1024*1024
 MAX_CHUNK_SIZE = float(os.environ.get('EMSCRIPT_MAX_CHUNK_SIZE') or 'inf') # configuring this is just for debugging purposes
 
@@ -163,7 +163,7 @@ def emscript(infile, settings, outfile, libraries=[]):
   cores = multiprocessing.cpu_count()
   assert cores >= 1
   if cores > 1:
-    intended_num_chunks = int(cores * NUM_CHUNKS_PER_CORE)
+    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
     chunk_size = max(MIN_CHUNK_SIZE, total_ll_size / intended_num_chunks)
     chunk_size += 3*len(meta) + len(forwarded_data)/3 # keep ratio of lots of function code to meta (expensive to process, and done in each parallel task) and forwarded data (less expensive but potentially significant)
     chunk_size = min(MAX_CHUNK_SIZE, chunk_size)
diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py
index cbf64486..32481bf2 100644
--- a/tools/js_optimizer.py
+++ b/tools/js_optimizer.py
@@ -10,7 +10,9 @@ def path_from_root(*pathelems):
 
 JS_OPTIMIZER = path_from_root('tools', 'js-optimizer.js')
 
-BEST_JS_PROCESS_SIZE = 1024*1024
+NUM_CHUNKS_PER_CORE = 1.25 # may be fractional: the intended chunk count is int(round(cores * NUM_CHUNKS_PER_CORE))
+MIN_CHUNK_SIZE = 1024*1024 # lower bound on the chunk size handed to chunkify (same units as len(js))
+MAX_CHUNK_SIZE = 20*1024*1024 # upper bound on the chunk size handed to chunkify (same units as len(js))
 
 WINDOWS = sys.platform.startswith('win')
 
@@ -98,7 +100,11 @@ def run_on_js(filename, passes, js_engine, jcache):
   total_size = len(js)
   js = None
 
-  chunks = shared.JCache.chunkify(funcs, BEST_JS_PROCESS_SIZE, 'jsopt' if jcache else None)
+  cores = multiprocessing.cpu_count()
+  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
+  chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
+
+  chunks = shared.JCache.chunkify(funcs, chunk_size, 'jsopt' if jcache else None)
 
   if jcache:
     # load chunks from cache where we can # TODO: ignore small chunks
@@ -137,12 +143,12 @@ def run_on_js(filename, passes, js_engine, jcache):
     cores = min(multiprocessing.cpu_count(), filenames)
     if len(chunks) > 1 and cores >= 2:
       # We can parallelize
-      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores  (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.))
+      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d, using %d cores  (total: %.2f MB)' % (len(chunks), chunk_size, cores, total_size/(1024*1024.))
       pool = multiprocessing.Pool(processes=cores)
       filenames = pool.map(run_on_chunk, commands, chunksize=1)
     else:
       # We can't parallize, but still break into chunks to avoid uglify/node memory issues
-      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
+      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d' % (len(chunks), chunk_size)
       filenames = [run_on_chunk(command) for command in commands]
   else:
     filenames = []