stop using lots of memory to manage strings at the end of compilation

author: Alon Zakai <alonzakai@gmail.com> 2011-03-15 20:13:57 -0700
committer: Alon Zakai <alonzakai@gmail.com> 2011-03-15 20:13:57 -0700
commit: 5524f8bf30941862789eadef176b254fe3186cdf (patch)
tree: d0fed0756943a15ac4ee6a259a8f462c92a8c543
parent: 6c22a66671b6f42ad63044b0fd46a5dc892298b4 (diff)
7 files changed, 86 insertions, 43 deletions
diff --git a/src/compiler.js b/src/compiler.js
index b60f650e..1a48fad2 100644
--- a/src/compiler.js
+++ b/src/compiler.js
@@ -58,5 +58,5 @@ do {
 
 // Do it
 
-print(JSify(analyzer(intertyper(lines))));
+JSify(analyzer(intertyper(lines)));
 
diff --git a/src/jsifier.js b/src/jsifier.js
index 6742f33c..7a25983c 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -1135,28 +1135,39 @@ function JSify(data, functionsOnly, givenFunctions, givenGlobalVariables) {
     });
     items = null;
 
-    var ret = [];
+    var generated = [];
     if (!functionsOnly) {
-      ret = ret.concat(itemsDict.type).concat(itemsDict.GlobalVariableStub).concat(itemsDict.functionStub);
+      generated = generated.concat(itemsDict.type).concat(itemsDict.GlobalVariableStub).concat(itemsDict.functionStub);
     }
-    ret = ret.concat(itemsDict.function).concat(data.unparsedFunctions);
-
-    ret = ret.map(function(item) { return item.JS }).join('\n');
-
-    if (functionsOnly) return ret;
+    generated = generated.concat(itemsDict.function).concat(data.unparsedFunctions);
+
+    if (functionsOnly) return generated.map(function(item) { return item.JS }).join('\n');
+
+    // We are ready to print out the data, but must do so carefully - we are
+    // dealing with potentially *huge* strings. Convenient replacements and
+    // manipulations may create in-memory copies, and we may OOM.
+    //
+    // Final shape that we now create:
+    //    shell
+    //      (body)
+    //        preamble
+    //          runtime
+    //        generated code
+    //        postamble
+    //          global_vars
 
-    var pre = preprocess(read('preamble.js').replace('{{RUNTIME}}', getRuntime()), CONSTANTS);
-    var post = preprocess(read('postamble.js'), CONSTANTS);
-    ret = pre + ret + post;
-    var globalVars = itemsDict.GlobalVariable.map(function(item) { return item.JS }).join('\n');
-    var globalVarsPostSets = itemsDict.GlobalVariablePostSet.map(function(item) { return item.JS }).join('\n');
-    ret = indentify(ret, 2);
-    // ret may be a very large string at this point - we may not be able to allocate two of it. So must be careful in these last steps
     var shellParts = read('shell.js').split('{{BODY}}');
-    ret = shellParts[0] + ret + shellParts[1];
-    globalVars = indentify(globalVars+'\n\n\n'+globalVarsPostSets, 4);
-    ret = ret.replace('{{GLOBAL_VARS}}', globalVars);
-    return processMacros(ret);
+    print(shellParts[0]);
+      var pre = processMacros(preprocess(read('preamble.js').replace('{{RUNTIME}}', getRuntime()), CONSTANTS));
+      print(pre);
+      generated.forEach(function(item) { print(indentify(item.JS || '', 2)); });
+
+      var postParts = processMacros(preprocess(read('postamble.js'), CONSTANTS)).split('{{GLOBAL_VARS}}');
+      print(postParts[0]);
+        itemsDict.GlobalVariable.forEach(function(item) { print(indentify(item.JS, 4)); });
+        itemsDict.GlobalVariablePostSet.forEach(function(item) { print(indentify(item.JS, 4)); });
+      print(postParts[1]);
+    print(shellParts[1]);
   }
 
   // Data
diff --git a/src/library.js b/src/library.js
index 1eadfa8b..81eb358a 100644
--- a/src/library.js
+++ b/src/library.js
@@ -1138,7 +1138,7 @@ var Library = {
   // setjmp.h
 
   _setjmp: function(env) {
-    print('WARNING: setjmp() not really implemented, will fail if longjmp() is actually called');
+    // XXX print('WARNING: setjmp() not really implemented, will fail if longjmp() is actually called');
     return 0;
   },
 
diff --git a/src/parseTools.js b/src/parseTools.js
index 92041f21..94ea4f46 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -1,6 +1,15 @@
 // Various tools for parsing LLVM. Utilities of various sorts, that are
 // specific to Emscripten (and hence not in utility.js).
 
+// Does simple 'macro' substitution, using Django-like syntax: each
+// {{{ code }}} (code must not contain '}') becomes |eval(code).toString()|.
+function processMacros(text) {
+  return text.replace(/{{{[^}]+}}}/g, function(str) {
+    str = str.substr(3, str.length-6);
+    return eval(str).toString();
+  });
+}
+
 // Simple #if/else/endif preprocessing for a file. Checks if the
 // ident checked is true in our global. Also replaces some constants.
 function preprocess(text, constants) {
@@ -545,6 +554,7 @@ function getActualLabelId(labelId) {
 // Misc
 
 function indentify(text, indent) {
+  if (text.length > 1024*1024) return text; // Don't try to indentify huge strings - we may run out of memory
   if (typeof indent === 'number') {
     var len = indent;
     indent = '';
diff --git a/tests/dlmalloc.c b/tests/dlmalloc.c
index 78ab20aa..c8fce144 100644
--- a/tests/dlmalloc.c
+++ b/tests/dlmalloc.c
@@ -5701,7 +5701,7 @@ History:
 // Emscripten tests
 
 int main() {
-  #define NUM 100
+  #define NUM 10
   char* allocations[NUM];
   for (int i = 0; i < NUM; i++) {
     allocations[i] = (char*)malloc(1024*(i+1));
diff --git a/tests/runner.py b/tests/runner.py
index 7680d283..696c3c25 100644
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -191,12 +191,16 @@ class RunnerCore(unittest.TestCase):
     return Popen([LLVM_INTERPRETER] + args, stdout=PIPE, stderr=STDOUT).communicate()[0]
 
   def assertContained(self, value, string):
+    if type(value) is not str: value = value() # lazy loading
+    if type(string) is not str: string = string()
     if value not in string:
-      raise Exception("Expected to find '%s' in '%s'" % (value, string))
+      raise Exception("Expected to find '%s' in '%s'" % (limit_size(value), limit_size(string)))
 
   def assertNotContained(self, value, string):
+    if type(value) is not str: value = value() # lazy loading
+    if type(string) is not str: string = string()
     if value in string:
-      raise Exception("Expected to NOT find '%s' in '%s'" % (value, string))
+      raise Exception("Expected to NOT find '%s' in '%s'" % (limit_size(value), limit_size(string)))
 
 ###################################################################################################
 
@@ -1590,13 +1594,14 @@ if 'benchmark' not in sys.argv:
     def get_library(self, name, generated_libs, configure=['./configure'], configure_args=[], make=['make'], make_args=['-j', '2'], cache=True):
       if type(generated_libs) is not list: generated_libs = [generated_libs]
 
-      cache_name = name + '|' + COMPILER
-      if cache and GlobalCache.get(cache_name):
-        bc_file = os.path.join(self.get_dir(), 'lib' + name + '.bc')
-        f = open(bc_file, 'wb')
-        f.write(GlobalCache[cache_name])
-        f.close()
-        return bc_file
+      if GlobalCache:
+        cache_name = name + '|' + COMPILER
+        if cache and GlobalCache.get(cache_name):
+          bc_file = os.path.join(self.get_dir(), 'lib' + name + '.bc')
+          f = open(bc_file, 'wb')
+          f.write(GlobalCache[cache_name])
+          f.close()
+          return bc_file
 
       temp_dir = self.get_building_dir()
       project_dir = os.path.join(temp_dir, name)
@@ -1612,10 +1617,13 @@ if 'benchmark' not in sys.argv:
       Popen(make + make_args, stdout=PIPE, stderr=STDOUT, env=env).communicate()[0]
       bc_file = os.path.join(project_dir, 'bc.bc')
       self.do_link(map(lambda lib: os.path.join(project_dir, lib), generated_libs), bc_file)
-      if cache:
+      if cache and GlobalCache:
         GlobalCache[cache_name] = open(bc_file, 'rb').read()
       return bc_file
 
+    def get_freetype(self): # Shared by test_freetype and the test that links poppler; returns what get_library() returns for freetype
+      return self.get_library('freetype', os.path.join('objs', '.libs', 'libfreetype.so'))
+
     def test_freetype(self):
       if LLVM_OPTS or COMPILER == CLANG: global RELOOP; RELOOP = 0 # Too slow; we do care about typed arrays and OPTIMIZE though
       global CORRECT_SIGNS; CORRECT_SIGNS = 1 # Not sure why, but needed
@@ -1634,7 +1642,7 @@ if 'benchmark' not in sys.argv:
       self.do_test(open(path_from_root('tests', 'freetype', 'main.c'), 'r').read(),
                    open(path_from_root('tests', 'freetype', 'ref.txt'), 'r').read(),
                    ['font.ttf', 'test!', '150', '120', '25'],
-                   libraries=[self.get_library('freetype', os.path.join('objs', '.libs', 'libfreetype.so'))],
+                   libraries=[self.get_freetype()],
                    includes=[path_from_root('tests', 'freetype', 'include')],
                    post_build=post)
 
@@ -1665,37 +1673,45 @@ if 'benchmark' not in sys.argv:
 
       global COMPILER_TEST_OPTS; COMPILER_TEST_OPTS = ['-I' + path_from_root('tests', 'libcxx', 'include')] # Avoid libstdc++ linking issue, see libcxx test
 
+      # See post(), below
+      input_file = open(os.path.join(self.get_dir(), 'paper.pdf.js'), 'w')
+      input_file.write(str(map(ord, open(path_from_root('tests', 'poppler', 'paper.pdf'), 'rb').read())))
+      input_file.close()
+
       def post(filename):
-        src = open(filename, 'r').read().replace(
+        src = open(filename, 'r').read()
+        # Do not do several replacements of this huge string. Hack around that. This is why we create the input_file, above
+        src = src.replace( # XXX add full path to .pdf.js
           '// {{PRE_RUN_ADDITIONS}}',
-          '''this._STDIO.prepare('paper.pdf', %s);''' % str(
-            map(ord, open(path_from_root('tests', 'poppler', 'paper.pdf'), 'rb').read())
-          )
+          '''this._STDIO.prepare('paper.pdf', eval(read('paper.pdf.js')));
+             run(args);
+             run = function() {};
+             print("Data: " + JSON.stringify(this._STDIO.streams[this._STDIO.filenames['*s-0*d.']].data)); // work around __formatString__ fail'''
         )
         open(filename, 'w').write(src)
 
       fontconfig = self.get_library('fontconfig', [os.path.join('src', '.libs', 'libfontconfig.a')])
 
+      freetype = self.get_freetype()
+
       poppler = self.get_library('poppler',
                                  [os.path.join('poppler', '.libs', 'libpoppler.so.13.0.0'),
                                   os.path.join('goo', '.libs', 'libgoo.a'),
                                   os.path.join('fofi', '.libs', 'libfofi.a'),
                                   os.path.join('splash', '.libs', 'libsplash.a'),
-                                  #os.path.join('poppler', 'SplashOutputDev.o'),
                                   os.path.join('utils', 'pdftoppm.o'),
                                   os.path.join('utils', 'parseargs.o')],
                                  configure_args=['--disable-libjpeg', '--disable-libpng'])
 
       # Combine libraries
-      # TODO: FreeType XXX DO THIS
 
       combined = os.path.join(self.get_building_dir(), 'combined.bc')
-      self.do_link([fontconfig, poppler], combined)
+      self.do_link([fontconfig, freetype, poppler], combined)
 
       self.do_ll_test(combined,
-                      'halp',#open(path_from_root('tests', 'poppler', 'ref.txt'), 'r').read(),
+                      lambda: map(ord, open(path_from_root('tests', 'poppler', 'ref.ppm'), 'r').read()).__str__().replace(' ', ''),
                       args='-scale-to 512 paper.pdf filename'.split(' '),
-                      post_build=post, build_ll_hook=self.do_autodebug)
+                      post_build=post)#, build_ll_hook=self.do_autodebug)
 
     def test_openjpeg(self):
       global SAFE_HEAP; SAFE_HEAP = 0 # Very slow
@@ -1788,8 +1804,7 @@ if 'benchmark' not in sys.argv:
       global CORRECT_SIGNS; CORRECT_SIGNS = 1 # Not sure why, but needed
       self.do_ll_test(path_from_root('tests', 'python', 'python.ll'),
                       'hello python world!\n\n[0, 2, 4, 6]\n\n5\n\n22\n\n5.470',
-                      args=['-S', '-c' '''print "hello python world!"; print [x*2 for x in range(4)]; t=2; print 10-3-t; print (lambda x: x*2)(11); print '%f' % 5.47'''],
-                      js_engines=[V8_ENGINE]) # script stack space exceeded in SpiderMonkey, TODO
+                      args=['-S', '-c' '''print "hello python world!"; print [x*2 for x in range(4)]; t=2; print 10-3-t; print (lambda x: x*2)(11); print '%f' % 5.47'''])
 
     ### Test cases in separate files
 
diff --git a/tools/shared.py b/tools/shared.py
index 45473e2c..2257d4e4 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -22,6 +22,9 @@ LLVM_COMPILER=os.path.expanduser(os.path.join(LLVM_ROOT, 'llc'))
 if '-s' not in SPIDERMONKEY_ENGINE:
   SPIDERMONKEY_ENGINE += ['-s'] # Strict mode in SpiderMonkey. With V8 we check that fallback to non-strict works too
 
+if 'stackQuota' not in str(SPIDERMONKEY_ENGINE):
+  SPIDERMONKEY_ENGINE += ['-e', 'stackQuota(100000000000)'] # Our very large files need lots of stack space; the guard avoids adding the option twice
+
 # Utilities
 
 def timeout_run(proc, timeout, note):
@@ -60,3 +63,7 @@ def line_splitter(data):
 
   return out
 
+def limit_size(string, MAX=80*20): # Returns string unchanged if shorter than MAX, else its first MAX chars + '...'
+  if len(string) < MAX: return string
+  return string[0:MAX] + '...'
+
author	Alon Zakai <alonzakai@gmail.com>	2011-03-15 20:13:57 -0700
committer	Alon Zakai <alonzakai@gmail.com>	2011-03-15 20:13:57 -0700
commit	5524f8bf30941862789eadef176b254fe3186cdf (patch)
tree	d0fed0756943a15ac4ee6a259a8f462c92a8c543
parent	6c22a66671b6f42ad63044b0fd46a5dc892298b4 (diff)