15 files changed, 2623 insertions, 0 deletions
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
new file mode 100644
index 0000000000..a818302c24
--- /dev/null
+++ b/docs/CommandGuide/lit.pod
@@ -0,0 +1,222 @@
+=pod
+
+=head1 NAME
+
+lit - LLVM Integrated Tester
+
+=head1 SYNOPSIS
+
+B<lit> [I<options>] [I<tests>]
+
+=head1 DESCRIPTION
+
+B<lit> is a portable tool for executing LLVM and Clang style test suites,
+summarizing their results, and providing indication of failures. B<lit> is
+designed to be a lightweight testing tool with as simple a user interface as
+possible.
+
+B<lit> should be run with one or more I<tests> to run specified on the command
+line. Tests can be either individual test files or directories to search for
+tests (see L<"TEST DISCOVERY">).
+
+Each specified test will be executed (potentially in parallel) and once all
+tests have been run B<lit> will print summary information on the number of tests
+which passed or failed (see L<"TEST STATUS RESULTS">). The B<lit> program will
+execute with a non-zero exit code if any tests fail.
+
+By default B<lit> will use a succinct progress display and will only print
+summary information for test failures. See L<"OUTPUT OPTIONS"> for options
+controlling the B<lit> progress display and output.
+
+B<lit> also includes a number of options for controlling how tests are exected
+(specific features may depend on the particular test format). See L<"EXECUTION
+OPTIONS"> for more information.
+
+Finally, B<lit> also supports additional options for only running a subset of
+the options specified on the command line, see L<"SELECTION OPTIONS"> for
+more information.
+
+=head1 GENERAL OPTIONS
+
+=over
+
+=item B<-h>, B<--help>
+
+Show the B<lit> help message.
+
+=item B<-j> I<N>, B<--threads>=I<N>
+
+Run I<N> tests in parallel. By default, this is automatically chose to match the
+number of detected available CPUs.
+
+=back 
+
+=head1 OUTPUT OPTIONS
+
+=over
+
+=item B<-q>, B<--quiet>
+
+Suppress any output except for test failures.
+
+=item B<-s>, B<--succinct>
+
+Show less output, for example don't show information on tests that pass.
+
+=item B<-v>, B<--verbose>
+
+Show more information on test failures, for example the entire test output
+instead of just the test result.
+
+=item B<--no-progress-bar>
+
+Do not use curses based progress bar.
+
+=back 
+
+=head1 EXECUTION OPTIONS
+
+=over
+
+=item B<--path>=I<PATH>
+
+Specify an addition I<PATH> to use when searching for executables in tests.
+
+=item B<--vg>
+
+Run individual tests under valgrind (using the memcheck tool). The
+I<--error-exitcode> argument for valgrind is used so that valgrind failures will
+cause the program to exit with a non-zero status.
+
+=item B<--vg-arg>=I<ARG>
+
+When I<--vg> is used, specify an additional argument to pass to valgrind itself.
+
+=item B<--time-tests>
+
+Track the wall time individual tests take to execute and includes the results in
+the summary output. This is useful for determining which tests in a test suite
+take the most time to execute. Note that this option is most useful with I<-j
+1>.
+
+=back
+
+=head1 SELECTION OPTIONS
+
+=over
+
+=item B<--max-tests>=I<N>
+
+Run at most I<N> tests and then terminate.
+
+=item B<--max-time>=I<N>
+
+Spend at most I<N> seconds (approximately) running tests and then terminate.
+
+=item B<--shuffle>
+
+Run the tests in a random order.
+
+=back
+
+=head1 ADDITIONAL OPTIONS
+
+=over
+
+=item B<--debug>
+
+Run B<lit> in debug mode, for debugging configuration issues and B<lit> itself.
+
+=item B<--show-suites>
+
+List the discovered test suites as part of the standard output.
+
+=item B<--no-tcl-as-sh>
+
+Run Tcl scripts internally (instead of converting to shell scripts).
+
+=back
+
+=head1 EXIT STATUS
+
+B<lit> will exit with an exit code of 1 if there are any FAIL or XPASS
+results. Otherwise, it will exit with the status 0. Other exit codes used for
+non-test related failures (for example a user error or an internal program
+error).
+
+=head1 TEST DISCOVERY
+
+The inputs passed to B<lit> can be either individual tests, or entire
+directories or hierarchies of tests to run. When B<lit> starts up, the first
+thing it does is convert the inputs into a complete list of tests to run as part
+of I<test discovery>.
+
+In the B<lit> model, every test must exist inside some I<test suite>. B<lit>
+resolves the inputs specified on the command line to test suites by searching
+upwards from the input path until it finds a I<lit.cfg> or I<lit.site.cfg>
+file. These files serve as both a marker of test suites and as configuration
+files which B<lit> loads in order to understand how to find and run the tests
+inside the test suite.
+
+Once B<lit> has mapped the inputs into test suites it traverses the list of
+inputs adding tests for individual files and recursively searching for tests in
+directories.
+
+This behavior makes it easy to specify a subset of tests to run, while still
+allowing the test suite configuration to control exactly how tests are
+interpreted. In addition, B<lit> always identifies tests by the test suite they
+are in, and their relative path inside the test suite. For appropriately
+configured projects, this allows B<lit> to provide convenient and flexible
+support for out-of-tree builds.
+
+=head1 TEST STATUS RESULTS
+
+Each test ultimately produces one of the following six results:
+
+=over
+
+=item B<PASS>
+
+The test succeeded.
+
+=item B<XFAIL>
+
+The test failed, but that is expected. This is used for test formats which allow
+specifying that a test does not currently work, but wish to leave it in the test
+suite.
+
+=item B<XPASS>
+
+The test succeeded, but it was expected to fail. This is used for tests which
+were specified as expected to fail, but are now succeeding (generally because
+the feautre they test was broken and has been fixed).
+
+=item B<FAIL>
+
+The test failed.
+
+=item B<UNRESOLVED>
+
+The test result could not be determined. For example, this occurs when the test
+could not be run, the test itself is invalid, or the test was interrupted.
+
+=item B<UNSUPPORTED>
+
+The test is not supported in this environment. This is used by test formats
+which can report unsupported tests.
+
+=back
+
+Depending on the test format tests may produce additional information about
+their status (generally only for failures). See the L<Output|"LIT OUTPUT">
+section for more information.
+
+=head1 SEE ALSO
+
+L<valgrind(1)>
+
+=head1 AUTHOR
+
+Written by Daniel Dunbar and maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/utils/lit/LitConfig.py b/utils/lit/LitConfig.py
new file mode 100644
index 0000000000..4fb0ccc093
--- /dev/null
+++ b/utils/lit/LitConfig.py
@@ -0,0 +1,71 @@
+class LitConfig:
+    """LitConfig - Configuration data for a 'lit' test runner instance, shared
+    across all tests.
+
+    The LitConfig object is also used to communicate with client configuration
+    files, it is always passed in as the global variable 'lit' so that
+    configuration files can access common functionality and internal components
+    easily.
+    """
+
+    # Provide access to built-in formats.
+    import LitFormats as formats
+
+    # Provide access to built-in utility functions.
+    import Util as util
+
+    def __init__(self, progname, path, quiet,
+                 useValgrind, valgrindArgs,
+                 useTclAsSh,
+                 noExecute, debug, isWindows):
+        # The name of the test runner.
+        self.progname = progname
+        # The items to add to the PATH environment variable.
+        self.path = list(map(str, path))
+        self.quiet = bool(quiet)
+        self.useValgrind = bool(useValgrind)
+        self.valgrindArgs = list(valgrindArgs)
+        self.useTclAsSh = bool(useTclAsSh)
+        self.noExecute = noExecute
+        self.debug = debug
+        self.isWindows = bool(isWindows)
+
+        self.numErrors = 0
+        self.numWarnings = 0
+
+    def load_config(self, config, path):
+        """load_config(config, path) - Load a config object from an alternate
+        path."""
+        from TestingConfig import TestingConfig
+        return TestingConfig.frompath(path, config.parent, self,
+                                      mustExist = True,
+                                      config = config)
+
+    def _write_message(self, kind, message):
+        import inspect, os, sys
+
+        # Get the file/line where this message was generated.
+        f = inspect.currentframe()
+        # Step out of _write_message, and then out of wrapper.
+        f = f.f_back.f_back
+        file,line,_,_,_ = inspect.getframeinfo(f)
+        location = '%s:%d' % (os.path.basename(file), line)
+
+        print >>sys.stderr, '%s: %s: %s: %s' % (self.progname, location,
+                                                kind, message)
+
+    def note(self, message):
+        self._write_message('note', message)
+
+    def warning(self, message):
+        self._write_message('warning', message)
+        self.numWarnings += 1
+
+    def error(self, message):
+        self._write_message('error', message)
+        self.numErrors += 1
+
+    def fatal(self, message):
+        import sys
+        self._write_message('fatal', message)
+        sys.exit(2)
diff --git a/utils/lit/LitFormats.py b/utils/lit/LitFormats.py
new file mode 100644
index 0000000000..cc00ddc7e7
--- /dev/null
+++ b/utils/lit/LitFormats.py
@@ -0,0 +1,2 @@
+from ShTest import ShTest
+from TclTest import TclTest
diff --git a/utils/lit/ProgressBar.py b/utils/lit/ProgressBar.py
new file mode 100644
index 0000000000..85c95f57f7
--- /dev/null
+++ b/utils/lit/ProgressBar.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python
+
+# Source: http://code.activestate.com/recipes/475116/, with
+# modifications by Daniel Dunbar.
+
+import sys, re, time
+
+class TerminalController:
+    """
+    A class that can be used to portably generate formatted output to
+    a terminal.  
+    
+    `TerminalController` defines a set of instance variables whose
+    values are initialized to the control sequence necessary to
+    perform a given action.  These can be simply included in normal
+    output to the terminal:
+
+        >>> term = TerminalController()
+        >>> print 'This is '+term.GREEN+'green'+term.NORMAL
+
+    Alternatively, the `render()` method can used, which replaces
+    '${action}' with the string required to perform 'action':
+
+        >>> term = TerminalController()
+        >>> print term.render('This is ${GREEN}green${NORMAL}')
+
+    If the terminal doesn't support a given action, then the value of
+    the corresponding instance variable will be set to ''.  As a
+    result, the above code will still work on terminals that do not
+    support color, except that their output will not be colored.
+    Also, this means that you can test whether the terminal supports a
+    given action by simply testing the truth value of the
+    corresponding instance variable:
+
+        >>> term = TerminalController()
+        >>> if term.CLEAR_SCREEN:
+        ...     print 'This terminal supports clearning the screen.'
+
+    Finally, if the width and height of the terminal are known, then
+    they will be stored in the `COLS` and `LINES` attributes.
+    """
+    # Cursor movement:
+    BOL = ''             #: Move the cursor to the beginning of the line
+    UP = ''              #: Move the cursor up one line
+    DOWN = ''            #: Move the cursor down one line
+    LEFT = ''            #: Move the cursor left one char
+    RIGHT = ''           #: Move the cursor right one char
+
+    # Deletion:
+    CLEAR_SCREEN = ''    #: Clear the screen and move to home position
+    CLEAR_EOL = ''       #: Clear to the end of the line.
+    CLEAR_BOL = ''       #: Clear to the beginning of the line.
+    CLEAR_EOS = ''       #: Clear to the end of the screen
+
+    # Output modes:
+    BOLD = ''            #: Turn on bold mode
+    BLINK = ''           #: Turn on blink mode
+    DIM = ''             #: Turn on half-bright mode
+    REVERSE = ''         #: Turn on reverse-video mode
+    NORMAL = ''          #: Turn off all modes
+
+    # Cursor display:
+    HIDE_CURSOR = ''     #: Make the cursor invisible
+    SHOW_CURSOR = ''     #: Make the cursor visible
+
+    # Terminal size:
+    COLS = None          #: Width of the terminal (None for unknown)
+    LINES = None         #: Height of the terminal (None for unknown)
+
+    # Foreground colors:
+    BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = ''
+    
+    # Background colors:
+    BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = ''
+    BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = ''
+    
+    _STRING_CAPABILITIES = """
+    BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1
+    CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold
+    BLINK=blink DIM=dim REVERSE=rev UNDERLINE=smul NORMAL=sgr0
+    HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split()
+    _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split()
+    _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split()
+
+    def __init__(self, term_stream=sys.stdout):
+        """
+        Create a `TerminalController` and initialize its attributes
+        with appropriate values for the current terminal.
+        `term_stream` is the stream that will be used for terminal
+        output; if this stream is not a tty, then the terminal is
+        assumed to be a dumb terminal (i.e., have no capabilities).
+        """
+        # Curses isn't available on all platforms
+        try: import curses
+        except: return
+
+        # If the stream isn't a tty, then assume it has no capabilities.
+        if not term_stream.isatty(): return
+
+        # Check the terminal type.  If we fail, then assume that the
+        # terminal has no capabilities.
+        try: curses.setupterm()
+        except: return
+
+        # Look up numeric capabilities.
+        self.COLS = curses.tigetnum('cols')
+        self.LINES = curses.tigetnum('lines')
+        
+        # Look up string capabilities.
+        for capability in self._STRING_CAPABILITIES:
+            (attrib, cap_name) = capability.split('=')
+            setattr(self, attrib, self._tigetstr(cap_name) or '')
+
+        # Colors
+        set_fg = self._tigetstr('setf')
+        if set_fg:
+            for i,color in zip(range(len(self._COLORS)), self._COLORS):
+                setattr(self, color, curses.tparm(set_fg, i) or '')
+        set_fg_ansi = self._tigetstr('setaf')
+        if set_fg_ansi:
+            for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
+                setattr(self, color, curses.tparm(set_fg_ansi, i) or '')
+        set_bg = self._tigetstr('setb')
+        if set_bg:
+            for i,color in zip(range(len(self._COLORS)), self._COLORS):
+                setattr(self, 'BG_'+color, curses.tparm(set_bg, i) or '')
+        set_bg_ansi = self._tigetstr('setab')
+        if set_bg_ansi:
+            for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
+                setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '')
+
+    def _tigetstr(self, cap_name):
+        # String capabilities can include "delays" of the form "$<2>".
+        # For any modern terminal, we should be able to just ignore
+        # these, so strip them out.
+        import curses
+        cap = curses.tigetstr(cap_name) or ''
+        return re.sub(r'\$<\d+>[/*]?', '', cap)
+
+    def render(self, template):
+        """
+        Replace each $-substitutions in the given template string with
+        the corresponding terminal control string (if it's defined) or
+        '' (if it's not).
+        """
+        return re.sub(r'\$\$|\${\w+}', self._render_sub, template)
+
+    def _render_sub(self, match):
+        s = match.group()
+        if s == '$$': return s
+        else: return getattr(self, s[2:-1])
+
+#######################################################################
+# Example use case: progress bar
+#######################################################################
+
+class SimpleProgressBar:
+    """
+    A simple progress bar which doesn't need any terminal support.
+
+    This prints out a progress bar like:
+      'Header: 0 .. 10.. 20.. ...'
+    """
+
+    def __init__(self, header):
+        self.header = header
+        self.atIndex = None
+
+    def update(self, percent, message):
+        if self.atIndex is None:
+            sys.stdout.write(self.header)
+            self.atIndex = 0
+
+        next = int(percent*50)
+        if next == self.atIndex:
+            return
+
+        for i in range(self.atIndex, next):
+            idx = i % 5
+            if idx == 0:
+                sys.stdout.write('%-2d' % (i*2))
+            elif idx == 1:
+                pass # Skip second char
+            elif idx < 4:
+                sys.stdout.write('.')
+            else:
+                sys.stdout.write(' ')
+        sys.stdout.flush()
+        self.atIndex = next
+
+    def clear(self):
+        if self.atIndex is not None:
+            sys.stdout.write('\n')
+            sys.stdout.flush()
+            self.atIndex = None
+
+class ProgressBar:
+    """
+    A 3-line progress bar, which looks like::
+    
+                                Header
+        20% [===========----------------------------------]
+                           progress message
+
+    The progress bar is colored, if the terminal supports color
+    output; and adjusts to the width of the terminal.
+    """
+    BAR = '%s${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}%s\n'
+    HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n'
+        
+    def __init__(self, term, header, useETA=True):
+        self.term = term
+        if not (self.term.CLEAR_EOL and self.term.UP and self.term.BOL):
+            raise ValueError("Terminal isn't capable enough -- you "
+                             "should use a simpler progress dispaly.")
+        self.width = self.term.COLS or 75
+        self.bar = term.render(self.BAR)
+        self.header = self.term.render(self.HEADER % header.center(self.width))
+        self.cleared = 1 #: true if we haven't drawn the bar yet.
+        self.useETA = useETA
+        if self.useETA:
+            self.startTime = time.time()
+        self.update(0, '')
+
+    def update(self, percent, message):
+        if self.cleared:
+            sys.stdout.write(self.header)
+            self.cleared = 0
+        prefix = '%3d%% ' % (percent*100,)
+        suffix = ''
+        if self.useETA:
+            elapsed = time.time() - self.startTime
+            if percent > .0001 and elapsed > 1:
+                total = elapsed / percent
+                eta = int(total - elapsed)
+                h = eta//3600.
+                m = (eta//60) % 60
+                s = eta % 60
+                suffix = ' ETA: %02d:%02d:%02d'%(h,m,s)
+        barWidth = self.width - len(prefix) - len(suffix) - 2
+        n = int(barWidth*percent)
+        if len(message) < self.width:
+            message = message + ' '*(self.width - len(message))
+        else:
+            message = '... ' + message[-(self.width-4):]
+        sys.stdout.write(
+            self.term.BOL + self.term.UP + self.term.CLEAR_EOL +
+            (self.bar % (prefix, '='*n, '-'*(barWidth-n), suffix)) +
+            self.term.CLEAR_EOL + message)
+
+    def clear(self):
+        if not self.cleared:
+            sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL +
+                             self.term.UP + self.term.CLEAR_EOL +
+                             self.term.UP + self.term.CLEAR_EOL)
+            self.cleared = 1
+
+def test():
+    import time
+    tc = TerminalController()
+    p = ProgressBar(tc, 'Tests')
+    for i in range(101):
+        p.update(i/100., str(i))        
+        time.sleep(.3)
+
+if __name__=='__main__':
+    test()
diff --git a/utils/lit/ShCommands.py b/utils/lit/ShCommands.py
new file mode 100644
index 0000000000..be3e680e9e
--- /dev/null
+++ b/utils/lit/ShCommands.py
@@ -0,0 +1,86 @@
+import ShUtil
+
+class Command:
+    def __init__(self, args, redirects):
+        self.args = list(args)
+        self.redirects = list(redirects)
+
+    def __repr__(self):
+        return 'Command(%r, %r)' % (self.args, self.redirects)
+
+    def __cmp__(self, other):
+        if not isinstance(other, Command):
+            return -1
+
+        return cmp((self.args, self.redirects),
+                   (other.args, other.redirects))
+
+    def toShell(self, file):
+        for arg in self.args:
+            if "'" not in arg:
+                quoted = "'%s'" % arg
+            elif '"' not in arg and '$' not in arg:
+                quoted = '"%s"' % arg
+            else:
+                raise NotImplementedError,'Unable to quote %r' % arg
+            print >>file, quoted,
+
+            # For debugging / validation.
+            dequoted = list(ShUtil.ShLexer(quoted).lex())
+            if dequoted != [arg]:
+                raise NotImplementedError,'Unable to quote %r' % arg
+
+        for r in self.redirects:
+            if len(r[0]) == 1:
+                print >>file, "%s '%s'" % (r[0][0], r[1]),
+            else:
+                print >>file, "%s%s '%s'" % (r[0][1], r[0][0], r[1]),
+
+class Pipeline:
+    def __init__(self, commands, negate=False, pipe_err=False):
+        self.commands = commands
+        self.negate = negate
+        self.pipe_err = pipe_err
+
+    def __repr__(self):
+        return 'Pipeline(%r, %r, %r)' % (self.commands, self.negate,
+                                         self.pipe_err)
+
+    def __cmp__(self, other):
+        if not isinstance(other, Pipeline):
+            return -1
+
+        return cmp((self.commands, self.negate, self.pipe_err),
+                   (other.commands, other.negate, self.pipe_err))
+
+    def toShell(self, file, pipefail=False):
+        if pipefail != self.pipe_err:
+            raise ValueError,'Inconsistent "pipefail" attribute!'
+        if self.negate:
+            print >>file, '!',
+        for cmd in self.commands:
+            cmd.toShell(file)
+            if cmd is not self.commands[-1]:
+                print >>file, '|\n ',
+
+class Seq:
+    def __init__(self, lhs, op, rhs):
+        assert op in (';', '&', '||', '&&')
+        self.op = op
+        self.lhs = lhs
+        self.rhs = rhs
+
+    def __repr__(self):
+        return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
+
+    def __cmp__(self, other):
+        if not isinstance(other, Seq):
+            return -1
+
+        return cmp((self.lhs, self.op, self.rhs),
+                   (other.lhs, other.op, other.rhs))
+
+    def toShell(self, file, pipefail=False):
+        self.lhs.toShell(file, pipefail)
+        print >>file, ' %s\n' % self.op
+        self.rhs.toShell(file, pipefail)
diff --git a/utils/lit/ShTest.py b/utils/lit/ShTest.py
new file mode 100644
index 0000000000..fefdf7602b
--- /dev/null
+++ b/utils/lit/ShTest.py
@@ -0,0 +1,12 @@
+import TestRunner
+
+class ShTest:
+    def __init__(self, execute_external = False, require_and_and = False):
+        self.execute_external = execute_external
+        self.require_and_and = require_and_and
+
+    def execute(self, test, litConfig):
+        return TestRunner.executeShTest(test, litConfig,
+                                        self.execute_external,
+                                        self.require_and_and)
+
diff --git a/utils/lit/ShUtil.py b/utils/lit/ShUtil.py
new file mode 100644
index 0000000000..c4bbb3d373
--- /dev/null
+++ b/utils/lit/ShUtil.py
@@ -0,0 +1,346 @@
+import itertools
+
+import Util
+from ShCommands import Command, Pipeline, Seq
+
+class ShLexer:
+    def __init__(self, data, win32Escapes = False):
+        self.data = data
+        self.pos = 0
+        self.end = len(data)
+        self.win32Escapes = win32Escapes
+
+    def eat(self):
+        c = self.data[self.pos]
+        self.pos += 1
+        return c
+
+    def look(self):
+        return self.data[self.pos]
+
+    def maybe_eat(self, c):
+        """
+        maybe_eat(c) - Consume the character c if it is the next character,
+        returning True if a character was consumed. """
+        if self.data[self.pos] == c:
+            self.pos += 1
+            return True
+        return False
+
+    def lex_arg_fast(self, c):
+        # Get the leading whitespace free section.
+        chunk = self.data[self.pos - 1:].split(None, 1)[0]
+        
+        # If it has special characters, the fast path failed.
+        if ('|' in chunk or '&' in chunk or 
+            '<' in chunk or '>' in chunk or
+            "'" in chunk or '"' in chunk or
+            '\\' in chunk):
+            return None
+        
+        self.pos = self.pos - 1 + len(chunk)
+        return chunk
+        
+    def lex_arg_slow(self, c):
+        if c in "'\"":
+            str = self.lex_arg_quoted(c)
+        else:
+            str = c
+        while self.pos != self.end:
+            c = self.look()
+            if c.isspace() or c in "|&":
+                break
+            elif c in '><':
+                # This is an annoying case; we treat '2>' as a single token so
+                # we don't have to track whitespace tokens.
+
+                # If the parse string isn't an integer, do the usual thing.
+                if not str.isdigit():
+                    break
+
+                # Otherwise, lex the operator and convert to a redirection
+                # token.
+                num = int(str)
+                tok = self.lex_one_token()
+                assert isinstance(tok, tuple) and len(tok) == 1
+                return (tok[0], num)                    
+            elif c == '"':
+                self.eat()
+                str += self.lex_arg_quoted('"')
+            elif not self.win32Escapes and c == '\\':
+                # Outside of a string, '\\' escapes everything.
+                self.eat()
+                if self.pos == self.end:
+                    Util.warning("escape at end of quoted argument in: %r" % 
+                                 self.data)
+                    return str
+                str += self.eat()
+            else:
+                str += self.eat()
+        return str
+
+    def lex_arg_quoted(self, delim):
+        str = ''
+        while self.pos != self.end:
+            c = self.eat()
+            if c == delim:
+                return str
+            elif c == '\\' and delim == '"':
+                # Inside a '"' quoted string, '\\' only escapes the quote
+                # character and backslash, otherwise it is preserved.
+                if self.pos == self.end:
+                    Util.warning("escape at end of quoted argument in: %r" % 
+                                 self.data)
+                    return str
+                c = self.eat()
+                if c == '"': # 
+                    str += '"'
+                elif c == '\\':
+                    str += '\\'
+                else:
+                    str += '\\' + c
+            else:
+                str += c
+        Util.warning("missing quote character in %r" % self.data)
+        return str
+    
+    def lex_arg_checked(self, c):
+        pos = self.pos
+        res = self.lex_arg_fast(c)
+        end = self.pos
+
+        self.pos = pos
+        reference = self.lex_arg_slow(c)
+        if res is not None:
+            if res != reference:
+                raise ValueError,"Fast path failure: %r != %r" % (res, reference)
+            if self.pos != end:
+                raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
+        return reference
+        
+    def lex_arg(self, c):
+        return self.lex_arg_fast(c) or self.lex_arg_slow(c)
+        
+    def lex_one_token(self):
+        """
+        lex_one_token - Lex a single 'sh' token. """
+
+        c = self.eat()
+        if c in ';!':
+            return (c,)
+        if c == '|':
+            if self.maybe_eat('|'):
+                return ('||',)
+            return (c,)
+        if c == '&':
+            if self.maybe_eat('&'):
+                return ('&&',)
+            if self.maybe_eat('>'): 
+                return ('&>',)
+            return (c,)
+        if c == '>':
+            if self.maybe_eat('&'):
+                return ('>&',)
+            if self.maybe_eat('>'):
+                return ('>>',)
+            return (c,)
+        if c == '<':
+            if self.maybe_eat('&'):
+                return ('<&',)
+            if self.maybe_eat('>'):
+                return ('<<',)
+            return (c,)
+
+        return self.lex_arg(c)
+
+    def lex(self):
+        while self.pos != self.end:
+            if self.look().isspace():
+                self.eat()
+            else:
+                yield self.lex_one_token()
+
+###
+ 
+class ShParser:
+    def __init__(self, data, win32Escapes = False):
+        self.data = data
+        self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
+    
+    def lex(self):
+        try:
+            return self.tokens.next()
+        except StopIteration:
+            return None
+    
+    def look(self):
+        next = self.lex()
+        if next is not None:
+            self.tokens = itertools.chain([next], self.tokens)
+        return next
+    
+    def parse_command(self):
+        tok = self.lex()
+        if not tok:
+            raise ValueError,"empty command!"
+        if isinstance(tok, tuple):
+            raise ValueError,"syntax error near unexpected token %r" % tok[0]
+        
+        args = [tok]
+        redirects = []
+        while 1:
+            tok = self.look()
+
+            # EOF?
+            if tok is None:
+                break
+
+            # If this is an argument, just add it to the current command.
+            if isinstance(tok, str):
+                args.append(self.lex())
+                continue
+
+            # Otherwise see if it is a terminator.
+            assert isinstance(tok, tuple)
+            if tok[0] in ('|',';','&','||','&&'):
+                break
+            
+            # Otherwise it must be a redirection.
+            op = self.lex()
+            arg = self.lex()
+            if not arg:
+                raise ValueError,"syntax error near token %r" % op[0]
+            redirects.append((op, arg))
+
+        return Command(args, redirects)
+
+    def parse_pipeline(self):
+        negate = False
+        if self.look() == ('!',):
+            self.lex()
+            negate = True
+
+        commands = [self.parse_command()]
+        while self.look() == ('|',):
+            self.lex()
+            commands.append(self.parse_command())
+        return Pipeline(commands, negate)
+            
+    def parse(self):
+        lhs = self.parse_pipeline()
+
+        while self.look():
+            operator = self.lex()
+            assert isinstance(operator, tuple) and len(operator) == 1
+
+            if not self.look():
+                raise ValueError, "missing argument to operator %r" % operator[0]
+            
+            # FIXME: Operator precedence!!
+            lhs = Seq(lhs, operator[0], self.parse_pipeline())
+
+        return lhs
+
+###
+
+import unittest
+
+class TestShLexer(unittest.TestCase):
+    def lex(self, str, *args, **kwargs):
+        return list(ShLexer(str, *args, **kwargs).lex())
+
+    def test_basic(self):
+        self.assertEqual(self.lex('a|b>c&d<e'),
+                         ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd', 
+                          ('<',), 'e'])
+
+    def test_redirection_tokens(self):
+        self.assertEqual(self.lex('a2>c'),
+                         ['a2', ('>',), 'c'])
+        self.assertEqual(self.lex('a 2>c'),
+                         ['a', ('>',2), 'c'])
+        
+    def test_quoting(self):
+        self.assertEqual(self.lex("