aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/CommandGuide/lit.pod222
-rw-r--r--utils/lit/LitConfig.py71
-rw-r--r--utils/lit/LitFormats.py2
-rw-r--r--utils/lit/ProgressBar.py267
-rw-r--r--utils/lit/ShCommands.py86
-rw-r--r--utils/lit/ShTest.py12
-rw-r--r--utils/lit/ShUtil.py346
-rw-r--r--utils/lit/TODO19
-rw-r--r--utils/lit/TclTest.py7
-rw-r--r--utils/lit/TclUtil.py322
-rw-r--r--utils/lit/Test.py71
-rw-r--r--utils/lit/TestRunner.py460
-rw-r--r--utils/lit/TestingConfig.py95
-rw-r--r--utils/lit/Util.py124
-rwxr-xr-xutils/lit/lit.py519
15 files changed, 2623 insertions, 0 deletions
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
new file mode 100644
index 0000000000..a818302c24
--- /dev/null
+++ b/docs/CommandGuide/lit.pod
@@ -0,0 +1,222 @@
+=pod
+
+=head1 NAME
+
+lit - LLVM Integrated Tester
+
+=head1 SYNOPSIS
+
+B<lit> [I<options>] [I<tests>]
+
+=head1 DESCRIPTION
+
+B<lit> is a portable tool for executing LLVM and Clang style test suites,
+summarizing their results, and providing indication of failures. B<lit> is
+designed to be a lightweight testing tool with as simple a user interface as
+possible.
+
+B<lit> should be run with one or more I<tests> to run specified on the command
+line. Tests can be either individual test files or directories to search for
+tests (see L<"TEST DISCOVERY">).
+
+Each specified test will be executed (potentially in parallel) and once all
+tests have been run B<lit> will print summary information on the number of tests
+which passed or failed (see L<"TEST STATUS RESULTS">). The B<lit> program will
+execute with a non-zero exit code if any tests fail.
+
+By default B<lit> will use a succinct progress display and will only print
+summary information for test failures. See L<"OUTPUT OPTIONS"> for options
+controlling the B<lit> progress display and output.
+
+B<lit> also includes a number of options for controlling how tests are exected
+(specific features may depend on the particular test format). See L<"EXECUTION
+OPTIONS"> for more information.
+
+Finally, B<lit> also supports additional options for only running a subset of
+the options specified on the command line, see L<"SELECTION OPTIONS"> for
+more information.
+
+=head1 GENERAL OPTIONS
+
+=over
+
+=item B<-h>, B<--help>
+
+Show the B<lit> help message.
+
+=item B<-j> I<N>, B<--threads>=I<N>
+
+Run I<N> tests in parallel. By default, this is automatically chose to match the
+number of detected available CPUs.
+
+=back
+
+=head1 OUTPUT OPTIONS
+
+=over
+
+=item B<-q>, B<--quiet>
+
+Suppress any output except for test failures.
+
+=item B<-s>, B<--succinct>
+
+Show less output, for example don't show information on tests that pass.
+
+=item B<-v>, B<--verbose>
+
+Show more information on test failures, for example the entire test output
+instead of just the test result.
+
+=item B<--no-progress-bar>
+
+Do not use curses based progress bar.
+
+=back
+
+=head1 EXECUTION OPTIONS
+
+=over
+
+=item B<--path>=I<PATH>
+
+Specify an addition I<PATH> to use when searching for executables in tests.
+
+=item B<--vg>
+
+Run individual tests under valgrind (using the memcheck tool). The
+I<--error-exitcode> argument for valgrind is used so that valgrind failures will
+cause the program to exit with a non-zero status.
+
+=item B<--vg-arg>=I<ARG>
+
+When I<--vg> is used, specify an additional argument to pass to valgrind itself.
+
+=item B<--time-tests>
+
+Track the wall time individual tests take to execute and includes the results in
+the summary output. This is useful for determining which tests in a test suite
+take the most time to execute. Note that this option is most useful with I<-j
+1>.
+
+=back
+
+=head1 SELECTION OPTIONS
+
+=over
+
+=item B<--max-tests>=I<N>
+
+Run at most I<N> tests and then terminate.
+
+=item B<--max-time>=I<N>
+
+Spend at most I<N> seconds (approximately) running tests and then terminate.
+
+=item B<--shuffle>
+
+Run the tests in a random order.
+
+=back
+
+=head1 ADDITIONAL OPTIONS
+
+=over
+
+=item B<--debug>
+
+Run B<lit> in debug mode, for debugging configuration issues and B<lit> itself.
+
+=item B<--show-suites>
+
+List the discovered test suites as part of the standard output.
+
+=item B<--no-tcl-as-sh>
+
+Run Tcl scripts internally (instead of converting to shell scripts).
+
+=back
+
+=head1 EXIT STATUS
+
+B<lit> will exit with an exit code of 1 if there are any FAIL or XPASS
+results. Otherwise, it will exit with the status 0. Other exit codes used for
+non-test related failures (for example a user error or an internal program
+error).
+
+=head1 TEST DISCOVERY
+
+The inputs passed to B<lit> can be either individual tests, or entire
+directories or hierarchies of tests to run. When B<lit> starts up, the first
+thing it does is convert the inputs into a complete list of tests to run as part
+of I<test discovery>.
+
+In the B<lit> model, every test must exist inside some I<test suite>. B<lit>
+resolves the inputs specified on the command line to test suites by searching
+upwards from the input path until it finds a I<lit.cfg> or I<lit.site.cfg>
+file. These files serve as both a marker of test suites and as configuration
+files which B<lit> loads in order to understand how to find and run the tests
+inside the test suite.
+
+Once B<lit> has mapped the inputs into test suites it traverses the list of
+inputs adding tests for individual files and recursively searching for tests in
+directories.
+
+This behavior makes it easy to specify a subset of tests to run, while still
+allowing the test suite configuration to control exactly how tests are
+interpreted. In addition, B<lit> always identifies tests by the test suite they
+are in, and their relative path inside the test suite. For appropriately
+configured projects, this allows B<lit> to provide convenient and flexible
+support for out-of-tree builds.
+
+=head1 TEST STATUS RESULTS
+
+Each test ultimately produces one of the following six results:
+
+=over
+
+=item B<PASS>
+
+The test succeeded.
+
+=item B<XFAIL>
+
+The test failed, but that is expected. This is used for test formats which allow
+specifying that a test does not currently work, but wish to leave it in the test
+suite.
+
+=item B<XPASS>
+
+The test succeeded, but it was expected to fail. This is used for tests which
+were specified as expected to fail, but are now succeeding (generally because
+the feautre they test was broken and has been fixed).
+
+=item B<FAIL>
+
+The test failed.
+
+=item B<UNRESOLVED>
+
+The test result could not be determined. For example, this occurs when the test
+could not be run, the test itself is invalid, or the test was interrupted.
+
+=item B<UNSUPPORTED>
+
+The test is not supported in this environment. This is used by test formats
+which can report unsupported tests.
+
+=back
+
+Depending on the test format tests may produce additional information about
+their status (generally only for failures). See the L<Output|"LIT OUTPUT">
+section for more information.
+
+=head1 SEE ALSO
+
+L<valgrind(1)>
+
+=head1 AUTHOR
+
+Written by Daniel Dunbar and maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/utils/lit/LitConfig.py b/utils/lit/LitConfig.py
new file mode 100644
index 0000000000..4fb0ccc093
--- /dev/null
+++ b/utils/lit/LitConfig.py
@@ -0,0 +1,71 @@
+class LitConfig:
+ """LitConfig - Configuration data for a 'lit' test runner instance, shared
+ across all tests.
+
+ The LitConfig object is also used to communicate with client configuration
+ files, it is always passed in as the global variable 'lit' so that
+ configuration files can access common functionality and internal components
+ easily.
+ """
+
+ # Provide access to built-in formats.
+ import LitFormats as formats
+
+ # Provide access to built-in utility functions.
+ import Util as util
+
+ def __init__(self, progname, path, quiet,
+ useValgrind, valgrindArgs,
+ useTclAsSh,
+ noExecute, debug, isWindows):
+ # The name of the test runner.
+ self.progname = progname
+ # The items to add to the PATH environment variable.
+ self.path = list(map(str, path))
+ self.quiet = bool(quiet)
+ self.useValgrind = bool(useValgrind)
+ self.valgrindArgs = list(valgrindArgs)
+ self.useTclAsSh = bool(useTclAsSh)
+ self.noExecute = noExecute
+ self.debug = debug
+ self.isWindows = bool(isWindows)
+
+ self.numErrors = 0
+ self.numWarnings = 0
+
+ def load_config(self, config, path):
+ """load_config(config, path) - Load a config object from an alternate
+ path."""
+ from TestingConfig import TestingConfig
+ return TestingConfig.frompath(path, config.parent, self,
+ mustExist = True,
+ config = config)
+
+ def _write_message(self, kind, message):
+ import inspect, os, sys
+
+ # Get the file/line where this message was generated.
+ f = inspect.currentframe()
+ # Step out of _write_message, and then out of wrapper.
+ f = f.f_back.f_back
+ file,line,_,_,_ = inspect.getframeinfo(f)
+ location = '%s:%d' % (os.path.basename(file), line)
+
+ print >>sys.stderr, '%s: %s: %s: %s' % (self.progname, location,
+ kind, message)
+
+ def note(self, message):
+ self._write_message('note', message)
+
+ def warning(self, message):
+ self._write_message('warning', message)
+ self.numWarnings += 1
+
+ def error(self, message):
+ self._write_message('error', message)
+ self.numErrors += 1
+
+ def fatal(self, message):
+ import sys
+ self._write_message('fatal', message)
+ sys.exit(2)
diff --git a/utils/lit/LitFormats.py b/utils/lit/LitFormats.py
new file mode 100644
index 0000000000..cc00ddc7e7
--- /dev/null
+++ b/utils/lit/LitFormats.py
@@ -0,0 +1,2 @@
+from ShTest import ShTest
+from TclTest import TclTest
diff --git a/utils/lit/ProgressBar.py b/utils/lit/ProgressBar.py
new file mode 100644
index 0000000000..85c95f57f7
--- /dev/null
+++ b/utils/lit/ProgressBar.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python
+
+# Source: http://code.activestate.com/recipes/475116/, with
+# modifications by Daniel Dunbar.
+
+import sys, re, time
+
+class TerminalController:
+ """
+ A class that can be used to portably generate formatted output to
+ a terminal.
+
+ `TerminalController` defines a set of instance variables whose
+ values are initialized to the control sequence necessary to
+ perform a given action. These can be simply included in normal
+ output to the terminal:
+
+ >>> term = TerminalController()
+ >>> print 'This is '+term.GREEN+'green'+term.NORMAL
+
+ Alternatively, the `render()` method can used, which replaces
+ '${action}' with the string required to perform 'action':
+
+ >>> term = TerminalController()
+ >>> print term.render('This is ${GREEN}green${NORMAL}')
+
+ If the terminal doesn't support a given action, then the value of
+ the corresponding instance variable will be set to ''. As a
+ result, the above code will still work on terminals that do not
+ support color, except that their output will not be colored.
+ Also, this means that you can test whether the terminal supports a
+ given action by simply testing the truth value of the
+ corresponding instance variable:
+
+ >>> term = TerminalController()
+ >>> if term.CLEAR_SCREEN:
+ ... print 'This terminal supports clearning the screen.'
+
+ Finally, if the width and height of the terminal are known, then
+ they will be stored in the `COLS` and `LINES` attributes.
+ """
+ # Cursor movement:
+ BOL = '' #: Move the cursor to the beginning of the line
+ UP = '' #: Move the cursor up one line
+ DOWN = '' #: Move the cursor down one line
+ LEFT = '' #: Move the cursor left one char
+ RIGHT = '' #: Move the cursor right one char
+
+ # Deletion:
+ CLEAR_SCREEN = '' #: Clear the screen and move to home position
+ CLEAR_EOL = '' #: Clear to the end of the line.
+ CLEAR_BOL = '' #: Clear to the beginning of the line.
+ CLEAR_EOS = '' #: Clear to the end of the screen
+
+ # Output modes:
+ BOLD = '' #: Turn on bold mode
+ BLINK = '' #: Turn on blink mode
+ DIM = '' #: Turn on half-bright mode
+ REVERSE = '' #: Turn on reverse-video mode
+ NORMAL = '' #: Turn off all modes
+
+ # Cursor display:
+ HIDE_CURSOR = '' #: Make the cursor invisible
+ SHOW_CURSOR = '' #: Make the cursor visible
+
+ # Terminal size:
+ COLS = None #: Width of the terminal (None for unknown)
+ LINES = None #: Height of the terminal (None for unknown)
+
+ # Foreground colors:
+ BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = ''
+
+ # Background colors:
+ BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = ''
+ BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = ''
+
+ _STRING_CAPABILITIES = """
+ BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1
+ CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold
+ BLINK=blink DIM=dim REVERSE=rev UNDERLINE=smul NORMAL=sgr0
+ HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split()
+ _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split()
+ _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split()
+
+ def __init__(self, term_stream=sys.stdout):
+ """
+ Create a `TerminalController` and initialize its attributes
+ with appropriate values for the current terminal.
+ `term_stream` is the stream that will be used for terminal
+ output; if this stream is not a tty, then the terminal is
+ assumed to be a dumb terminal (i.e., have no capabilities).
+ """
+ # Curses isn't available on all platforms
+ try: import curses
+ except: return
+
+ # If the stream isn't a tty, then assume it has no capabilities.
+ if not term_stream.isatty(): return
+
+ # Check the terminal type. If we fail, then assume that the
+ # terminal has no capabilities.
+ try: curses.setupterm()
+ except: return
+
+ # Look up numeric capabilities.
+ self.COLS = curses.tigetnum('cols')
+ self.LINES = curses.tigetnum('lines')
+
+ # Look up string capabilities.
+ for capability in self._STRING_CAPABILITIES:
+ (attrib, cap_name) = capability.split('=')
+ setattr(self, attrib, self._tigetstr(cap_name) or '')
+
+ # Colors
+ set_fg = self._tigetstr('setf')
+ if set_fg:
+ for i,color in zip(range(len(self._COLORS)), self._COLORS):
+ setattr(self, color, curses.tparm(set_fg, i) or '')
+ set_fg_ansi = self._tigetstr('setaf')
+ if set_fg_ansi:
+ for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
+ setattr(self, color, curses.tparm(set_fg_ansi, i) or '')
+ set_bg = self._tigetstr('setb')
+ if set_bg:
+ for i,color in zip(range(len(self._COLORS)), self._COLORS):
+ setattr(self, 'BG_'+color, curses.tparm(set_bg, i) or '')
+ set_bg_ansi = self._tigetstr('setab')
+ if set_bg_ansi:
+ for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
+ setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '')
+
+ def _tigetstr(self, cap_name):
+ # String capabilities can include "delays" of the form "$<2>".
+ # For any modern terminal, we should be able to just ignore
+ # these, so strip them out.
+ import curses
+ cap = curses.tigetstr(cap_name) or ''
+ return re.sub(r'\$<\d+>[/*]?', '', cap)
+
+ def render(self, template):
+ """
+ Replace each $-substitutions in the given template string with
+ the corresponding terminal control string (if it's defined) or
+ '' (if it's not).
+ """
+ return re.sub(r'\$\$|\${\w+}', self._render_sub, template)
+
+ def _render_sub(self, match):
+ s = match.group()
+ if s == '$$': return s
+ else: return getattr(self, s[2:-1])
+
+#######################################################################
+# Example use case: progress bar
+#######################################################################
+
+class SimpleProgressBar:
+ """
+ A simple progress bar which doesn't need any terminal support.
+
+ This prints out a progress bar like:
+ 'Header: 0 .. 10.. 20.. ...'
+ """
+
+ def __init__(self, header):
+ self.header = header
+ self.atIndex = None
+
+ def update(self, percent, message):
+ if self.atIndex is None:
+ sys.stdout.write(self.header)
+ self.atIndex = 0
+
+ next = int(percent*50)
+ if next == self.atIndex:
+ return
+
+ for i in range(self.atIndex, next):
+ idx = i % 5
+ if idx == 0:
+ sys.stdout.write('%-2d' % (i*2))
+ elif idx == 1:
+ pass # Skip second char
+ elif idx < 4:
+ sys.stdout.write('.')
+ else:
+ sys.stdout.write(' ')
+ sys.stdout.flush()
+ self.atIndex = next
+
+ def clear(self):
+ if self.atIndex is not None:
+ sys.stdout.write('\n')
+ sys.stdout.flush()
+ self.atIndex = None
+
+class ProgressBar:
+ """
+ A 3-line progress bar, which looks like::
+
+ Header
+ 20% [===========----------------------------------]
+ progress message
+
+ The progress bar is colored, if the terminal supports color
+ output; and adjusts to the width of the terminal.
+ """
+ BAR = '%s${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}%s\n'
+ HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n'
+
+ def __init__(self, term, header, useETA=True):
+ self.term = term
+ if not (self.term.CLEAR_EOL and self.term.UP and self.term.BOL):
+ raise ValueError("Terminal isn't capable enough -- you "
+ "should use a simpler progress dispaly.")
+ self.width = self.term.COLS or 75
+ self.bar = term.render(self.BAR)
+ self.header = self.term.render(self.HEADER % header.center(self.width))
+ self.cleared = 1 #: true if we haven't drawn the bar yet.
+ self.useETA = useETA
+ if self.useETA:
+ self.startTime = time.time()
+ self.update(0, '')
+
+ def update(self, percent, message):
+ if self.cleared:
+ sys.stdout.write(self.header)
+ self.cleared = 0
+ prefix = '%3d%% ' % (percent*100,)
+ suffix = ''
+ if self.useETA:
+ elapsed = time.time() - self.startTime
+ if percent > .0001 and elapsed > 1:
+ total = elapsed / percent
+ eta = int(total - elapsed)
+ h = eta//3600.
+ m = (eta//60) % 60
+ s = eta % 60
+ suffix = ' ETA: %02d:%02d:%02d'%(h,m,s)
+ barWidth = self.width - len(prefix) - len(suffix) - 2
+ n = int(barWidth*percent)
+ if len(message) < self.width:
+ message = message + ' '*(self.width - len(message))
+ else:
+ message = '... ' + message[-(self.width-4):]
+ sys.stdout.write(
+ self.term.BOL + self.term.UP + self.term.CLEAR_EOL +
+ (self.bar % (prefix, '='*n, '-'*(barWidth-n), suffix)) +
+ self.term.CLEAR_EOL + message)
+
+ def clear(self):
+ if not self.cleared:
+ sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL +
+ self.term.UP + self.term.CLEAR_EOL +
+ self.term.UP + self.term.CLEAR_EOL)
+ self.cleared = 1
+
+def test():
+ import time
+ tc = TerminalController()
+ p = ProgressBar(tc, 'Tests')
+ for i in range(101):
+ p.update(i/100., str(i))
+ time.sleep(.3)
+
+if __name__=='__main__':
+ test()
diff --git a/utils/lit/ShCommands.py b/utils/lit/ShCommands.py
new file mode 100644
index 0000000000..be3e680e9e
--- /dev/null
+++ b/utils/lit/ShCommands.py
@@ -0,0 +1,86 @@
+import ShUtil
+
+class Command:
+ def __init__(self, args, redirects):
+ self.args = list(args)
+ self.redirects = list(redirects)
+
+ def __repr__(self):
+ return 'Command(%r, %r)' % (self.args, self.redirects)
+
+ def __cmp__(self, other):
+ if not isinstance(other, Command):
+ return -1
+
+ return cmp((self.args, self.redirects),
+ (other.args, other.redirects))
+
+ def toShell(self, file):
+ for arg in self.args:
+ if "'" not in arg:
+ quoted = "'%s'" % arg
+ elif '"' not in arg and '$' not in arg:
+ quoted = '"%s"' % arg
+ else:
+ raise NotImplementedError,'Unable to quote %r' % arg
+ print >>file, quoted,
+
+ # For debugging / validation.
+ dequoted = list(ShUtil.ShLexer(quoted).lex())
+ if dequoted != [arg]:
+ raise NotImplementedError,'Unable to quote %r' % arg
+
+ for r in self.redirects:
+ if len(r[0]) == 1:
+ print >>file, "%s '%s'" % (r[0][0], r[1]),
+ else:
+ print >>file, "%s%s '%s'" % (r[0][1], r[0][0], r[1]),
+
+class Pipeline:
+ def __init__(self, commands, negate=False, pipe_err=False):
+ self.commands = commands
+ self.negate = negate
+ self.pipe_err = pipe_err
+
+ def __repr__(self):
+ return 'Pipeline(%r, %r, %r)' % (self.commands, self.negate,
+ self.pipe_err)
+
+ def __cmp__(self, other):
+ if not isinstance(other, Pipeline):
+ return -1
+
+ return cmp((self.commands, self.negate, self.pipe_err),
+ (other.commands, other.negate, self.pipe_err))
+
+ def toShell(self, file, pipefail=False):
+ if pipefail != self.pipe_err:
+ raise ValueError,'Inconsistent "pipefail" attribute!'
+ if self.negate:
+ print >>file, '!',
+ for cmd in self.commands:
+ cmd.toShell(file)
+ if cmd is not self.commands[-1]:
+ print >>file, '|\n ',
+
+class Seq:
+ def __init__(self, lhs, op, rhs):
+ assert op in (';', '&', '||', '&&')
+ self.op = op
+ self.lhs = lhs
+ self.rhs = rhs
+
+ def __repr__(self):
+ return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
+
+ def __cmp__(self, other):
+ if not isinstance(other, Seq):
+ return -1
+
+ return cmp((self.lhs, self.op, self.rhs),
+ (other.lhs, other.op, other.rhs))
+
+ def toShell(self, file, pipefail=False):
+ self.lhs.toShell(file, pipefail)
+ print >>file, ' %s\n' % self.op
+ self.rhs.toShell(file, pipefail)
diff --git a/utils/lit/ShTest.py b/utils/lit/ShTest.py
new file mode 100644
index 0000000000..fefdf7602b
--- /dev/null
+++ b/utils/lit/ShTest.py
@@ -0,0 +1,12 @@
+import TestRunner
+
+class ShTest:
+ def __init__(self, execute_external = False, require_and_and = False):
+ self.execute_external = execute_external
+ self.require_and_and = require_and_and
+
+ def execute(self, test, litConfig):
+ return TestRunner.executeShTest(test, litConfig,
+ self.execute_external,
+ self.require_and_and)
+
diff --git a/utils/lit/ShUtil.py b/utils/lit/ShUtil.py
new file mode 100644
index 0000000000..c4bbb3d373
--- /dev/null
+++ b/utils/lit/ShUtil.py
@@ -0,0 +1,346 @@
+import itertools
+
+import Util
+from ShCommands import Command, Pipeline, Seq
+
+class ShLexer:
+ def __init__(self, data, win32Escapes = False):
+ self.data = data
+ self.pos = 0
+ self.end = len(data)
+ self.win32Escapes = win32Escapes
+
+ def eat(self):
+ c = self.data[self.pos]
+ self.pos += 1
+ return c
+
+ def look(self):
+ return self.data[self.pos]
+
+ def maybe_eat(self, c):
+ """
+ maybe_eat(c) - Consume the character c if it is the next character,
+ returning True if a character was consumed. """
+ if self.data[self.pos] == c:
+ self.pos += 1
+ return True
+ return False
+
+ def lex_arg_fast(self, c):
+ # Get the leading whitespace free section.
+ chunk = self.data[self.pos - 1:].split(None, 1)[0]
+
+ # If it has special characters, the fast path failed.
+ if ('|' in chunk or '&' in chunk or
+ '<' in chunk or '>' in chunk or
+ "'" in chunk or '"' in chunk or
+ '\\' in chunk):
+ return None
+
+ self.pos = self.pos - 1 + len(chunk)
+ return chunk
+
+ def lex_arg_slow(self, c):
+ if c in "'\"":
+ str = self.lex_arg_quoted(c)
+ else:
+ str = c
+ while self.pos != self.end:
+ c = self.look()
+ if c.isspace() or c in "|&":
+ break
+ elif c in '><':
+ # This is an annoying case; we treat '2>' as a single token so
+ # we don't have to track whitespace tokens.
+
+ # If the parse string isn't an integer, do the usual thing.
+ if not str.isdigit():
+ break
+
+ # Otherwise, lex the operator and convert to a redirection
+ # token.
+ num = int(str)
+ tok = self.lex_one_token()
+ assert isinstance(tok, tuple) and len(tok) == 1
+ return (tok[0], num)
+ elif c == '"':
+ self.eat()
+ str += self.lex_arg_quoted('"')
+ elif not self.win32Escapes and c == '\\':
+ # Outside of a string, '\\' escapes everything.
+ self.eat()
+ if self.pos == self.end:
+ Util.warning("escape at end of quoted argument in: %r" %
+ self.data)
+ return str
+ str += self.eat()
+ else:
+ str += self.eat()
+ return str
+
+ def lex_arg_quoted(self, delim):
+ str = ''
+ while self.pos != self.end:
+ c = self.eat()
+ if c == delim:
+ return str
+ elif c == '\\' and delim == '"':
+ # Inside a '"' quoted string, '\\' only escapes the quote
+ # character and backslash, otherwise it is preserved.
+ if self.pos == self.end:
+ Util.warning("escape at end of quoted argument in: %r" %
+ self.data)
+ return str
+ c = self.eat()
+ if c == '"': #
+ str += '"'
+ elif c == '\\':
+ str += '\\'
+ else:
+ str += '\\' + c
+ else:
+ str += c
+ Util.warning("missing quote character in %r" % self.data)
+ return str
+
+ def lex_arg_checked(self, c):
+ pos = self.pos
+ res = self.lex_arg_fast(c)
+ end = self.pos
+
+ self.pos = pos
+ reference = self.lex_arg_slow(c)
+ if res is not None:
+ if res != reference:
+ raise ValueError,"Fast path failure: %r != %r" % (res, reference)
+ if self.pos != end:
+ raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
+ return reference
+
+ def lex_arg(self, c):
+ return self.lex_arg_fast(c) or self.lex_arg_slow(c)
+
+ def lex_one_token(self):
+ """
+ lex_one_token - Lex a single 'sh' token. """
+
+ c = self.eat()
+ if c in ';!':
+ return (c,)
+ if c == '|':
+ if self.maybe_eat('|'):
+ return ('||',)
+ return (c,)
+ if c == '&':
+ if self.maybe_eat('&'):
+ return ('&&',)
+ if self.maybe_eat('>'):
+ return ('&>',)
+ return (c,)
+ if c == '>':
+ if self.maybe_eat('&'):
+ return ('>&',)
+ if self.maybe_eat('>'):
+ return ('>>',)
+ return (c,)
+ if c == '<':
+ if self.maybe_eat('&'):
+ return ('<&',)
+ if self.maybe_eat('>'):
+ return ('<<',)
+ return (c,)
+
+ return self.lex_arg(c)
+
+ def lex(self):
+ while self.pos != self.end:
+ if self.look().isspace():
+ self.eat()
+ else:
+ yield self.lex_one_token()
+
+###
+
+class ShParser:
+ def __init__(self, data, win32Escapes = False):
+ self.data = data
+ self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
+
+ def lex(self):
+ try:
+ return self.tokens.next()
+ except StopIteration:
+ return None
+
+ def look(self):
+ next = self.lex()
+ if next is not None:
+ self.tokens = itertools.chain([next], self.tokens)
+ return next
+
+ def parse_command(self):
+ tok = self.lex()
+ if not tok:
+ raise ValueError,"empty command!"
+ if isinstance(tok, tuple):
+ raise ValueError,"syntax error near unexpected token %r" % tok[0]
+
+ args = [tok]
+ redirects = []
+ while 1:
+ tok = self.look()
+
+ # EOF?
+ if tok is None:
+ break
+
+ # If this is an argument, just add it to the current command.
+ if isinstance(tok, str):
+ args.append(self.lex())
+ continue
+
+ # Otherwise see if it is a terminator.
+ assert isinstance(tok, tuple)
+ if tok[0] in ('|',';','&','||','&&'):
+ break
+
+ # Otherwise it must be a redirection.
+ op = self.lex()
+ arg = self.lex()
+ if not arg:
+ raise ValueError,"syntax error near token %r" % op[0]
+ redirects.append((op, arg))
+
+ return Command(args, redirects)
+
+ def parse_pipeline(self):
+ negate = False
+ if self.look() == ('!',):
+ self.lex()
+ negate = True
+
+ commands = [self.parse_command()]
+ while self.look() == ('|',):
+ self.lex()
+ commands.append(self.parse_command())
+ return Pipeline(commands, negate)
+
+ def parse(self):
+ lhs = self.parse_pipeline()
+
+ while self.look():
+ operator = self.lex()
+ assert isinstance(operator, tuple) and len(operator) == 1
+
+ if not self.look():
+ raise ValueError, "missing argument to operator %r" % operator[0]
+
+ # FIXME: Operator precedence!!
+ lhs = Seq(lhs, operator[0], self.parse_pipeline())
+
+ return lhs
+
+###
+
+import unittest
+
+class TestShLexer(unittest.TestCase):
+ def lex(self, str, *args, **kwargs):
+ return list(ShLexer(str, *args, **kwargs).lex())
+
+ def test_basic(self):
+ self.assertEqual(self.lex('a|b>c&d<e'),
+ ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
+ ('<',), 'e'])
+
+ def test_redirection_tokens(self):
+ self.assertEqual(self.lex('a2>c'),
+ ['a2', ('>',), 'c'])
+ self.assertEqual(self.lex('a 2>c'),
+ ['a', ('>',2), 'c'])
+
+ def test_quoting(self):
+ self.assertEqual(self.lex("