diff options
-rw-r--r-- | docs/CommandGuide/lit.pod | 222 | ||||
-rw-r--r-- | utils/lit/LitConfig.py | 71 | ||||
-rw-r--r-- | utils/lit/LitFormats.py | 2 | ||||
-rw-r--r-- | utils/lit/ProgressBar.py | 267 | ||||
-rw-r--r-- | utils/lit/ShCommands.py | 86 | ||||
-rw-r--r-- | utils/lit/ShTest.py | 12 | ||||
-rw-r--r-- | utils/lit/ShUtil.py | 346 | ||||
-rw-r--r-- | utils/lit/TODO | 19 | ||||
-rw-r--r-- | utils/lit/TclTest.py | 7 | ||||
-rw-r--r-- | utils/lit/TclUtil.py | 322 | ||||
-rw-r--r-- | utils/lit/Test.py | 71 | ||||
-rw-r--r-- | utils/lit/TestRunner.py | 460 | ||||
-rw-r--r-- | utils/lit/TestingConfig.py | 95 | ||||
-rw-r--r-- | utils/lit/Util.py | 124 | ||||
-rwxr-xr-x | utils/lit/lit.py | 519 |
15 files changed, 2623 insertions, 0 deletions
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod new file mode 100644 index 0000000000..a818302c24 --- /dev/null +++ b/docs/CommandGuide/lit.pod @@ -0,0 +1,222 @@ +=pod + +=head1 NAME + +lit - LLVM Integrated Tester + +=head1 SYNOPSIS + +B<lit> [I<options>] [I<tests>] + +=head1 DESCRIPTION + +B<lit> is a portable tool for executing LLVM and Clang style test suites, +summarizing their results, and providing indication of failures. B<lit> is +designed to be a lightweight testing tool with as simple a user interface as +possible. + +B<lit> should be run with one or more I<tests> to run specified on the command +line. Tests can be either individual test files or directories to search for +tests (see L<"TEST DISCOVERY">). + +Each specified test will be executed (potentially in parallel) and once all +tests have been run B<lit> will print summary information on the number of tests +which passed or failed (see L<"TEST STATUS RESULTS">). The B<lit> program will +execute with a non-zero exit code if any tests fail. + +By default B<lit> will use a succinct progress display and will only print +summary information for test failures. See L<"OUTPUT OPTIONS"> for options +controlling the B<lit> progress display and output. + +B<lit> also includes a number of options for controlling how tests are exected +(specific features may depend on the particular test format). See L<"EXECUTION +OPTIONS"> for more information. + +Finally, B<lit> also supports additional options for only running a subset of +the options specified on the command line, see L<"SELECTION OPTIONS"> for +more information. + +=head1 GENERAL OPTIONS + +=over + +=item B<-h>, B<--help> + +Show the B<lit> help message. + +=item B<-j> I<N>, B<--threads>=I<N> + +Run I<N> tests in parallel. By default, this is automatically chose to match the +number of detected available CPUs. + +=back + +=head1 OUTPUT OPTIONS + +=over + +=item B<-q>, B<--quiet> + +Suppress any output except for test failures. + +=item B<-s>, B<--succinct> + +Show less output, for example don't show information on tests that pass. + +=item B<-v>, B<--verbose> + +Show more information on test failures, for example the entire test output +instead of just the test result. + +=item B<--no-progress-bar> + +Do not use curses based progress bar. + +=back + +=head1 EXECUTION OPTIONS + +=over + +=item B<--path>=I<PATH> + +Specify an addition I<PATH> to use when searching for executables in tests. + +=item B<--vg> + +Run individual tests under valgrind (using the memcheck tool). The +I<--error-exitcode> argument for valgrind is used so that valgrind failures will +cause the program to exit with a non-zero status. + +=item B<--vg-arg>=I<ARG> + +When I<--vg> is used, specify an additional argument to pass to valgrind itself. + +=item B<--time-tests> + +Track the wall time individual tests take to execute and includes the results in +the summary output. This is useful for determining which tests in a test suite +take the most time to execute. Note that this option is most useful with I<-j +1>. + +=back + +=head1 SELECTION OPTIONS + +=over + +=item B<--max-tests>=I<N> + +Run at most I<N> tests and then terminate. + +=item B<--max-time>=I<N> + +Spend at most I<N> seconds (approximately) running tests and then terminate. + +=item B<--shuffle> + +Run the tests in a random order. + +=back + +=head1 ADDITIONAL OPTIONS + +=over + +=item B<--debug> + +Run B<lit> in debug mode, for debugging configuration issues and B<lit> itself. + +=item B<--show-suites> + +List the discovered test suites as part of the standard output. + +=item B<--no-tcl-as-sh> + +Run Tcl scripts internally (instead of converting to shell scripts). + +=back + +=head1 EXIT STATUS + +B<lit> will exit with an exit code of 1 if there are any FAIL or XPASS +results. Otherwise, it will exit with the status 0. Other exit codes used for +non-test related failures (for example a user error or an internal program +error). + +=head1 TEST DISCOVERY + +The inputs passed to B<lit> can be either individual tests, or entire +directories or hierarchies of tests to run. When B<lit> starts up, the first +thing it does is convert the inputs into a complete list of tests to run as part +of I<test discovery>. + +In the B<lit> model, every test must exist inside some I<test suite>. B<lit> +resolves the inputs specified on the command line to test suites by searching +upwards from the input path until it finds a I<lit.cfg> or I<lit.site.cfg> +file. These files serve as both a marker of test suites and as configuration +files which B<lit> loads in order to understand how to find and run the tests +inside the test suite. + +Once B<lit> has mapped the inputs into test suites it traverses the list of +inputs adding tests for individual files and recursively searching for tests in +directories. + +This behavior makes it easy to specify a subset of tests to run, while still +allowing the test suite configuration to control exactly how tests are +interpreted. In addition, B<lit> always identifies tests by the test suite they +are in, and their relative path inside the test suite. For appropriately +configured projects, this allows B<lit> to provide convenient and flexible +support for out-of-tree builds. + +=head1 TEST STATUS RESULTS + +Each test ultimately produces one of the following six results: + +=over + +=item B<PASS> + +The test succeeded. + +=item B<XFAIL> + +The test failed, but that is expected. This is used for test formats which allow +specifying that a test does not currently work, but wish to leave it in the test +suite. + +=item B<XPASS> + +The test succeeded, but it was expected to fail. This is used for tests which +were specified as expected to fail, but are now succeeding (generally because +the feautre they test was broken and has been fixed). + +=item B<FAIL> + +The test failed. + +=item B<UNRESOLVED> + +The test result could not be determined. For example, this occurs when the test +could not be run, the test itself is invalid, or the test was interrupted. + +=item B<UNSUPPORTED> + +The test is not supported in this environment. This is used by test formats +which can report unsupported tests. + +=back + +Depending on the test format tests may produce additional information about +their status (generally only for failures). See the L<Output|"LIT OUTPUT"> +section for more information. + +=head1 SEE ALSO + +L<valgrind(1)> + +=head1 AUTHOR + +Written by Daniel Dunbar and maintained by the LLVM Team (L<http://llvm.org>). + +=cut diff --git a/utils/lit/LitConfig.py b/utils/lit/LitConfig.py new file mode 100644 index 0000000000..4fb0ccc093 --- /dev/null +++ b/utils/lit/LitConfig.py @@ -0,0 +1,71 @@ +class LitConfig: + """LitConfig - Configuration data for a 'lit' test runner instance, shared + across all tests. + + The LitConfig object is also used to communicate with client configuration + files, it is always passed in as the global variable 'lit' so that + configuration files can access common functionality and internal components + easily. + """ + + # Provide access to built-in formats. + import LitFormats as formats + + # Provide access to built-in utility functions. + import Util as util + + def __init__(self, progname, path, quiet, + useValgrind, valgrindArgs, + useTclAsSh, + noExecute, debug, isWindows): + # The name of the test runner. + self.progname = progname + # The items to add to the PATH environment variable. + self.path = list(map(str, path)) + self.quiet = bool(quiet) + self.useValgrind = bool(useValgrind) + self.valgrindArgs = list(valgrindArgs) + self.useTclAsSh = bool(useTclAsSh) + self.noExecute = noExecute + self.debug = debug + self.isWindows = bool(isWindows) + + self.numErrors = 0 + self.numWarnings = 0 + + def load_config(self, config, path): + """load_config(config, path) - Load a config object from an alternate + path.""" + from TestingConfig import TestingConfig + return TestingConfig.frompath(path, config.parent, self, + mustExist = True, + config = config) + + def _write_message(self, kind, message): + import inspect, os, sys + + # Get the file/line where this message was generated. + f = inspect.currentframe() + # Step out of _write_message, and then out of wrapper. + f = f.f_back.f_back + file,line,_,_,_ = inspect.getframeinfo(f) + location = '%s:%d' % (os.path.basename(file), line) + + print >>sys.stderr, '%s: %s: %s: %s' % (self.progname, location, + kind, message) + + def note(self, message): + self._write_message('note', message) + + def warning(self, message): + self._write_message('warning', message) + self.numWarnings += 1 + + def error(self, message): + self._write_message('error', message) + self.numErrors += 1 + + def fatal(self, message): + import sys + self._write_message('fatal', message) + sys.exit(2) diff --git a/utils/lit/LitFormats.py b/utils/lit/LitFormats.py new file mode 100644 index 0000000000..cc00ddc7e7 --- /dev/null +++ b/utils/lit/LitFormats.py @@ -0,0 +1,2 @@ +from ShTest import ShTest +from TclTest import TclTest diff --git a/utils/lit/ProgressBar.py b/utils/lit/ProgressBar.py new file mode 100644 index 0000000000..85c95f57f7 --- /dev/null +++ b/utils/lit/ProgressBar.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python + +# Source: http://code.activestate.com/recipes/475116/, with +# modifications by Daniel Dunbar. + +import sys, re, time + +class TerminalController: + """ + A class that can be used to portably generate formatted output to + a terminal. + + `TerminalController` defines a set of instance variables whose + values are initialized to the control sequence necessary to + perform a given action. These can be simply included in normal + output to the terminal: + + >>> term = TerminalController() + >>> print 'This is '+term.GREEN+'green'+term.NORMAL + + Alternatively, the `render()` method can used, which replaces + '${action}' with the string required to perform 'action': + + >>> term = TerminalController() + >>> print term.render('This is ${GREEN}green${NORMAL}') + + If the terminal doesn't support a given action, then the value of + the corresponding instance variable will be set to ''. As a + result, the above code will still work on terminals that do not + support color, except that their output will not be colored. + Also, this means that you can test whether the terminal supports a + given action by simply testing the truth value of the + corresponding instance variable: + + >>> term = TerminalController() + >>> if term.CLEAR_SCREEN: + ... print 'This terminal supports clearning the screen.' + + Finally, if the width and height of the terminal are known, then + they will be stored in the `COLS` and `LINES` attributes. + """ + # Cursor movement: + BOL = '' #: Move the cursor to the beginning of the line + UP = '' #: Move the cursor up one line + DOWN = '' #: Move the cursor down one line + LEFT = '' #: Move the cursor left one char + RIGHT = '' #: Move the cursor right one char + + # Deletion: + CLEAR_SCREEN = '' #: Clear the screen and move to home position + CLEAR_EOL = '' #: Clear to the end of the line. + CLEAR_BOL = '' #: Clear to the beginning of the line. + CLEAR_EOS = '' #: Clear to the end of the screen + + # Output modes: + BOLD = '' #: Turn on bold mode + BLINK = '' #: Turn on blink mode + DIM = '' #: Turn on half-bright mode + REVERSE = '' #: Turn on reverse-video mode + NORMAL = '' #: Turn off all modes + + # Cursor display: + HIDE_CURSOR = '' #: Make the cursor invisible + SHOW_CURSOR = '' #: Make the cursor visible + + # Terminal size: + COLS = None #: Width of the terminal (None for unknown) + LINES = None #: Height of the terminal (None for unknown) + + # Foreground colors: + BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = '' + + # Background colors: + BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = '' + BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = '' + + _STRING_CAPABILITIES = """ + BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1 + CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold + BLINK=blink DIM=dim REVERSE=rev UNDERLINE=smul NORMAL=sgr0 + HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split() + _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split() + _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split() + + def __init__(self, term_stream=sys.stdout): + """ + Create a `TerminalController` and initialize its attributes + with appropriate values for the current terminal. + `term_stream` is the stream that will be used for terminal + output; if this stream is not a tty, then the terminal is + assumed to be a dumb terminal (i.e., have no capabilities). + """ + # Curses isn't available on all platforms + try: import curses + except: return + + # If the stream isn't a tty, then assume it has no capabilities. + if not term_stream.isatty(): return + + # Check the terminal type. If we fail, then assume that the + # terminal has no capabilities. + try: curses.setupterm() + except: return + + # Look up numeric capabilities. + self.COLS = curses.tigetnum('cols') + self.LINES = curses.tigetnum('lines') + + # Look up string capabilities. + for capability in self._STRING_CAPABILITIES: + (attrib, cap_name) = capability.split('=') + setattr(self, attrib, self._tigetstr(cap_name) or '') + + # Colors + set_fg = self._tigetstr('setf') + if set_fg: + for i,color in zip(range(len(self._COLORS)), self._COLORS): + setattr(self, color, curses.tparm(set_fg, i) or '') + set_fg_ansi = self._tigetstr('setaf') + if set_fg_ansi: + for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS): + setattr(self, color, curses.tparm(set_fg_ansi, i) or '') + set_bg = self._tigetstr('setb') + if set_bg: + for i,color in zip(range(len(self._COLORS)), self._COLORS): + setattr(self, 'BG_'+color, curses.tparm(set_bg, i) or '') + set_bg_ansi = self._tigetstr('setab') + if set_bg_ansi: + for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS): + setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '') + + def _tigetstr(self, cap_name): + # String capabilities can include "delays" of the form "$<2>". + # For any modern terminal, we should be able to just ignore + # these, so strip them out. + import curses + cap = curses.tigetstr(cap_name) or '' + return re.sub(r'\$<\d+>[/*]?', '', cap) + + def render(self, template): + """ + Replace each $-substitutions in the given template string with + the corresponding terminal control string (if it's defined) or + '' (if it's not). + """ + return re.sub(r'\$\$|\${\w+}', self._render_sub, template) + + def _render_sub(self, match): + s = match.group() + if s == '$$': return s + else: return getattr(self, s[2:-1]) + +####################################################################### +# Example use case: progress bar +####################################################################### + +class SimpleProgressBar: + """ + A simple progress bar which doesn't need any terminal support. + + This prints out a progress bar like: + 'Header: 0 .. 10.. 20.. ...' + """ + + def __init__(self, header): + self.header = header + self.atIndex = None + + def update(self, percent, message): + if self.atIndex is None: + sys.stdout.write(self.header) + self.atIndex = 0 + + next = int(percent*50) + if next == self.atIndex: + return + + for i in range(self.atIndex, next): + idx = i % 5 + if idx == 0: + sys.stdout.write('%-2d' % (i*2)) + elif idx == 1: + pass # Skip second char + elif idx < 4: + sys.stdout.write('.') + else: + sys.stdout.write(' ') + sys.stdout.flush() + self.atIndex = next + + def clear(self): + if self.atIndex is not None: + sys.stdout.write('\n') + sys.stdout.flush() + self.atIndex = None + +class ProgressBar: + """ + A 3-line progress bar, which looks like:: + + Header + 20% [===========----------------------------------] + progress message + + The progress bar is colored, if the terminal supports color + output; and adjusts to the width of the terminal. + """ + BAR = '%s${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}%s\n' + HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n' + + def __init__(self, term, header, useETA=True): + self.term = term + if not (self.term.CLEAR_EOL and self.term.UP and self.term.BOL): + raise ValueError("Terminal isn't capable enough -- you " + "should use a simpler progress dispaly.") + self.width = self.term.COLS or 75 + self.bar = term.render(self.BAR) + self.header = self.term.render(self.HEADER % header.center(self.width)) + self.cleared = 1 #: true if we haven't drawn the bar yet. + self.useETA = useETA + if self.useETA: + self.startTime = time.time() + self.update(0, '') + + def update(self, percent, message): + if self.cleared: + sys.stdout.write(self.header) + self.cleared = 0 + prefix = '%3d%% ' % (percent*100,) + suffix = '' + if self.useETA: + elapsed = time.time() - self.startTime + if percent > .0001 and elapsed > 1: + total = elapsed / percent + eta = int(total - elapsed) + h = eta//3600. + m = (eta//60) % 60 + s = eta % 60 + suffix = ' ETA: %02d:%02d:%02d'%(h,m,s) + barWidth = self.width - len(prefix) - len(suffix) - 2 + n = int(barWidth*percent) + if len(message) < self.width: + message = message + ' '*(self.width - len(message)) + else: + message = '... ' + message[-(self.width-4):] + sys.stdout.write( + self.term.BOL + self.term.UP + self.term.CLEAR_EOL + + (self.bar % (prefix, '='*n, '-'*(barWidth-n), suffix)) + + self.term.CLEAR_EOL + message) + + def clear(self): + if not self.cleared: + sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL + + self.term.UP + self.term.CLEAR_EOL + + self.term.UP + self.term.CLEAR_EOL) + self.cleared = 1 + +def test(): + import time + tc = TerminalController() + p = ProgressBar(tc, 'Tests') + for i in range(101): + p.update(i/100., str(i)) + time.sleep(.3) + +if __name__=='__main__': + test() diff --git a/utils/lit/ShCommands.py b/utils/lit/ShCommands.py new file mode 100644 index 0000000000..be3e680e9e --- /dev/null +++ b/utils/lit/ShCommands.py @@ -0,0 +1,86 @@ +import ShUtil + +class Command: + def __init__(self, args, redirects): + self.args = list(args) + self.redirects = list(redirects) + + def __repr__(self): + return 'Command(%r, %r)' % (self.args, self.redirects) + + def __cmp__(self, other): + if not isinstance(other, Command): + return -1 + + return cmp((self.args, self.redirects), + (other.args, other.redirects)) + + def toShell(self, file): + for arg in self.args: + if "'" not in arg: + quoted = "'%s'" % arg + elif '"' not in arg and '$' not in arg: + quoted = '"%s"' % arg + else: + raise NotImplementedError,'Unable to quote %r' % arg + print >>file, quoted, + + # For debugging / validation. + dequoted = list(ShUtil.ShLexer(quoted).lex()) + if dequoted != [arg]: + raise NotImplementedError,'Unable to quote %r' % arg + + for r in self.redirects: + if len(r[0]) == 1: + print >>file, "%s '%s'" % (r[0][0], r[1]), + else: + print >>file, "%s%s '%s'" % (r[0][1], r[0][0], r[1]), + +class Pipeline: + def __init__(self, commands, negate=False, pipe_err=False): + self.commands = commands + self.negate = negate + self.pipe_err = pipe_err + + def __repr__(self): + return 'Pipeline(%r, %r, %r)' % (self.commands, self.negate, + self.pipe_err) + + def __cmp__(self, other): + if not isinstance(other, Pipeline): + return -1 + + return cmp((self.commands, self.negate, self.pipe_err), + (other.commands, other.negate, self.pipe_err)) + + def toShell(self, file, pipefail=False): + if pipefail != self.pipe_err: + raise ValueError,'Inconsistent "pipefail" attribute!' + if self.negate: + print >>file, '!', + for cmd in self.commands: + cmd.toShell(file) + if cmd is not self.commands[-1]: + print >>file, '|\n ', + +class Seq: + def __init__(self, lhs, op, rhs): + assert op in (';', '&', '||', '&&') + self.op = op + self.lhs = lhs + self.rhs = rhs + + def __repr__(self): + return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs) + + def __cmp__(self, other): + if not isinstance(other, Seq): + return -1 + + return cmp((self.lhs, self.op, self.rhs), + (other.lhs, other.op, other.rhs)) + + def toShell(self, file, pipefail=False): + self.lhs.toShell(file, pipefail) + print >>file, ' %s\n' % self.op + self.rhs.toShell(file, pipefail) diff --git a/utils/lit/ShTest.py b/utils/lit/ShTest.py new file mode 100644 index 0000000000..fefdf7602b --- /dev/null +++ b/utils/lit/ShTest.py @@ -0,0 +1,12 @@ +import TestRunner + +class ShTest: + def __init__(self, execute_external = False, require_and_and = False): + self.execute_external = execute_external + self.require_and_and = require_and_and + + def execute(self, test, litConfig): + return TestRunner.executeShTest(test, litConfig, + self.execute_external, + self.require_and_and) + diff --git a/utils/lit/ShUtil.py b/utils/lit/ShUtil.py new file mode 100644 index 0000000000..c4bbb3d373 --- /dev/null +++ b/utils/lit/ShUtil.py @@ -0,0 +1,346 @@ +import itertools + +import Util +from ShCommands import Command, Pipeline, Seq + +class ShLexer: + def __init__(self, data, win32Escapes = False): + self.data = data + self.pos = 0 + self.end = len(data) + self.win32Escapes = win32Escapes + + def eat(self): + c = self.data[self.pos] + self.pos += 1 + return c + + def look(self): + return self.data[self.pos] + + def maybe_eat(self, c): + """ + maybe_eat(c) - Consume the character c if it is the next character, + returning True if a character was consumed. """ + if self.data[self.pos] == c: + self.pos += 1 + return True + return False + + def lex_arg_fast(self, c): + # Get the leading whitespace free section. + chunk = self.data[self.pos - 1:].split(None, 1)[0] + + # If it has special characters, the fast path failed. + if ('|' in chunk or '&' in chunk or + '<' in chunk or '>' in chunk or + "'" in chunk or '"' in chunk or + '\\' in chunk): + return None + + self.pos = self.pos - 1 + len(chunk) + return chunk + + def lex_arg_slow(self, c): + if c in "'\"": + str = self.lex_arg_quoted(c) + else: + str = c + while self.pos != self.end: + c = self.look() + if c.isspace() or c in "|&": + break + elif c in '><': + # This is an annoying case; we treat '2>' as a single token so + # we don't have to track whitespace tokens. + + # If the parse string isn't an integer, do the usual thing. + if not str.isdigit(): + break + + # Otherwise, lex the operator and convert to a redirection + # token. + num = int(str) + tok = self.lex_one_token() + assert isinstance(tok, tuple) and len(tok) == 1 + return (tok[0], num) + elif c == '"': + self.eat() + str += self.lex_arg_quoted('"')
+ elif not self.win32Escapes and c == '\\': + # Outside of a string, '\\' escapes everything. + self.eat() + if self.pos == self.end: + Util.warning("escape at end of quoted argument in: %r" % + self.data) + return str + str += self.eat() + else: + str += self.eat() + return str + + def lex_arg_quoted(self, delim): + str = '' + while self.pos != self.end: + c = self.eat() + if c == delim: + return str + elif c == '\\' and delim == '"': + # Inside a '"' quoted string, '\\' only escapes the quote + # character and backslash, otherwise it is preserved. + if self.pos == self.end: + Util.warning("escape at end of quoted argument in: %r" % + self.data) + return str + c = self.eat() + if c == '"': # + str += '"' + elif c == '\\': + str += '\\' + else: + str += '\\' + c + else: + str += c + Util.warning("missing quote character in %r" % self.data) + return str + + def lex_arg_checked(self, c): + pos = self.pos + res = self.lex_arg_fast(c) + end = self.pos + + self.pos = pos + reference = self.lex_arg_slow(c) + if res is not None: + if res != reference: + raise ValueError,"Fast path failure: %r != %r" % (res, reference) + if self.pos != end: + raise ValueError,"Fast path failure: %r != %r" % (self.pos, end) + return reference + + def lex_arg(self, c): + return self.lex_arg_fast(c) or self.lex_arg_slow(c) + + def lex_one_token(self): + """ + lex_one_token - Lex a single 'sh' token. """ + + c = self.eat() + if c in ';!': + return (c,) + if c == '|': + if self.maybe_eat('|'): + return ('||',) + return (c,) + if c == '&': + if self.maybe_eat('&'): + return ('&&',) + if self.maybe_eat('>'): + return ('&>',) + return (c,) + if c == '>': + if self.maybe_eat('&'): + return ('>&',) + if self.maybe_eat('>'): + return ('>>',) + return (c,) + if c == '<': + if self.maybe_eat('&'): + return ('<&',) + if self.maybe_eat('>'): + return ('<<',) + return (c,) + + return self.lex_arg(c) + + def lex(self): + while self.pos != self.end: + if self.look().isspace(): + self.eat() + else: + yield self.lex_one_token() + +### + +class ShParser: + def __init__(self, data, win32Escapes = False): + self.data = data + self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex() + + def lex(self): + try: + return self.tokens.next() + except StopIteration: + return None + + def look(self): + next = self.lex() + if next is not None: + self.tokens = itertools.chain([next], self.tokens) + return next + + def parse_command(self): + tok = self.lex() + if not tok: + raise ValueError,"empty command!" + if isinstance(tok, tuple): + raise ValueError,"syntax error near unexpected token %r" % tok[0] + + args = [tok] + redirects = [] + while 1: + tok = self.look() + + # EOF? + if tok is None: + break + + # If this is an argument, just add it to the current command. + if isinstance(tok, str): + args.append(self.lex()) + continue + + # Otherwise see if it is a terminator. + assert isinstance(tok, tuple) + if tok[0] in ('|',';','&','||','&&'): + break + + # Otherwise it must be a redirection. + op = self.lex() + arg = self.lex() + if not arg: + raise ValueError,"syntax error near token %r" % op[0] + redirects.append((op, arg)) + + return Command(args, redirects) + + def parse_pipeline(self): + negate = False + if self.look() == ('!',): + self.lex() + negate = True + + commands = [self.parse_command()] + while self.look() == ('|',): + self.lex() + commands.append(self.parse_command()) + return Pipeline(commands, negate) + + def parse(self): + lhs = self.parse_pipeline() + + while self.look(): + operator = self.lex() + assert isinstance(operator, tuple) and len(operator) == 1 + + if not self.look(): + raise ValueError, "missing argument to operator %r" % operator[0] + + # FIXME: Operator precedence!! + lhs = Seq(lhs, operator[0], self.parse_pipeline()) + + return lhs + +### + +import unittest + +class TestShLexer(unittest.TestCase): + def lex(self, str, *args, **kwargs): + return list(ShLexer(str, *args, **kwargs).lex()) + + def test_basic(self): + self.assertEqual(self.lex('a|b>c&d<e'), + ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd', + ('<',), 'e']) + + def test_redirection_tokens(self): + self.assertEqual(self.lex('a2>c'), + ['a2', ('>',), 'c']) + self.assertEqual(self.lex('a 2>c'), + ['a', ('>',2), 'c']) + + def test_quoting(self): + self.assertEqual(self.lex(" |