12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747 |
- # This Source Code Form is subject to the terms of the Mozilla Public
- # License, v. 2.0. If a copy of the MPL was not distributed with this
- # file, You can obtain one at http://mozilla.org/MPL/2.0/.
- """
- This is a very primitive line-based preprocessor, for times when using
- a C preprocessor isn't an option.
- It currently supports the following grammar for expressions (whitespace is
- ignored):
- expression :
- and_cond ( '||' expression ) ? ;
- and_cond:
- test ( '&&' and_cond ) ? ;
- test:
- unary ( ( '==' | '!=' ) unary ) ? ;
- unary :
- '!'? value ;
- value :
- [0-9]+ # integer
- | 'defined(' \w+ ')'
- | \w+ # string identifier or value;
- """
- import sys
- import os
- import platform
- import re
- from optparse import OptionParser
- import errno
# hack around win32 mangling our line endings
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65443
if sys.platform == "win32":
    import msvcrt
    # Switch stdout to binary mode (os.O_BINARY) and make os.linesep a bare
    # '\n' for the rest of the process.
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    os.linesep = '\n'

# Public names of this module.
__all__ = [
    'Context',
    'Expression',
    'Preprocessor',
]
class Expression:
    r"""
    A preprocessor condition, parsed into a small Abstract Syntax Tree.

    Supported grammar (whitespace between tokens is ignored):

      expression : and_cond ( '||' expression ) ? ;
      and_cond   : test ( '&&' and_cond ) ? ;
      test       : unary ( ( '==' | '!=' ) unary ) ? ;
      unary      : '!'? value ;
      value      : [0-9]+ | 'defined(' \w+ ')' | \w+ ;

    The parser consumes ``self.content`` from the left and keeps the number
    of consumed characters in ``self.offset``; the resulting tree is stored
    in ``self.e`` (``None`` for an empty expression).
    """

    def __init__(self, expression_string):
        """
        Create a new expression with this string.
        The expression will already be parsed into an Abstract Syntax Tree.

        Raises Expression.ParseError if the string is not a valid
        expression or has trailing content.
        """
        self.content = expression_string
        self.offset = 0
        self.__ignore_whitespace()
        self.e = self.__get_logical_or()
        if self.content:
            # Anything left over after a complete expression is an error.
            raise Expression.ParseError(self)

    def __get_logical_or(self):
        """
        Production: and_cond ( '||' expression ) ?
        """
        if not len(self.content):
            return None
        rv = Expression.__AST("logical_op")
        # test
        rv.append(self.__get_logical_and())
        self.__ignore_whitespace()
        if self.content[:2] != '||':
            # no logical op needed, short cut to our prime element
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        right = self.__get_logical_or()
        if right is None:
            # Input ended right after '||': report it here instead of
            # letting evaluate() trip over a missing operand later.
            raise Expression.ParseError(self)
        rv.append(right)
        self.__ignore_whitespace()
        return rv

    def __get_logical_and(self):
        """
        Production: test ( '&&' and_cond ) ?
        """
        if not len(self.content):
            return None
        rv = Expression.__AST("logical_op")
        # test
        rv.append(self.__get_equality())
        self.__ignore_whitespace()
        if self.content[:2] != '&&':
            # no logical op needed, short cut to our prime element
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        right = self.__get_logical_and()
        if right is None:
            # Input ended right after '&&'.
            raise Expression.ParseError(self)
        rv.append(right)
        self.__ignore_whitespace()
        return rv

    def __get_equality(self):
        """
        Production: unary ( ( '==' | '!=' ) unary ) ?
        """
        if not len(self.content):
            return None
        rv = Expression.__AST("equality")
        # unary
        rv.append(self.__get_unary())
        self.__ignore_whitespace()
        if not re.match(r'[=!]=', self.content):
            # no equality needed, short cut to our prime unary
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        # A missing right operand raises ParseError inside __get_value.
        rv.append(self.__get_unary())
        self.__ignore_whitespace()
        return rv

    def __get_unary(self):
        """
        Production: '!'? value
        """
        # eat whitespace right away, too
        not_ws = re.match(r'!\s*', self.content)
        if not not_ws:
            return self.__get_value()
        rv = Expression.__AST('not')
        self.__strip(not_ws.end())
        rv.append(self.__get_value())
        self.__ignore_whitespace()
        return rv

    def __get_value(self):
        r"""
        Production: ( [0-9]+ | 'defined(' \w+ ')' | \w+ )
        Note that the order is important, and the expression is kind-of
        ambiguous as \w includes 0-9. One could make it unambiguous by
        removing 0-9 from the first char of a string literal.

        Raises Expression.ParseError if none of the alternatives match.
        """
        rv = None
        m = re.match(r'defined\s*\(\s*(\w+)\s*\)', self.content)
        if m:
            word_len = m.end()
            rv = Expression.__ASTLeaf('defined', m.group(1))
        else:
            word_len = re.match(r'[0-9]*', self.content).end()
            if word_len:
                value = int(self.content[:word_len])
                rv = Expression.__ASTLeaf('int', value)
            else:
                word_len = re.match(r'\w*', self.content).end()
                if word_len:
                    rv = Expression.__ASTLeaf('string',
                                              self.content[:word_len])
                else:
                    raise Expression.ParseError(self)
        self.__strip(word_len)
        self.__ignore_whitespace()
        return rv

    def __ignore_whitespace(self):
        """
        Consume leading whitespace from the remaining input.
        """
        ws_len = re.match(r'\s*', self.content).end()
        self.__strip(ws_len)

    def __strip(self, length):
        """
        Remove a given amount of chars from the input and update
        the offset.
        """
        self.content = self.content[length:]
        self.offset += length

    def evaluate(self, context):
        """
        Evaluate the expression with the given context.

        ``context`` is indexed with ``context[name]`` for identifiers and
        tested with ``name in context`` for ``defined(name)``.

        Raises Expression.ParseError when the expression is empty.
        """
        if self.e is None:
            # Nothing was parsed (empty input); there is no tree to walk.
            raise Expression.ParseError(self)

        # Helper function to evaluate __get_equality results
        def eval_equality(tok):
            left = opmap[tok[0].type](tok[0])
            right = opmap[tok[2].type](tok[2])
            rv = left == right
            if tok[1].value == '!=':
                rv = not rv
            return rv

        # Helper function to evaluate __get_logical_and and
        # __get_logical_or results.  Both operands are always evaluated.
        def eval_logical_op(tok):
            left = opmap[tok[0].type](tok[0])
            right = opmap[tok[2].type](tok[2])
            if tok[1].value == '&&':
                return left and right
            elif tok[1].value == '||':
                return left or right
            raise Expression.ParseError(self)

        # Mapping from token types to evaluator functions
        # Apart from (non-)equality, all these can be simple lambda forms.
        opmap = {
            'logical_op': eval_logical_op,
            'equality': eval_equality,
            'not': lambda tok: not opmap[tok[0].type](tok[0]),
            'string': lambda tok: context[tok.value],
            'defined': lambda tok: tok.value in context,
            'int': lambda tok: tok.value}

        return opmap[self.e.type](self.e)

    class __AST(list):
        """
        Internal class implementing Abstract Syntax Tree nodes
        """
        def __init__(self, type):
            self.type = type
            # Name the base class explicitly; super(self.__class__, ...)
            # would recurse forever if this class were ever subclassed.
            list.__init__(self)

    class __ASTLeaf:
        """
        Internal class implementing Abstract Syntax Tree leafs
        """
        def __init__(self, type, value):
            self.value = value
            self.type = type

        def __str__(self):
            return str(self.value)

        def __repr__(self):
            return repr(self.value)

    # StandardError only exists on Python 2; keep it as the base class there
    # so existing ``except StandardError`` handlers continue to work, and
    # fall back to Exception where the name is gone.
    try:
        _parse_error_base = StandardError
    except NameError:
        _parse_error_base = Exception

    class ParseError(_parse_error_base):
        """
        Error raised when parsing fails.
        It has two members, offset and content, which give the offset of the
        error and the offending content.
        """
        def __init__(self, expression):
            self.offset = expression.offset
            # Only the first three unparsed characters are kept.
            self.content = expression.content[:3]

        def __str__(self):
            return 'Unexpected content at offset {0}, "{1}"'.format(
                self.offset, self.content)

    del _parse_error_base
class Context(dict):
    """
    This class holds variable values by subclassing dict, and while it
    truthfully reports True and False on

      name in context

    it returns the variable name itself on

      context["name"]

    to reflect the ambiguity between string literals and preprocessor
    variables.
    """
    def __getitem__(self, key):
        """
        Return the stored value for ``key``, or ``key`` itself when it is
        not defined.
        """
        if key in self:
            # Call dict directly: super(self.__class__, self) resolves to
            # this very method again when invoked on a subclass instance
            # and recurses without end.
            return dict.__getitem__(self, key)
        return key
class Preprocessor:
    """
    Class for preprocessing text files.

    Input is handled one line at a time.  A line that starts with the
    marker (``#`` by default) followed by a known command is dispatched to
    the matching ``do_<command>`` method.  Other lines starting with the
    marker are dropped; everything else is run through the active filters
    and written to ``self.out`` unless a conditional currently disables
    output.
    """

    class Error(RuntimeError):
        """
        Error raised for invalid directives and failed includes.

        ``file`` and ``line`` are read from the preprocessor context at the
        time the error is created; ``key`` is the message key passed in.
        """
        def __init__(self, cpp, MSG, context):
            self.file = cpp.context['FILE']
            self.line = cpp.context['LINE']
            self.key = MSG
            RuntimeError.__init__(self,
                                  (self.file, self.line, self.key, context))

    def __init__(self, defines=None, marker='#'):
        """
        Create a preprocessor writing to sys.stdout.

        ``defines`` may be a mapping or an iterable of key/value pairs with
        initial variable definitions; ``marker`` is passed to setMarker().
        """
        self.context = Context()
        self.context['FILE'] = ''
        self.context['LINE'] = 0
        self.context['DIRECTORY'] = os.path.abspath('.')
        # actionLevel is what warnUnused() reports on:
        # 0: nothing seen, 1: only marker lines without effect seen,
        # 2: a directive ran or a filter changed a line
        self.actionLevel = 0
        self.disableLevel = 0
        # ifStates can be
        #  0: hadTrue
        #  1: wantsTrue
        #  2: #else found
        self.ifStates = []
        self.checkLineNumbers = False
        self.filters = []
        # A command runs only while its level is >= the current
        # disableLevel, so conditionals must use a value no nesting depth
        # can reach.
        always = sys.maxsize
        levels = {'define': 0,
                  'undef': 0,
                  'if': always,
                  'ifdef': always,
                  'ifndef': always,
                  'else': 1,
                  'elif': 1,
                  'elifdef': 1,
                  'elifndef': 1,
                  'endif': always,
                  'expand': 0,
                  'literal': 0,
                  'filter': 0,
                  'unfilter': 0,
                  'include': 0,
                  'includesubst': 0,
                  'error': 0}
        self.cmds = {}
        for cmd, level in levels.items():
            self.cmds[cmd] = (level, getattr(self, 'do_' + cmd))
        self.out = sys.stdout
        self.setMarker(marker)
        self.varsubst = re.compile(r'@(?P<VAR>\w+)@', re.U)
        self.includes = set()
        self.silenceMissingDirectiveWarnings = False
        if defines:
            self.context.update(defines)

    def warnUnused(self, file):
        """
        Write a warning to stderr if processing ``file`` triggered no
        directive at all (unless silenced) or no useful one.
        """
        if self.actionLevel == 0 and not self.silenceMissingDirectiveWarnings:
            sys.stderr.write('{0}: WARNING: no preprocessor directives found\n'.format(file))
        elif self.actionLevel == 1:
            sys.stderr.write('{0}: WARNING: no useful preprocessor directives found\n'.format(file))

    def setMarker(self, aMarker):
        """
        Set the marker to be used for processing directives.
        Used for handling CSS files, with pp.setMarker('%'), for example.
        The given marker may be None, in which case no markers are processed.

        The marker is inserted into regular expressions unescaped.
        """
        self.marker = aMarker
        if aMarker:
            self.instruction = re.compile(
                r'{0}(?P<cmd>[a-z]+)(?:\s(?P<args>.*))?$'.format(aMarker),
                re.U)
            self.comment = re.compile(aMarker, re.U)
        else:
            class NoMatch(object):
                def match(self, *args):
                    return False
            self.instruction = self.comment = NoMatch()

    def setSilenceDirectiveWarnings(self, value):
        """
        Sets whether missing directive warnings are silenced, according to
        ``value``.  The default behavior of the preprocessor is to emit
        such warnings.
        """
        self.silenceMissingDirectiveWarnings = value

    def addDefines(self, defines):
        """
        Adds the specified defines to the preprocessor.
        ``defines`` may be a dictionary object or an iterable of key/value pairs
        (as tuples or other iterables of length two)
        """
        self.context.update(defines)

    def clearDefines(self):
        """
        Remove every definition from the context.
        """
        # The call parentheses were missing, which made this a no-op.
        self.context.clear()

    def clone(self):
        """
        Create a clone of the current processor, including line ending
        settings, marker, variable definitions, output stream.
        """
        rv = Preprocessor()
        rv.context.update(self.context)
        rv.setMarker(self.marker)
        rv.out = self.out
        return rv

    def processFile(self, input, output):
        """
        Preprocesses the contents of the ``input`` stream and writes the result
        to the ``output`` stream.
        """
        self.out = output
        self.do_include(input, False)
        self.warnUnused(input.name)

    def applyFilters(self, aLine):
        """
        Run ``aLine`` through every active filter in order and return the
        result.
        """
        for f in self.filters:
            aLine = f[1](aLine)
        return aLine

    def noteLineInfo(self):
        # Record the current line and file. Called once before transitioning
        # into or out of an included file and after writing each line.
        self.line_info = self.context['FILE'], self.context['LINE']

    def write(self, aLine):
        """
        Internal method for handling output.

        When line checking is enabled, a ``//@line`` marker is emitted
        first whenever the line about to be written does not directly
        follow the previously recorded one.
        """
        if not self.out:
            return

        next_line, next_file = self.context['LINE'], self.context['FILE']
        if self.checkLineNumbers:
            expected_file, expected_line = self.line_info
            expected_line += 1
            if (expected_line != next_line or
                    expected_file and expected_file != next_file):
                self.out.write('//@line {line} "{file}"\n'.format(
                    line=next_line, file=next_file))
        self.noteLineInfo()

        filteredLine = self.applyFilters(aLine)
        if filteredLine != aLine:
            # A filter changed something, so processing was useful.
            self.actionLevel = 2
        self.out.write(filteredLine)

    def handleCommandLine(self, args, defaultToStdin = False):
        """
        Parse a commandline into this parser.
        Uses OptionParser internally, no args mean sys.argv[1:].

        Each positional argument is opened and preprocessed in turn.  With
        ``defaultToStdin`` set and no positional arguments, sys.stdin is
        processed instead.
        """
        def get_output_file(path):
            # Create the target directory on demand; an already existing
            # directory is fine.
            directory = os.path.dirname(path)
            if directory:
                try:
                    os.makedirs(directory)
                except OSError as error:
                    if error.errno != errno.EEXIST:
                        raise
            return open(path, 'wb')

        p = self.getCommandLineParser()
        options, args = p.parse_args(args=args)
        out = self.out

        if options.output:
            out = get_output_file(options.output)
        if defaultToStdin and len(args) == 0:
            args = [sys.stdin]
        try:
            for f in args:
                if hasattr(f, 'read'):
                    # Already a stream (the stdin default); passing it to
                    # open() would raise a TypeError.
                    self.processFile(input=f, output=out)
                else:
                    with open(f, 'rU') as input:
                        self.processFile(input=input, output=out)
        finally:
            # Close the file we opened even when processing fails.
            if options.output:
                out.close()

    def getCommandLineParser(self, unescapeDefines = False):
        """
        Build the OptionParser for the command line interface.

        The -D, -U, -F, --marker and
        --silence-missing-directive-warnings options act on this
        preprocessor while the command line is being parsed.  With
        ``unescapeDefines`` set, -D values wrapped in double quotes have
        the quotes removed.
        """
        escapedValue = re.compile(r'".*"$')
        numberValue = re.compile(r'\d+$')

        def handleD(option, opt, value, parser):
            vals = value.split('=', 1)
            if len(vals) == 1:
                vals.append(1)
            elif unescapeDefines and escapedValue.match(vals[1]):
                # strip escaped string values
                vals[1] = vals[1][1:-1]
            elif numberValue.match(vals[1]):
                vals[1] = int(vals[1])
            self.context[vals[0]] = vals[1]

        def handleU(option, opt, value, parser):
            # Undefining an unknown name is a no-op, as in do_undef().
            self.context.pop(value, None)

        def handleF(option, opt, value, parser):
            self.do_filter(value)

        def handleMarker(option, opt, value, parser):
            self.setMarker(value)

        def handleSilenceDirectiveWarnings(option, opt, value, parse):
            self.setSilenceDirectiveWarnings(True)

        p = OptionParser()
        p.add_option('-D', action='callback', callback=handleD, type="string",
                     metavar="VAR[=VAL]", help='Define a variable')
        p.add_option('-U', action='callback', callback=handleU, type="string",
                     metavar="VAR", help='Undefine a variable')
        p.add_option('-F', action='callback', callback=handleF, type="string",
                     metavar="FILTER", help='Enable the specified filter')
        p.add_option('-o', '--output', type="string", default=None,
                     metavar="FILENAME", help='Output to the specified file '+
                     'instead of stdout')
        p.add_option('--marker', action='callback', callback=handleMarker,
                     type="string",
                     help='Use the specified marker instead of #')
        p.add_option('--silence-missing-directive-warnings', action='callback',
                     callback=handleSilenceDirectiveWarnings,
                     help='Don\'t emit warnings about missing directives')
        return p

    def handleLine(self, aLine):
        """
        Handle a single line of input (internal).

        Raises Preprocessor.Error('INVALID_CMD') for an unknown directive.
        """
        if self.actionLevel == 0 and self.comment.match(aLine):
            self.actionLevel = 1
        m = self.instruction.match(aLine)
        if m:
            name = m.group('cmd')
            # The args group is optional; hand the handlers an empty string
            # rather than None for a bare directive.
            args = m.group('args')
            if args is None:
                args = ''
            if name not in self.cmds:
                raise Preprocessor.Error(self, 'INVALID_CMD', aLine)
            level, handler = self.cmds[name]
            if level >= self.disableLevel:
                handler(args)
            # Compare the directive *name*: the handler object itself never
            # equals 'literal', which made this test always true.
            if name != 'literal':
                self.actionLevel = 2
        elif self.disableLevel == 0 and not self.comment.match(aLine):
            self.write(aLine)

    # Instruction handlers
    # These are named do_'instruction name' and take one argument

    # Variables
    def do_define(self, args):
        """
        #define NAME [VALUE]

        VALUE is passed through the active filters and stored as an int
        when it parses as one; a missing VALUE stores the empty string.
        """
        m = re.match(r'(?P<name>\w+)(?:\s(?P<value>.*))?', args or '', re.U)
        if not m:
            raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
        val = ''
        if m.group('value'):
            val = self.applyFilters(m.group('value'))
            try:
                val = int(val)
            except (TypeError, ValueError):
                # Not a number, keep the filtered text.
                pass
        self.context[m.group('name')] = val

    def do_undef(self, args):
        """
        #undef NAME

        Undefining a name that is not defined is a no-op.
        """
        m = re.match(r'(?P<name>\w+)$', args or '', re.U)
        if not m:
            raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
        if args in self.context:
            del self.context[args]

    # Logic
    def ensure_not_else(self):
        """
        Warn on stderr about an #else/#elif without an open #if, or after
        another #else.  This only warns; it does not stop processing.
        """
        if len(self.ifStates) == 0 or self.ifStates[-1] == 2:
            sys.stderr.write('WARNING: bad nesting of #else\n')

    def do_if(self, args, replace=False):
        """
        #if EXPRESSION

        With ``replace`` set (used by #elif) the state of the current
        conditional is replaced instead of a new one being opened.
        Raises Preprocessor.Error('SYNTAX_ERR') if the expression cannot be
        parsed or evaluated.
        """
        if self.disableLevel and not replace:
            # Nested inside a disabled block: only track the depth.
            self.disableLevel += 1
            return
        val = None
        try:
            e = Expression(args)
            val = e.evaluate(self.context)
        except Exception:
            # XXX do real error reporting
            raise Preprocessor.Error(self, 'SYNTAX_ERR', args)
        if isinstance(val, str):
            # we're looking for a number value, strings are false
            val = False
        if not val:
            self.disableLevel = 1
        if replace:
            if val:
                self.disableLevel = 0
            self.ifStates[-1] = self.disableLevel
        else:
            self.ifStates.append(self.disableLevel)

    def do_ifdef(self, args, replace=False):
        """
        #ifdef NAME

        Raises Preprocessor.Error('INVALID_VAR') if NAME is missing or
        starts with a non-word character.
        """
        if self.disableLevel and not replace:
            self.disableLevel += 1
            return
        if not args or re.match(r'\W', args, re.U):
            raise Preprocessor.Error(self, 'INVALID_VAR', args)
        if args not in self.context:
            self.disableLevel = 1
        if replace:
            if args in self.context:
                self.disableLevel = 0
            self.ifStates[-1] = self.disableLevel
        else:
            self.ifStates.append(self.disableLevel)

    def do_ifndef(self, args, replace=False):
        """
        #ifndef NAME

        Raises Preprocessor.Error('INVALID_VAR') if NAME is missing or
        starts with a non-word character.
        """
        if self.disableLevel and not replace:
            self.disableLevel += 1
            return
        if not args or re.match(r'\W', args, re.U):
            raise Preprocessor.Error(self, 'INVALID_VAR', args)
        if args in self.context:
            self.disableLevel = 1
        if replace:
            if args not in self.context:
                self.disableLevel = 0
            self.ifStates[-1] = self.disableLevel
        else:
            self.ifStates.append(self.disableLevel)

    def do_else(self, args, ifState = 2):
        """
        #else

        Output is enabled only if no earlier branch of this conditional was
        taken.  NOTE: with no open conditional this still fails with an
        IndexError after the warning from ensure_not_else().
        """
        self.ensure_not_else()
        hadTrue = self.ifStates[-1] == 0
        self.ifStates[-1] = ifState # in-else
        if hadTrue:
            self.disableLevel = 1
            return
        self.disableLevel = 0

    def do_elif(self, args):
        """
        #elif EXPRESSION
        """
        if self.disableLevel == 1:
            # Only evaluate while this conditional still wants a true branch.
            if self.ifStates[-1] == 1:
                self.do_if(args, replace=True)
        else:
            self.do_else(None, self.ifStates[-1])

    def do_elifdef(self, args):
        """
        #elifdef NAME
        """
        if self.disableLevel == 1:
            if self.ifStates[-1] == 1:
                self.do_ifdef(args, replace=True)
        else:
            self.do_else(None, self.ifStates[-1])

    def do_elifndef(self, args):
        """
        #elifndef NAME
        """
        if self.disableLevel == 1:
            if self.ifStates[-1] == 1:
                self.do_ifndef(args, replace=True)
        else:
            self.do_else(None, self.ifStates[-1])

    def do_endif(self, args):
        """
        #endif

        NOTE: an #endif without an open conditional fails with an
        IndexError from the empty state stack.
        """
        if self.disableLevel > 0:
            self.disableLevel -= 1
        if self.disableLevel == 0:
            self.ifStates.pop()

    # output processing
    def do_expand(self, args):
        """
        #expand TEXT

        Write TEXT with every ``__NAME__`` replaced by the value of NAME,
        or by nothing if NAME is not defined.
        """
        # Compile with the flag: passing re.U positionally to re.split()
        # sets maxsplit (to 32) instead of the flags.
        lst = re.compile(r'__(\w+)__', re.U).split(args)

        def vsubst(v):
            if v in self.context:
                return str(self.context[v])
            return ''

        # With one capturing group, the odd indices hold the variable names.
        for i in range(1, len(lst), 2):
            lst[i] = vsubst(lst[i])
        lst.append('\n') # add back the newline
        self.write(''.join(lst))

    def do_literal(self, args):
        """
        #literal TEXT

        Write TEXT followed by a newline.
        """
        self.write(args + '\n')

    def do_filter(self, args):
        """
        #filter NAME [NAME ...]

        Enable the named filters.  Names without a matching ``filter_NAME``
        method are ignored.  Active filters are kept sorted by name.
        """
        filters = [f for f in args.split(' ') if hasattr(self, 'filter_' + f)]
        if len(filters) == 0:
            return
        current = dict(self.filters)
        for f in filters:
            current[f] = getattr(self, 'filter_' + f)
        self.filters = [(fn, current[fn]) for fn in sorted(current)]

    def do_unfilter(self, args):
        """
        #unfilter NAME [NAME ...]

        Disable the named filters; names that are not active are ignored.
        """
        current = dict(self.filters)
        for f in args.split(' '):
            if f in current:
                del current[f]
        self.filters = [(fn, current[fn]) for fn in sorted(current)]

    # Filters
    #
    # emptyLines
    #   Strips blank lines from the output.
    def filter_emptyLines(self, aLine):
        if aLine == '\n':
            return ''
        return aLine

    # slashslash
    #   Strips everything after //
    def filter_slashslash(self, aLine):
        if aLine.find('//') == -1:
            return aLine
        aLine, rest = aLine.split('//', 1)
        if rest:
            # Something was cut off; terminate the line again.
            aLine += '\n'
        return aLine

    # spaces
    #   Collapses sequences of spaces into a single space
    def filter_spaces(self, aLine):
        return re.sub(' +', ' ', aLine).strip(' ')

    # substition
    #   helper to be used by both substition and attemptSubstitution
    def filter_substitution(self, aLine, fatal=True):
        """
        Replace every ``@NAME@`` in ``aLine`` by the value of NAME.

        An undefined NAME raises Preprocessor.Error('UNDEFINED_VAR') when
        ``fatal`` is set and is left untouched otherwise.
        """
        def repl(matchobj):
            varname = matchobj.group('VAR')
            if varname in self.context:
                return str(self.context[varname])
            if fatal:
                raise Preprocessor.Error(self, 'UNDEFINED_VAR', varname)
            return matchobj.group(0)
        return self.varsubst.sub(repl, aLine)

    def filter_attemptSubstitution(self, aLine):
        return self.filter_substitution(aLine, fatal=False)

    # File ops
    def do_include(self, args, filters=True):
        """
        Preprocess a given file.
        args can either be a file name, or a file-like object.
        A file name is passed through the active filters (unless
        ``filters`` is false), resolved against the current DIRECTORY and
        opened here; such a file is closed again after processing.  A
        file-like object is left open for the caller.

        Raises Preprocessor.Error('FILE_NOT_FOUND') if the name cannot be
        opened.
        """
        # type(u'') is unicode on Python 2 and str on Python 3.
        isName = isinstance(args, (str, type(u'')))
        oldCheckLineNumbers = self.checkLineNumbers
        self.checkLineNumbers = False
        if isName:
            try:
                args = str(args)
                if filters:
                    args = self.applyFilters(args)
                if not os.path.isabs(args):
                    args = os.path.join(self.context['DIRECTORY'], args)
                args = open(args, 'rU')
            except Preprocessor.Error:
                raise
            except Exception:
                raise Preprocessor.Error(self, 'FILE_NOT_FOUND', str(args))
        try:
            # Line markers are only emitted for these file types.
            self.checkLineNumbers = bool(
                re.search(r'\.(js|jsm|java)(?:\.in)?$', args.name))
            oldFile = self.context['FILE']
            oldLine = self.context['LINE']
            oldDir = self.context['DIRECTORY']
            self.noteLineInfo()

            if args.isatty():
                # we're stdin, use '-' and '' for file and dir
                self.context['FILE'] = '-'
                self.context['DIRECTORY'] = ''
            else:
                abspath = os.path.abspath(args.name)
                self.includes.add(abspath)
                self.context['FILE'] = abspath
                self.context['DIRECTORY'] = os.path.dirname(abspath)
            self.context['LINE'] = 0

            for l in args:
                self.context['LINE'] += 1
                self.handleLine(l)
        finally:
            # Do not leak the file we opened when a directive fails.
            if isName:
                args.close()

        self.context['FILE'] = oldFile
        self.checkLineNumbers = oldCheckLineNumbers
        self.context['LINE'] = oldLine
        self.context['DIRECTORY'] = oldDir

    def do_includesubst(self, args):
        """
        #includesubst FILE

        Like #include, with ``@NAME@`` substitution applied to FILE first.
        """
        args = self.filter_substitution(args)
        self.do_include(args)

    def do_error(self, args):
        """
        #error MESSAGE

        Always raises Preprocessor.Error with MESSAGE as its context.
        """
        raise Preprocessor.Error(self, 'Error: ', str(args))
# Keep this module independently executable.
if __name__ == "__main__":
    pp = Preprocessor()
    # args=None makes the option parser read sys.argv[1:]; True enables the
    # defaultToStdin behaviour when no input files are given.
    pp.handleCommandLine(None, True)
|