diff options
Diffstat (limited to 'bitbake')
-rw-r--r-- | bitbake/lib/bb/codeparser.py | 273 | ||||
-rw-r--r-- | bitbake/lib/bb/data_smart.py | 6 |
2 files changed, 276 insertions, 3 deletions
# Reconstructed from the patch that adds bitbake/lib/bb/codeparser.py
# (diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py,
#  new file mode 100644, index 000000000..88a26c82a).
"""Static analysis helpers for code embedded in bitbake metadata.

PythonParser walks the AST of a python snippet and records the variable
names passed to the bitbake data APIs and the functions the snippet calls.
ShellParser walks a pysh syntax tree and records the commands a shell
snippet executes.

This module targets Python 2 (it relies on ``basestring``).
"""

from pysh import pyshyacc, pyshlex
from itertools import chain
from bb import msg, utils
import ast
import codegen


def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove'
    the indentation.

    Returns codestr prefixed with "if 1:\\n" when its first character is a
    space or a tab, otherwise returns codestr unchanged.  An empty string is
    returned unchanged.
    """

    # Compare by value, not identity: "is" on string literals only works by
    # accident of interning.  Slicing (rather than indexing) keeps an empty
    # string from raising IndexError.
    # NOTE(review): the second literal is assumed to have been a tab that was
    # flattened to a space when the patch text was extracted - confirm.
    if codestr[:1] in (" ", "\t"):
        return "if 1:\n" + codestr

    return codestr


# Maps the code string handed to PythonParser.parse_python to the
# PythonParser instance that parsed it.
pythonparsecache = {}


class PythonParser():
    """Collect the variable references and function calls of a python
    snippet.

    After parse_python():
      references - names passed as string literals to the getVar style APIs
                   and to bb.build.exec_func / exec_task
      execs      - names of the other functions called by the snippet
    """

    class ValueVisitor():
        """Visitor to traverse a python abstract syntax tree and obtain
        the variables referenced via bitbake metadata APIs, and the external
        functions called.
        """

        getvars = ("d.getVar", "bb.data.getVar", "data.getVar")
        expands = ("d.expand", "bb.data.expand", "data.expand")
        execs = ("bb.build.exec_func", "bb.build.exec_task")

        @classmethod
        def _compare_name(cls, strparts, node):
            """Given a sequence of strings representing a python name,
            where the last component is the actual Name and the prior
            elements are Attribute nodes, determine if the supplied node
            matches.

            strparts is ordered innermost attribute first (i.e. the dotted
            name reversed).  Matching stops successfully as soon as strparts
            is exhausted or a Name node equal to the current part is reached.
            """

            if not strparts:
                return True

            current, rest = strparts[0], strparts[1:]
            if isinstance(node, ast.Attribute):
                if current == node.attr:
                    return cls._compare_name(rest, node.value)
            elif isinstance(node, ast.Name):
                if current == node.id:
                    return True
            return False

        @classmethod
        def compare_name(cls, value, node):
            """Convenience function for the _compare_name method, which
            can accept a string (which is split by '.' for you), or an
            iterable of strings, in which case it checks to see if any of
            them match, similar to isinstance.
            """

            if isinstance(value, basestring):
                parts = tuple(reversed(value.split(".")))
                return cls._compare_name(parts, node)

            return any(cls.compare_name(item, node) for item in value)

        def __init__(self, value):
            # Names passed as literals to the getVar style APIs.
            self.var_references = set()
            # Names passed as literals to bb.build.exec_func / exec_task.
            self.var_execs = set()
            # Dotted names of every other function that is called.
            self.direct_func_calls = set()
            # String literals passed to the expand style APIs.
            self.var_expands = set()
            self.value = value

        @classmethod
        def warn(cls, func, arg):
            """Warn about calls of bitbake APIs which pass a non-literal
            argument for the variable name, as we're not able to track such
            a reference.
            """

            try:
                funcstr = codegen.to_source(func)
                argstr = codegen.to_source(arg)
            except TypeError:
                msg.debug(2, None, "Failed to convert function and argument to source form")
            else:
                msg.debug(1, None, "Warning: in call to '%s', argument '%s' is not a literal" %
                          (funcstr, argstr))

        def visit_Call(self, node):
            """Record what a single ast.Call node references.

            Calls to the getVar / expand / exec_func style APIs have their
            first positional argument recorded when it is a string literal
            and reported through warn() when it is not.  Any other call is
            recorded in direct_func_calls under its (dotted) name.
            """

            func = node.func
            # A tracked API may be called without positional arguments
            # (e.g. keywords only); there is then nothing to record, and
            # indexing args[0] would raise IndexError.
            arg = node.args[0] if node.args else None

            if self.compare_name(self.getvars, func):
                if isinstance(arg, ast.Str):
                    self.var_references.add(arg.s)
                elif arg is not None:
                    self.warn(func, arg)
            elif self.compare_name(self.expands, func):
                if isinstance(arg, ast.Str):
                    # add(), not update(): update() on a string would insert
                    # every character of it as a separate element.  A literal
                    # is also not something to warn about.
                    self.var_expands.add(arg.s)
                elif isinstance(arg, ast.Call) and \
                     self.compare_name(self.getvars, arg.func):
                    # expand(getVar(...)): nothing to record here, the inner
                    # getVar call is a Call node of its own.
                    pass
                elif arg is not None:
                    self.warn(func, arg)
            elif self.compare_name(self.execs, func):
                if isinstance(arg, ast.Str):
                    self.var_execs.add(arg.s)
                elif arg is not None:
                    self.warn(func, arg)
            elif isinstance(func, ast.Name):
                self.direct_func_calls.add(func.id)
            elif isinstance(func, ast.Attribute):
                # We must have a qualified name.  Therefore we need
                # to walk the chain of 'Attribute' nodes to determine
                # the qualification.
                attr_node = func.value
                identifier = func.attr
                while isinstance(attr_node, ast.Attribute):
                    identifier = attr_node.attr + "." + identifier
                    attr_node = attr_node.value
                if isinstance(attr_node, ast.Name):
                    identifier = attr_node.id + "." + identifier
                self.direct_func_calls.add(identifier)

    def __init__(self):
        #self.funcdefs = set()
        self.execs = set()
        #self.external_cmds = set()
        self.references = set()

    def parse_python(self, node):
        """Parse the python code in node (converted with str()) and fill in
        self.references and self.execs.

        Results are memoised in pythonparsecache, keyed by node.
        """

        cached = pythonparsecache.get(node)
        if cached is not None:
            # Copy, so that later changes to this parser's sets can never
            # leak into the cached entry (or the other way round).
            self.references = set(cached.references)
            self.execs = set(cached.execs)
            return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        visitor = self.ValueVisitor(code)
        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                visitor.visit_Call(n)

        self.references.update(visitor.var_references)
        self.references.update(visitor.var_execs)
        self.execs = visitor.direct_func_calls

        pythonparsecache[node] = self


# Maps the shell code string handed to ShellParser.parse_shell to the
# ShellParser instance that parsed it.
shellparsecache = {}


class ShellSyntaxError(Exception):
    """Raised by ShellParser when the shell code ends unexpectedly.

    NOTE(review): the original code raised this name without defining or
    importing it; if pysh ships its own ShellSyntaxError, importing that one
    instead may be preferable - confirm.
    """


class ShellParser():
    """Collect the commands executed by a piece of shell code.

    After parse_shell():
      funcdefs - names of the shell functions defined by the code
      allexecs - every command name that was seen being executed
      execs    - allexecs minus the functions defined in the code itself
    """

    def __init__(self):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        Results are memoised in shellparsecache, keyed by value.  Raises
        ShellSyntaxError when the code is incomplete.
        """

        cached = shellparsecache.get(value)
        if cached is not None:
            # Copy for the same reason as in PythonParser.parse_python, and
            # return the result just like the uncached path does.
            self.execs = set(cached.execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        shellparsecache[value] = self

        return self.execs

    def _parse_shell(self, value):
        """Parse value and accumulate into funcdefs / allexecs.

        Used directly for nested code ($(), backticks, eval) so that parsing
        a fragment neither overwrites self.execs nor registers this parser
        in shellparsecache under the fragment's text.
        """

        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Every handler returns a pair (more_tokens, words): the nested tokens
        to recurse into and the words to hand to process_words.  Raises
        NotImplementedError for a token type without a handler.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain.from_iterable(item[0] for item in value.items)
            cmds = chain.from_iterable(item[1] for item in value.items)
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            # Only the lookup is guarded: a KeyError raised inside a handler
            # must not be misreported as an unsupported token type.
            try:
                handler = token_handlers[name]
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            more_tokens, words = handler(value)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy, as command words consisting of a substitution
        # are removed from 'words' along the way.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # An assignment in command position: the command name is
                    # then looked for in the TOKEN words that follow.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    msg.debug(1, None, "Warning: execution of non-literal command '%s'" % cmd)
                elif cmd == "eval":
                    command = " ".join(text for _, text in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break


# The same patch also changes bitbake/lib/bb/data_smart.py.  Those hunks are
# kept here for reference (content preserved, whitespace reconstructed):
#
# diff --git a/bitbake/lib/bb/data_smart.py b/bitbake/lib/bb/data_smart.py
# index 1ed04d50c..b9d9476fd 100644
# --- a/bitbake/lib/bb/data_smart.py
# +++ b/bitbake/lib/bb/data_smart.py
# @@ -46,7 +46,7 @@ class VariableParse:
#          self.value = val
#
#          self.references = set()
# -        self.funcrefs = set()
# +        self.execs = set()
#
#      def var_sub(self, match):
#              key = match.group()[2:-1]
# @@ -64,10 +64,10 @@ class VariableParse:
#              code = match.group()[3:-1]
#              codeobj = compile(code.strip(), self.varname or "<expansion>", "eval")
#
# -            parser = bb.rptest.PythonParser()
# +            parser = bb.codeparser.PythonParser()
#              parser.parse_python(code)
#              self.references |= parser.references
# -            self.funcrefs |= parser.execs
# +            self.execs |= parser.execs
#
#              value = utils.better_eval(codeobj, {"d": self.d})
#              return str(value)