diff options
Diffstat (limited to 'lib/cffi/cparser.py')
-rw-r--r-- | lib/cffi/cparser.py | 1006 |
1 files changed, 1006 insertions, 0 deletions
diff --git a/lib/cffi/cparser.py b/lib/cffi/cparser.py new file mode 100644 index 0000000..74830e9 --- /dev/null +++ b/lib/cffi/cparser.py @@ -0,0 +1,1006 @@ +from . import model +from .commontypes import COMMON_TYPES, resolve_common_type +from .error import FFIError, CDefError +try: + from . import _pycparser as pycparser +except ImportError: + import pycparser +import weakref, re, sys + +try: + if sys.version_info < (3,): + import thread as _thread + else: + import _thread + lock = _thread.allocate_lock() +except ImportError: + lock = None + +def _workaround_for_static_import_finders(): + # Issue #392: packaging tools like cx_Freeze can not find these + # because pycparser uses exec dynamic import. This is an obscure + # workaround. This function is never called. + import pycparser.yacctab + import pycparser.lextab + +CDEF_SOURCE_STRING = "<cdef source string>" +_r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", + re.DOTALL | re.MULTILINE) +_r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)" + r"\b((?:[^\n\\]|\\.)*?)$", + re.DOTALL | re.MULTILINE) +_r_line_directive = re.compile(r"^[ \t]*#[ \t]*(?:line|\d+)\b.*$", re.MULTILINE) +_r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}") +_r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$") +_r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]") +_r_words = re.compile(r"\w+|\S") +_parser_cache = None +_r_int_literal = re.compile(r"-?0?x?[0-9a-f]+[lu]*$", re.IGNORECASE) +_r_stdcall1 = re.compile(r"\b(__stdcall|WINAPI)\b") +_r_stdcall2 = re.compile(r"[(]\s*(__stdcall|WINAPI)\b") +_r_cdecl = re.compile(r"\b__cdecl\b") +_r_extern_python = re.compile(r'\bextern\s*"' + r'(Python|Python\s*\+\s*C|C\s*\+\s*Python)"\s*.') +_r_star_const_space = re.compile( # matches "* const " + r"[*]\s*((const|volatile|restrict)\b\s*)+") +_r_int_dotdotdot = re.compile(r"(\b(int|long|short|signed|unsigned|char)\s*)+" + r"\.\.\.") +_r_float_dotdotdot = re.compile(r"\b(double|float)\s*\.\.\.") + +def _get_parser(): + global _parser_cache + if _parser_cache is None: + _parser_cache = pycparser.CParser() + return _parser_cache + +def _workaround_for_old_pycparser(csource): + # Workaround for a pycparser issue (fixed between pycparser 2.10 and + # 2.14): "char*const***" gives us a wrong syntax tree, the same as + # for "char***(*const)". This means we can't tell the difference + # afterwards. But "char(*const(***))" gives us the right syntax + # tree. The issue only occurs if there are several stars in + # sequence with no parenthesis inbetween, just possibly qualifiers. + # Attempt to fix it by adding some parentheses in the source: each + # time we see "* const" or "* const *", we add an opening + # parenthesis before each star---the hard part is figuring out where + # to close them. + parts = [] + while True: + match = _r_star_const_space.search(csource) + if not match: + break + #print repr(''.join(parts)+csource), '=>', + parts.append(csource[:match.start()]) + parts.append('('); closing = ')' + parts.append(match.group()) # e.g. "* const " + endpos = match.end() + if csource.startswith('*', endpos): + parts.append('('); closing += ')' + level = 0 + i = endpos + while i < len(csource): + c = csource[i] + if c == '(': + level += 1 + elif c == ')': + if level == 0: + break + level -= 1 + elif c in ',;=': + if level == 0: + break + i += 1 + csource = csource[endpos:i] + closing + csource[i:] + #print repr(''.join(parts)+csource) + parts.append(csource) + return ''.join(parts) + +def _preprocess_extern_python(csource): + # input: `extern "Python" int foo(int);` or + # `extern "Python" { int foo(int); }` + # output: + # void __cffi_extern_python_start; + # int foo(int); + # void __cffi_extern_python_stop; + # + # input: `extern "Python+C" int foo(int);` + # output: + # void __cffi_extern_python_plus_c_start; + # int foo(int); + # void __cffi_extern_python_stop; + parts = [] + while True: + match = _r_extern_python.search(csource) + if not match: + break + endpos = match.end() - 1 + #print + #print ''.join(parts)+csource + #print '=>' + parts.append(csource[:match.start()]) + if 'C' in match.group(1): + parts.append('void __cffi_extern_python_plus_c_start; ') + else: + parts.append('void __cffi_extern_python_start; ') + if csource[endpos] == '{': + # grouping variant + closing = csource.find('}', endpos) + if closing < 0: + raise CDefError("'extern \"Python\" {': no '}' found") + if csource.find('{', endpos + 1, closing) >= 0: + raise NotImplementedError("cannot use { } inside a block " + "'extern \"Python\" { ... }'") + parts.append(csource[endpos+1:closing]) + csource = csource[closing+1:] + else: + # non-grouping variant + semicolon = csource.find(';', endpos) + if semicolon < 0: + raise CDefError("'extern \"Python\": no ';' found") + parts.append(csource[endpos:semicolon+1]) + csource = csource[semicolon+1:] + parts.append(' void __cffi_extern_python_stop;') + #print ''.join(parts)+csource + #print + parts.append(csource) + return ''.join(parts) + +def _warn_for_string_literal(csource): + if '"' not in csource: + return + for line in csource.splitlines(): + if '"' in line and not line.lstrip().startswith('#'): + import warnings + warnings.warn("String literal found in cdef() or type source. " + "String literals are ignored here, but you should " + "remove them anyway because some character sequences " + "confuse pre-parsing.") + break + +def _warn_for_non_extern_non_static_global_variable(decl): + if not decl.storage: + import warnings + warnings.warn("Global variable '%s' in cdef(): for consistency " + "with C it should have a storage class specifier " + "(usually 'extern')" % (decl.name,)) + +def _remove_line_directives(csource): + # _r_line_directive matches whole lines, without the final \n, if they + # start with '#line' with some spacing allowed, or '#NUMBER'. This + # function stores them away and replaces them with exactly the string + # '#line@N', where N is the index in the list 'line_directives'. + line_directives = [] + def replace(m): + i = len(line_directives) + line_directives.append(m.group()) + return '#line@%d' % i + csource = _r_line_directive.sub(replace, csource) + return csource, line_directives + +def _put_back_line_directives(csource, line_directives): + def replace(m): + s = m.group() + if not s.startswith('#line@'): + raise AssertionError("unexpected #line directive " + "(should have been processed and removed") + return line_directives[int(s[6:])] + return _r_line_directive.sub(replace, csource) + +def _preprocess(csource): + # First, remove the lines of the form '#line N "filename"' because + # the "filename" part could confuse the rest + csource, line_directives = _remove_line_directives(csource) + # Remove comments. NOTE: this only work because the cdef() section + # should not contain any string literals (except in line directives)! + def replace_keeping_newlines(m): + return ' ' + m.group().count('\n') * '\n' + csource = _r_comment.sub(replace_keeping_newlines, csource) + # Remove the "#define FOO x" lines + macros = {} + for match in _r_define.finditer(csource): + macroname, macrovalue = match.groups() + macrovalue = macrovalue.replace('\\\n', '').strip() + macros[macroname] = macrovalue + csource = _r_define.sub('', csource) + # + if pycparser.__version__ < '2.14': + csource = _workaround_for_old_pycparser(csource) + # + # BIG HACK: replace WINAPI or __stdcall with "volatile const". + # It doesn't make sense for the return type of a function to be + # "volatile volatile const", so we abuse it to detect __stdcall... + # Hack number 2 is that "int(volatile *fptr)();" is not valid C + # syntax, so we place the "volatile" before the opening parenthesis. + csource = _r_stdcall2.sub(' volatile volatile const(', csource) + csource = _r_stdcall1.sub(' volatile volatile const ', csource) + csource = _r_cdecl.sub(' ', csource) + # + # Replace `extern "Python"` with start/end markers + csource = _preprocess_extern_python(csource) + # + # Now there should not be any string literal left; warn if we get one + _warn_for_string_literal(csource) + # + # Replace "[...]" with "[__dotdotdotarray__]" + csource = _r_partial_array.sub('[__dotdotdotarray__]', csource) + # + # Replace "...}" with "__dotdotdotNUM__}". This construction should + # occur only at the end of enums; at the end of structs we have "...;}" + # and at the end of vararg functions "...);". Also replace "=...[,}]" + # with ",__dotdotdotNUM__[,}]": this occurs in the enums too, when + # giving an unknown value. + matches = list(_r_partial_enum.finditer(csource)) + for number, match in enumerate(reversed(matches)): + p = match.start() + if csource[p] == '=': + p2 = csource.find('...', p, match.end()) + assert p2 > p + csource = '%s,__dotdotdot%d__ %s' % (csource[:p], number, + csource[p2+3:]) + else: + assert csource[p:p+3] == '...' + csource = '%s __dotdotdot%d__ %s' % (csource[:p], number, + csource[p+3:]) + # Replace "int ..." or "unsigned long int..." with "__dotdotdotint__" + csource = _r_int_dotdotdot.sub(' __dotdotdotint__ ', csource) + # Replace "float ..." or "double..." with "__dotdotdotfloat__" + csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource) + # Replace all remaining "..." with the same name, "__dotdotdot__", + # which is declared with a typedef for the purpose of C parsing. + csource = csource.replace('...', ' __dotdotdot__ ') + # Finally, put back the line directives + csource = _put_back_line_directives(csource, line_directives) + return csource, macros + +def _common_type_names(csource): + # Look in the source for what looks like usages of types from the + # list of common types. A "usage" is approximated here as the + # appearance of the word, minus a "definition" of the type, which + # is the last word in a "typedef" statement. Approximative only + # but should be fine for all the common types. + look_for_words = set(COMMON_TYPES) + look_for_words.add(';') + look_for_words.add(',') + look_for_words.add('(') + look_for_words.add(')') + look_for_words.add('typedef') + words_used = set() + is_typedef = False + paren = 0 + previous_word = '' + for word in _r_words.findall(csource): + if word in look_for_words: + if word == ';': + if is_typedef: + words_used.discard(previous_word) + look_for_words.discard(previous_word) + is_typedef = False + elif word == 'typedef': + is_typedef = True + paren = 0 + elif word == '(': + paren += 1 + elif word == ')': + paren -= 1 + elif word == ',': + if is_typedef and paren == 0: + words_used.discard(previous_word) + look_for_words.discard(previous_word) + else: # word in COMMON_TYPES + words_used.add(word) + previous_word = word + return words_used + + +class Parser(object): + + def __init__(self): + self._declarations = {} + self._included_declarations = set() + self._anonymous_counter = 0 + self._structnode2type = weakref.WeakKeyDictionary() + self._options = {} + self._int_constants = {} + self._recomplete = [] + self._uses_new_feature = None + + def _parse(self, csource): + csource, macros = _preprocess(csource) + # XXX: for more efficiency we would need to poke into the + # internals of CParser... the following registers the + # typedefs, because their presence or absence influences the + # parsing itself (but what they are typedef'ed to plays no role) + ctn = _common_type_names(csource) + typenames = [] + for name in sorted(self._declarations): + if name.startswith('typedef '): + name = name[8:] + typenames.append(name) + ctn.discard(name) + typenames += sorted(ctn) + # + csourcelines = [] + csourcelines.append('# 1 "<cdef automatic initialization code>"') + for typename in typenames: + csourcelines.append('typedef int %s;' % typename) + csourcelines.append('typedef int __dotdotdotint__, __dotdotdotfloat__,' + ' __dotdotdot__;') + # this forces pycparser to consider the following in the file + # called <cdef source string> from line 1 + csourcelines.append('# 1 "%s"' % (CDEF_SOURCE_STRING,)) + csourcelines.append(csource) + fullcsource = '\n'.join(csourcelines) + if lock is not None: + lock.acquire() # pycparser is not thread-safe... + try: + ast = _get_parser().parse(fullcsource) + except pycparser.c_parser.ParseError as e: + self.convert_pycparser_error(e, csource) + finally: + if lock is not None: + lock.release() + # csource will be used to find buggy source text + return ast, macros, csource + + def _convert_pycparser_error(self, e, csource): + # xxx look for "<cdef source string>:NUM:" at the start of str(e) + # and interpret that as a line number. This will not work if + # the user gives explicit ``# NUM "FILE"`` directives. + line = None + msg = str(e) + match = re.match(r"%s:(\d+):" % (CDEF_SOURCE_STRING,), msg) + if match: + linenum = int(match.group(1), 10) + csourcelines = csource.splitlines() + if 1 <= linenum <= len(csourcelines): + line = csourcelines[linenum-1] + return line + + def convert_pycparser_error(self, e, csource): + line = self._convert_pycparser_error(e, csource) + + msg = str(e) + if line: + msg = 'cannot parse "%s"\n%s' % (line.strip(), msg) + else: + msg = 'parse error\n%s' % (msg,) + raise CDefError(msg) + + def parse(self, csource, override=False, packed=False, pack=None, + dllexport=False): + if packed: + if packed != True: + raise ValueError("'packed' should be False or True; use " + "'pack' to give another value") + if pack: + raise ValueError("cannot give both 'pack' and 'packed'") + pack = 1 + elif pack: + if pack & (pack - 1): + raise ValueError("'pack' must be a power of two, not %r" % + (pack,)) + else: + pack = 0 + prev_options = self._options + try: + self._options = {'override': override, + 'packed': pack, + 'dllexport': dllexport} + self._internal_parse(csource) + finally: + self._options = prev_options + + def _internal_parse(self, csource): + ast, macros, csource = self._parse(csource) + # add the macros + self._process_macros(macros) + # find the first "__dotdotdot__" and use that as a separator + # between the repeated typedefs and the real csource + iterator = iter(ast.ext) + for decl in iterator: + if decl.name == '__dotdotdot__': + break + else: + assert 0 + current_decl = None + # + try: + self._inside_extern_python = '__cffi_extern_python_stop' + for decl in iterator: + current_decl = decl + if isinstance(decl, pycparser.c_ast.Decl): + self._parse_decl(decl) + elif isinstance(decl, pycparser.c_ast.Typedef): + if not decl.name: + raise CDefError("typedef does not declare any name", + decl) + quals = 0 + if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) and + decl.type.type.names[-1].startswith('__dotdotdot')): + realtype = self._get_unknown_type(decl) + elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and + isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and + isinstance(decl.type.type.type, + pycparser.c_ast.IdentifierType) and + decl.type.type.type.names[-1].startswith('__dotdotdot')): + realtype = self._get_unknown_ptr_type(decl) + else: + realtype, quals = self._get_type_and_quals( + decl.type, name=decl.name, partial_length_ok=True, + typedef_example="*(%s *)0" % (decl.name,)) + self._declare('typedef ' + decl.name, realtype, quals=quals) + elif decl.__class__.__name__ == 'Pragma': + pass # skip pragma, only in pycparser 2.15 + else: + raise CDefError("unexpected <%s>: this construct is valid " + "C but not valid in cdef()" % + decl.__class__.__name__, decl) + except CDefError as e: + if len(e.args) == 1: + e.args = e.args + (current_decl,) + raise + except FFIError as e: + msg = self._convert_pycparser_error(e, csource) + if msg: + e.args = (e.args[0] + "\n *** Err: %s" % msg,) + raise + + def _add_constants(self, key, val): + if key in self._int_constants: + if self._int_constants[key] == val: + return # ignore identical double declarations + raise FFIError( + "multiple declarations of constant: %s" % (key,)) + self._int_constants[key] = val + + def _add_integer_constant(self, name, int_str): + int_str = int_str.lower().rstrip("ul") + neg = int_str.startswith('-') + if neg: + int_str = int_str[1:] + # "010" is not valid oct in py3 + if (int_str.startswith("0") and int_str != '0' + and not int_str.startswith("0x")): + int_str = "0o" + int_str[1:] + pyvalue = int(int_str, 0) + if neg: + pyvalue = -pyvalue + self._add_constants(name, pyvalue) + self._declare('macro ' + name, pyvalue) + + def _process_macros(self, macros): + for key, value in macros.items(): + value = value.strip() + if _r_int_literal.match(value): + self._add_integer_constant(key, value) + elif value == '...': + self._declare('macro ' + key, value) + else: + raise CDefError( + 'only supports one of the following syntax:\n' + ' #define %s ... (literally dot-dot-dot)\n' + ' #define %s NUMBER (with NUMBER an integer' + ' constant, decimal/hex/octal)\n' + 'got:\n' + ' #define %s %s' + % (key, key, key, value)) + + def _declare_function(self, tp, quals, decl): + tp = self._get_type_pointer(tp, quals) + if self._options.get('dllexport'): + tag = 'dllexport_python ' + elif self._inside_extern_python == '__cffi_extern_python_start': + tag = 'extern_python ' + elif self._inside_extern_python == '__cffi_extern_python_plus_c_start': + tag = 'extern_python_plus_c ' + else: + tag = 'function ' + self._declare(tag + decl.name, tp) + + def _parse_decl(self, decl): + node = decl.type + if isinstance(node, pycparser.c_ast.FuncDecl): + tp, quals = self._get_type_and_quals(node, name=decl.name) + assert isinstance(tp, model.RawFunctionType) + self._declare_function(tp, quals, decl) + else: + if isinstance(node, pycparser.c_ast.Struct): + self._get_struct_union_enum_type('struct', node) + elif isinstance(node, pycparser.c_ast.Union): + self._get_struct_union_enum_type('union', node) + elif isinstance(node, pycparser.c_ast.Enum): + self._get_struct_union_enum_type('enum', node) + elif not decl.name: + raise CDefError("construct does not declare any variable", + decl) + # + if decl.name: + tp, quals = self._get_type_and_quals(node, + partial_length_ok=True) + if tp.is_raw_function: + self._declare_function(tp, quals, decl) + elif (tp.is_integer_type() and + hasattr(decl, 'init') and + hasattr(decl.init, 'value') and + _r_int_literal.match(decl.init.value)): + self._add_integer_constant(decl.name, decl.init.value) + elif (tp.is_integer_type() and + isinstance(decl.init, pycparser.c_ast.UnaryOp) and + decl.init.op == '-' and + hasattr(decl.init.expr, 'value') and + _r_int_literal.match(decl.init.expr.value)): + self._add_integer_constant(decl.name, + '-' + decl.init.expr.value) + elif (tp is model.void_type and + decl.name.startswith('__cffi_extern_python_')): + # hack: `extern "Python"` in the C source is replaced + # with "void __cffi_extern_python_start;" and + # "void __cffi_extern_python_stop;" + self._inside_extern_python = decl.name + else: + if self._inside_extern_python !='__cffi_extern_python_stop': + raise CDefError( + "cannot declare constants or " + "variables with 'extern \"Python\"'") + if (quals & model.Q_CONST) and not tp.is_array_type: + self._declare('constant ' + decl.name, tp, quals=quals) + else: + _warn_for_non_extern_non_static_global_variable(decl) + self._declare('variable ' + decl.name, tp, quals=quals) + + def parse_type(self, cdecl): + return self.parse_type_and_quals(cdecl)[0] + + def parse_type_and_quals(self, cdecl): + ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2] + assert not macros + exprnode = ast.ext[-1].type.args.params[0] + if isinstance(exprnode, pycparser.c_ast.ID): + raise CDefError("unknown identifier '%s'" % (exprnode.name,)) + return self._get_type_and_quals(exprnode.type) + + def _declare(self, name, obj, included=False, quals=0): + if name in self._declarations: + prevobj, prevquals = self._declarations[name] + if prevobj is obj and prevquals == quals: + return + if not self._options.get('override'): + raise FFIError( + "multiple declarations of %s (for interactive usage, " + "try cdef(xx, override=True))" % (name,)) + assert '__dotdotdot__' not in name.split() + self._declarations[name] = (obj, quals) + if included: + self._included_declarations.add(obj) + + def _extract_quals(self, type): + quals = 0 + if isinstance(type, (pycparser.c_ast.TypeDecl, + pycparser.c_ast.PtrDecl)): + if 'const' in type.quals: + quals |= model.Q_CONST + if 'volatile' in type.quals: + quals |= model.Q_VOLATILE + if 'restrict' in type.quals: + quals |= model.Q_RESTRICT + return quals + + def _get_type_pointer(self, type, quals, declname=None): + if isinstance(type, model.RawFunctionType): + return type.as_function_pointer() + if (isinstance(type, model.StructOrUnionOrEnum) and + type.name.startswith('$') and type.name[1:].isdigit() and + type.forcename is None and declname is not None): + return model.NamedPointerType(type, declname, quals) + return model.PointerType(type, quals) + + def _get_type_and_quals(self, typenode, name=None, partial_length_ok=False, + typedef_example=None): + # first, dereference typedefs, if we have it already parsed, we're good + if (isinstance(typenode, pycparser.c_ast.TypeDecl) and + isinstance(typenode.type, pycparser.c_ast.IdentifierType) and + len(typenode.type.names) == 1 and + ('typedef ' + typenode.type.names[0]) in self._declarations): + tp, quals = self._declarations['typedef ' + typenode.type.names[0]] + quals |= self._extract_quals(typenode) + return tp, quals + # + if isinstance(typenode, pycparser.c_ast.ArrayDecl): + # array type + if typenode.dim is None: + length = None + else: + length = self._parse_constant( + typenode.dim, partial_length_ok=partial_length_ok) + # a hack: in 'typedef int foo_t[...][...];', don't use '...' as + # the length but use directly the C expression that would be + # generated by recompiler.py. This lets the typedef be used in + # many more places within recompiler.py + if typedef_example is not None: + if length == '...': + length = '_cffi_array_len(%s)' % (typedef_example,) + typedef_example = "*" + typedef_example + # + tp, quals = self._get_type_and_quals(typenode.type, + partial_length_ok=partial_length_ok, + typedef_example=typedef_example) + return model.ArrayType(tp, length), quals + # + if isinstance(typenode, pycparser.c_ast.PtrDecl): + # pointer type + itemtype, itemquals = self._get_type_and_quals(typenode.type) + tp = self._get_type_pointer(itemtype, itemquals, declname=name) + quals = self._extract_quals(typenode) + return tp, quals + # + if isinstance(typenode, pycparser.c_ast.TypeDecl): + quals = self._extract_quals(typenode) + type = typenode.type + if isinstance(type, pycparser.c_ast.IdentifierType): + # assume a primitive type. get it from .names, but reduce + # synonyms to a single chosen combination + names = list(type.names) + if names != ['signed', 'char']: # keep this unmodified + prefixes = {} + while names: + name = names[0] + if name in ('short', 'long', 'signed', 'unsigned'): + prefixes[name] = prefixes.get(name, 0) + 1 + del names[0] + else: + break + # ignore the 'signed' prefix below, and reorder the others + newnames = [] + for prefix in ('unsigned', 'short', 'long'): + for i in range(prefixes.get(prefix, 0)): + newnames.append(prefix) + if not names: + names = ['int'] # implicitly + if names == ['int']: # but kill it if 'short' or 'long' + if 'short' in prefixes or 'long' in prefixes: + names = [] + names = newnames + names + ident = ' '.join(names) + if ident == 'void': + return model.void_type, quals + if ident == '__dotdotdot__': + raise FFIError(':%d: bad usage of "..."' % + typenode.coord.line) + tp0, quals0 = resolve_common_type(self, ident) + return tp0, (quals | quals0) + # + if isinstance(type, pycparser.c_ast.Struct): + # 'struct foobar' + tp = self._get_struct_union_enum_type('struct', type, name) + return tp, quals + # + if isinstance(type, pycparser.c_ast.Union): + # 'union foobar' + tp = self._get_struct_union_enum_type('union', type, name) + return tp, quals + # + if isinstance(type, pycparser.c_ast.Enum): + # 'enum foobar' + tp = self._get_struct_union_enum_type('enum', type, name) + return tp, quals + # + if isinstance(typenode, pycparser.c_ast.FuncDecl): + # a function type + return self._parse_function_type(typenode, name), 0 + # + # nested anonymous structs or unions end up here + if isinstance(typenode, pycparser.c_ast.Struct): + return self._get_struct_union_enum_type('struct', typenode, name, + nested=True), 0 + if isinstance(typenode, pycparser.c_ast.Union): + return self._get_struct_union_enum_type('union', typenode, name, + nested=True), 0 + # + raise FFIError(":%d: bad or unsupported type declaration" % + typenode.coord.line) + + def _parse_function_type(self, typenode, funcname=None): + params = list(getattr(typenode.args, 'params', [])) + for i, arg in enumerate(params): + if not hasattr(arg, 'type'): + raise CDefError("%s arg %d: unknown type '%s'" + " (if you meant to use the old C syntax of giving" + " untyped arguments, it is not supported)" + % (funcname or 'in expression', i + 1, + getattr(arg, 'name', '?'))) + ellipsis = ( + len(params) > 0 and + isinstance(params[-1].type, pycparser.c_ast.TypeDecl) and + isinstance(params[-1].type.type, + pycparser.c_ast.IdentifierType) and + params[-1].type.type.names == ['__dotdotdot__']) + if ellipsis: + params.pop() + if not params: + raise CDefError( + "%s: a function with only '(...)' as argument" + " is not correct C" % (funcname or 'in expression')) + args = [self._as_func_arg(*self._get_type_and_quals(argdeclnode.type)) + for argdeclnode in params] + if not ellipsis and args == [model.void_type]: + args = [] + result, quals = self._get_type_and_quals(typenode.type) + # the 'quals' on the result type are ignored. HACK: we absure them + # to detect __stdcall functions: we textually replace "__stdcall" + # with "volatile volatile const" above. + abi = None + if hasattr(typenode.type, 'quals'): # else, probable syntax error anyway + if typenode.type.quals[-3:] == ['volatile', 'volatile', 'const']: + abi = '__stdcall' + return model.RawFunctionType(tuple(args), result, ellipsis, abi) + + def _as_func_arg(self, type, quals): + if isinstance(type, model.ArrayType): + return model.PointerType(type.item, quals) + elif isinstance(type, model.RawFunctionType): + return type.as_function_pointer() + else: + return type + + def _get_struct_union_enum_type(self, kind, type, name=None, nested=False): + # First, a level of caching on the exact 'type' node of the AST. + # This is obscure, but needed because pycparser "unrolls" declarations + # such as "typedef struct { } foo_t, *foo_p" and we end up with + # an AST that is not a tree, but a DAG, with the "type" node of the + # two branches foo_t and foo_p of the trees being the same node. + # It's a bit silly but detecting "DAG-ness" in the AST tree seems + # to be the only way to distinguish this case from two independent + # structs. See test_struct_with_two_usages. + try: + return self._structnode2type[type] + except KeyError: + pass + # + # Note that this must handle parsing "struct foo" any number of + # times and always return the same StructType object. Additionally, + # one of these times (not necessarily the first), the fields of + # the struct can be specified with "struct foo { ...fields... }". + # If no name is given, then we have to create a new anonymous struct + # with no caching; in this case, the fields are either specified + # right now or never. + # + force_name = name + name = type.name + # + # get the type or create it if needed + if name is None: + # 'force_name' is used to guess a more readable name for + # anonymous structs, for the common case "typedef struct { } foo". + if force_name is not None: + explicit_name = '$%s' % force_name + else: + self._anonymous_counter += 1 + explicit_name = '$%d' % self._anonymous_counter + tp = None + else: + explicit_name = name + key = '%s %s' % (kind, name) + tp, _ = self._declarations.get(key, (None, None)) + # + if tp is None: + if kind == 'struct': + tp = model.StructType(explicit_name, None, None, None) + elif kind == 'union': + tp = model.UnionType(explicit_name, None, None, None) + elif kind == 'enum': + if explicit_name == '__dotdotdot__': + raise CDefError("Enums cannot be declared with ...") + tp = self._build_enum_type(explicit_name, type.values) + else: + raise AssertionError("kind = %r" % (kind,)) + if name is not None: + self._declare(key, tp) + else: + if kind == 'enum' and type.values is not None: + raise NotImplementedError( + "enum %s: the '{}' declaration should appear on the first " + "time the enum is mentioned, not later" % explicit_name) + if not tp.forcename: + tp.force_the_name(force_name) + if tp.forcename and '$' in tp.name: + self._declare('anonymous %s' % tp.forcename, tp) + # + self._structnode2type[type] = tp + # + # enums: done here + if kind == 'enum': + return tp + # + # is there a 'type.decls'? If yes, then this is the place in the + # C sources that declare the fields. If no, then just return the + # existing type, possibly still incomplete. + if type.decls is None: + return tp + # + if tp.fldnames is not None: + raise CDefError("duplicate declaration of struct %s" % name) + fldnames = [] + fldtypes = [] + fldbitsize = [] + fldquals = [] + for decl in type.decls: + if (isinstance(decl.type, pycparser.c_ast.IdentifierType) and + ''.join(decl.type.names) == '__dotdotdot__'): + # XXX pycparser is inconsistent: 'names' should be a list + # of strings, but is sometimes just one string. Use + # str.join() as a way to cope with both. + self._make_partial(tp, nested) + continue + if decl.bitsize is None: + bitsize = -1 + else: + bitsize = self._parse_constant(decl.bitsize) + self._partial_length = False + type, fqual = self._get_type_and_quals(decl.type, + partial_length_ok=True) + if self._partial_length: + self._make_partial(tp, nested) + if isinstance(type, model.StructType) and type.partial: + self._make_partial(tp, nested) + fldnames.append(decl.name or '') + fldtypes.append(type) + fldbitsize.append(bitsize) + fldquals.append(fqual) + tp.fldnames = tuple(fldnames) + tp.fldtypes = tuple(fldtypes) + tp.fldbitsize = tuple(fldbitsize) + tp.fldquals = tuple(fldquals) + if fldbitsize != [-1] * len(fldbitsize): + if isinstance(tp, model.StructType) and tp.partial: + raise NotImplementedError("%s: using both bitfields and '...;'" + % (tp,)) + tp.packed = self._options.get('packed') + if tp.completed: # must be re-completed: it is not opaque any more + tp.completed = 0 + self._recomplete.append(tp) + return tp + + def _make_partial(self, tp, nested): + if not isinstance(tp, model.StructOrUnion): + raise CDefError("%s cannot be partial" % (tp,)) + if not tp.has_c_name() and not nested: + raise NotImplementedError("%s is partial but has no C name" %(tp,)) + tp.partial = True + + def _parse_constant(self, exprnode, partial_length_ok=False): + # for now, limited to expressions that are an immediate number + # or positive/negative number + if isinstance(exprnode, pycparser.c_ast.Constant): + s = exprnode.value + if '0' <= s[0] <= '9': + s = s.rstrip('uUlL') + try: + if s.startswith('0'): + return int(s, 8) + else: + return int(s, 10) + except ValueError: + if len(s) > 1: + if s.lower()[0:2] == '0x': + return int(s, 16) + elif s.lower()[0:2] == '0b': + return int(s, 2) + raise CDefError("invalid constant %r" % (s,)) + elif s[0] == "'" and s[-1] == "'" and ( + len(s) == 3 or (len(s) == 4 and s[1] == "\\")): + return ord(s[-2]) + else: + raise CDefError("invalid constant %r" % (s,)) + # + if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and + exprnode.op == '+'): + return self._parse_constant(exprnode.expr) + # + if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and + exprnode.op == '-'): + return -self._parse_constant(exprnode.expr) + # load previously defined int constant + if (isinstance(exprnode, pycparser.c_ast.ID) and + exprnode.name in self._int_constants): + return self._int_constants[exprnode.name] + # + if (isinstance(exprnode, pycparser.c_ast.ID) and + exprnode.name == '__dotdotdotarray__'): + if partial_length_ok: + self._partial_length = True + return '...' + raise FFIError(":%d: unsupported '[...]' here, cannot derive " + "the actual array length in this context" + % exprnode.coord.line) + # + if isinstance(exprnode, pycparser.c_ast.BinaryOp): + left = self._parse_constant(exprnode.left) + right = self._parse_constant(exprnode.right) + if exprnode.op == '+': + return left + right + elif exprnode.op == '-': + return left - right + elif exprnode.op == '*': + return left * right + elif exprnode.op == '/': + return self._c_div(left, right) + elif exprnode.op == '%': + return left - self._c_div(left, right) * right + elif exprnode.op == '<<': + return left << right + elif exprnode.op == '>>': + return left >> right + elif exprnode.op == '&': + return left & right + elif exprnode.op == '|': + return left | right + elif exprnode.op == '^': + return left ^ right + # + raise FFIError(":%d: unsupported expression: expected a " + "simple numeric constant" % exprnode.coord.line) + + def _c_div(self, a, b): + result = a // b + if ((a < 0) ^ (b < 0)) and (a % b) != 0: + result += 1 + return result + + def _build_enum_type(self, explicit_name, decls): + if decls is not None: + partial = False + enumerators = [] + enumvalues = [] + nextenumvalue = 0 + for enum in decls.enumerators: + if _r_enum_dotdotdot.match(enum.name): + partial = True + continue + if enum.value is not None: + nextenumvalue = self._parse_constant(enum.value) + enumerators.append(enum.name) + enumvalues.append(nextenumvalue) + self._add_constants(enum.name, nextenumvalue) + nextenumvalue += 1 + enumerators = tuple(enumerators) + enumvalues = tuple(enumvalues) + tp = model.EnumType(explicit_name, enumerators, enumvalues) + tp.partial = partial + else: # opaque enum + tp = model.EnumType(explicit_name, (), ()) + return tp + + def include(self, other): + for name, (tp, quals) in other._declarations.items(): + if name.startswith('anonymous $enum_$'): + continue # fix for test_anonymous_enum_include + kind = name.split(' ', 1)[0] + if kind in ('struct', 'union', 'enum', 'anonymous', 'typedef'): + self._declare(name, tp, included=True, quals=quals) + for k, v in other._int_constants.items(): + self._add_constants(k, v) + + def _get_unknown_type(self, decl): + typenames = decl.type.type.names + if typenames == ['__dotdotdot__']: + return model.unknown_type(decl.name) + + if typenames == ['__dotdotdotint__']: + if self._uses_new_feature is None: + self._uses_new_feature = "'typedef int... %s'" % decl.name + return model.UnknownIntegerType(decl.name) + + if typenames == ['__dotdotdotfloat__']: + # note: not for 'long double' so far + if self._uses_new_feature is None: + self._uses_new_feature = "'typedef float... %s'" % decl.name + return model.UnknownFloatType(decl.name) + + raise FFIError(':%d: unsupported usage of "..." in typedef' + % decl.coord.line) + + def _get_unknown_ptr_type(self, decl): + if decl.type.type.type.names == ['__dotdotdot__']: + return model.unknown_ptr_type(decl.name) + raise FFIError(':%d: unsupported usage of "..." in typedef' + % decl.coord.line) |