author     xiubuzhe <xiubuzhe@sina.com>  2023-10-08 20:59:00 +0800
committer  xiubuzhe <xiubuzhe@sina.com>  2023-10-08 20:59:00 +0800
commit     1dac2263372df2b85db5d029a45721fa158a5c9d (patch)
tree       0365f9c57df04178a726d7584ca6a6b955a7ce6a /lib/pycparser/c_parser.py
parent     b494be364bb39e1de128ada7dc576a729d99907e (diff)
first add files
Diffstat (limited to 'lib/pycparser/c_parser.py')
-rw-r--r--  lib/pycparser/c_parser.py  1936
1 file changed, 1936 insertions, 0 deletions
diff --git a/lib/pycparser/c_parser.py b/lib/pycparser/c_parser.py
new file mode 100644
index 0000000..640a759
--- /dev/null
+++ b/lib/pycparser/c_parser.py
@@ -0,0 +1,1936 @@
+#------------------------------------------------------------------------------
+# pycparser: c_parser.py
+#
+# CParser class: Parser and AST builder for the C language
+#
+# Eli Bendersky [https://eli.thegreenplace.net/]
+# License: BSD
+#------------------------------------------------------------------------------
+from .ply import yacc
+
+from . import c_ast
+from .c_lexer import CLexer
+from .plyparser import PLYParser, ParseError, parameterized, template
+from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
+
+
+@template
+class CParser(PLYParser):
+ def __init__(
+ self,
+ lex_optimize=True,
+ lexer=CLexer,
+ lextab='pycparser.lextab',
+ yacc_optimize=True,
+ yacctab='pycparser.yacctab',
+ yacc_debug=False,
+ taboutputdir=''):
+ """ Create a new CParser.
+
+ Some arguments for controlling the debug/optimization
+ level of the parser are provided. The defaults are
+ tuned for release/performance mode.
+ The simple rules for using them are:
+ *) When tweaking CParser/CLexer, set these to False
+ *) When releasing a stable parser, set to True
+
+ lex_optimize:
+ Set to False when you're modifying the lexer.
+            Otherwise, changes in the lexer won't be used if
+ some lextab.py file exists.
+ When releasing with a stable lexer, set to True
+ to save the re-generation of the lexer table on
+ each run.
+
+ lexer:
+ Set this parameter to define the lexer to use if
+ you're not using the default CLexer.
+
+ lextab:
+ Points to the lex table that's used for optimized
+ mode. Only if you're modifying the lexer and want
+ some tests to avoid re-generating the table, make
+ this point to a local lex table file (that's been
+ earlier generated with lex_optimize=True)
+
+ yacc_optimize:
+ Set to False when you're modifying the parser.
+            Otherwise, changes in the parser won't be used if
+ some parsetab.py file exists.
+ When releasing with a stable parser, set to True
+ to save the re-generation of the parser table on
+ each run.
+
+ yacctab:
+ Points to the yacc table that's used for optimized
+ mode. Only if you're modifying the parser, make
+ this point to a local yacc table file
+
+ yacc_debug:
+ Generate a parser.out file that explains how yacc
+ built the parsing table from the grammar.
+
+ taboutputdir:
+ Set this parameter to control the location of generated
+ lextab and yacctab files.
+ """
+ self.clex = lexer(
+ error_func=self._lex_error_func,
+ on_lbrace_func=self._lex_on_lbrace_func,
+ on_rbrace_func=self._lex_on_rbrace_func,
+ type_lookup_func=self._lex_type_lookup_func)
+
+ self.clex.build(
+ optimize=lex_optimize,
+ lextab=lextab,
+ outputdir=taboutputdir)
+ self.tokens = self.clex.tokens
+
+ rules_with_opt = [
+ 'abstract_declarator',
+ 'assignment_expression',
+ 'declaration_list',
+ 'declaration_specifiers_no_type',
+ 'designation',
+ 'expression',
+ 'identifier_list',
+ 'init_declarator_list',
+ 'id_init_declarator_list',
+ 'initializer_list',
+ 'parameter_type_list',
+ 'block_item_list',
+ 'type_qualifier_list',
+ 'struct_declarator_list'
+ ]
+
+ for rule in rules_with_opt:
+ self._create_opt_rule(rule)
+
+ self.cparser = yacc.yacc(
+ module=self,
+ start='translation_unit_or_empty',
+ debug=yacc_debug,
+ optimize=yacc_optimize,
+ tabmodule=yacctab,
+ outputdir=taboutputdir)
+
+ # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
+ # the current (topmost) scope. Each scope is a dictionary that
+ # specifies whether a name is a type. If _scope_stack[n][name] is
+ # True, 'name' is currently a type in the scope. If it's False,
+ # 'name' is used in the scope but not as a type (for instance, if we
+ # saw: int name;
+ # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
+ # in this scope at all.
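+        # For example, after a file-scope 'typedef int T;' and a local
+        # 'int T;' inside a function body, the stack could look like
+        # [{'T': True}, {'T': False}].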
+ self._scope_stack = [dict()]
+
+ # Keeps track of the last token given to yacc (the lookahead token)
+ self._last_yielded_token = None
+
+ def parse(self, text, filename='', debug=False):
+ """ Parses C code and returns an AST.
+
+ text:
+ A string containing the C source code
+
+ filename:
+ Name of the file being parsed (for meaningful
+ error messages)
+
+ debug:
+ Debug flag to YACC
+ """
+ self.clex.filename = filename
+ self.clex.reset_lineno()
+ self._scope_stack = [dict()]
+ self._last_yielded_token = None
+ return self.cparser.parse(
+ input=text,
+ lexer=self.clex,
+ debug=debug)
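+
+    # For illustration, a minimal use of the parser (the input must already
+    # be preprocessed C):
+    #
+    #   parser = CParser()
+    #   ast = parser.parse('int x = 1;', filename='<none>')
+    #   ast.show()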
+
+ ######################-- PRIVATE --######################
+
+ def _push_scope(self):
+ self._scope_stack.append(dict())
+
+ def _pop_scope(self):
+ assert len(self._scope_stack) > 1
+ self._scope_stack.pop()
+
+ def _add_typedef_name(self, name, coord):
+ """ Add a new typedef name (ie a TYPEID) to the current scope
+ """
+ if not self._scope_stack[-1].get(name, True):
+ self._parse_error(
+ "Typedef %r previously declared as non-typedef "
+ "in this scope" % name, coord)
+ self._scope_stack[-1][name] = True
+
+ def _add_identifier(self, name, coord):
+ """ Add a new object, function, or enum member name (ie an ID) to the
+ current scope
+ """
+ if self._scope_stack[-1].get(name, False):
+ self._parse_error(
+ "Non-typedef %r previously declared as typedef "
+ "in this scope" % name, coord)
+ self._scope_stack[-1][name] = False
+
+ def _is_type_in_scope(self, name):
+ """ Is *name* a typedef-name in the current scope?
+ """
+ for scope in reversed(self._scope_stack):
+ # If name is an identifier in this scope it shadows typedefs in
+ # higher scopes.
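+            # For example, a local 'int T;' (stored as False) hides an
+            # outer 'typedef int T;' (stored as True).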
+ in_scope = scope.get(name)
+ if in_scope is not None: return in_scope
+ return False
+
+ def _lex_error_func(self, msg, line, column):
+ self._parse_error(msg, self._coord(line, column))
+
+ def _lex_on_lbrace_func(self):
+ self._push_scope()
+
+ def _lex_on_rbrace_func(self):
+ self._pop_scope()
+
+ def _lex_type_lookup_func(self, name):
+ """ Looks up types that were previously defined with
+ typedef.
+ Passed to the lexer for recognizing identifiers that
+ are types.
+ """
+ is_type = self._is_type_in_scope(name)
+ return is_type
+
+ def _get_yacc_lookahead_token(self):
+ """ We need access to yacc's lookahead token in certain cases.
+ This is the last token yacc requested from the lexer, so we
+ ask the lexer.
+ """
+ return self.clex.last_token
+
+ # To understand what's going on here, read sections A.8.5 and
+ # A.8.6 of K&R2 very carefully.
+ #
+ # A C type consists of a basic type declaration, with a list
+ # of modifiers. For example:
+ #
+ # int *c[5];
+ #
+ # The basic declaration here is 'int c', and the pointer and
+ # the array are the modifiers.
+ #
+ # Basic declarations are represented by TypeDecl (from module c_ast) and the
+ # modifiers are FuncDecl, PtrDecl and ArrayDecl.
+ #
+ # The standard states that whenever a new modifier is parsed, it should be
+ # added to the end of the list of modifiers. For example:
+ #
+ # K&R2 A.8.6.2: Array Declarators
+ #
+ # In a declaration T D where D has the form
+ # D1 [constant-expression-opt]
+ # and the type of the identifier in the declaration T D1 is
+ # "type-modifier T", the type of the
+ # identifier of D is "type-modifier array of T"
+ #
+ # This is what this method does. The declarator it receives
+ # can be a list of declarators ending with TypeDecl. It
+ # tacks the modifier to the end of this list, just before
+ # the TypeDecl.
+ #
+ # Additionally, the modifier may be a list itself. This is
+ # useful for pointers, that can come as a chain from the rule
+ # p_pointer. In this case, the whole modifier list is spliced
+ # into the new location.
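+    #
+    # For example, while parsing 'int *c[5]' the array suffix is seen first,
+    # producing ArrayDecl -> TypeDecl('c'); tacking the pointer modifier on
+    # then yields ArrayDecl -> PtrDecl -> TypeDecl('c'), i.e. "c is an array
+    # of 5 pointers to int".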
+ def _type_modify_decl(self, decl, modifier):
+ """ Tacks a type modifier on a declarator, and returns
+ the modified declarator.
+
+ Note: the declarator and modifier may be modified
+ """
+ #~ print '****'
+ #~ decl.show(offset=3)
+ #~ modifier.show(offset=3)
+ #~ print '****'
+
+ modifier_head = modifier
+ modifier_tail = modifier
+
+ # The modifier may be a nested list. Reach its tail.
+ while modifier_tail.type:
+ modifier_tail = modifier_tail.type
+
+ # If the decl is a basic type, just tack the modifier onto it.
+ if isinstance(decl, c_ast.TypeDecl):
+ modifier_tail.type = decl
+ return modifier
+ else:
+ # Otherwise, the decl is a list of modifiers. Reach
+ # its tail and splice the modifier onto the tail,
+ # pointing to the underlying basic type.
+ decl_tail = decl
+
+ while not isinstance(decl_tail.type, c_ast.TypeDecl):
+ decl_tail = decl_tail.type
+
+ modifier_tail.type = decl_tail.type
+ decl_tail.type = modifier_head
+ return decl
+
+ # Due to the order in which declarators are constructed,
+ # they have to be fixed in order to look like a normal AST.
+ #
+ # When a declaration arrives from syntax construction, it has
+ # these problems:
+ # * The innermost TypeDecl has no type (because the basic
+ # type is only known at the uppermost declaration level)
+ # * The declaration has no variable name, since that is saved
+ # in the innermost TypeDecl
+ # * The typename of the declaration is a list of type
+ # specifiers, and not a node. Here, basic identifier types
+ # should be separated from more complex types like enums
+ # and structs.
+ #
+ # This method fixes these problems.
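+    #
+    # For example, for 'unsigned long x;' it moves the name 'x' from the
+    # innermost TypeDecl up to the declaration, and collapses the specifier
+    # list into a single IdentifierType(['unsigned', 'long']) node.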
+ def _fix_decl_name_type(self, decl, typename):
+ """ Fixes a declaration. Modifies decl.
+ """
+ # Reach the underlying basic type
+ #
+ type = decl
+ while not isinstance(type, c_ast.TypeDecl):
+ type = type.type
+
+ decl.name = type.declname
+ type.quals = decl.quals[:]
+
+ # The typename is a list of types. If any type in this
+ # list isn't an IdentifierType, it must be the only
+ # type in the list (it's illegal to declare "int enum ..")
+ # If all the types are basic, they're collected in the
+ # IdentifierType holder.
+ for tn in typename:
+ if not isinstance(tn, c_ast.IdentifierType):
+ if len(typename) > 1:
+ self._parse_error(
+ "Invalid multiple types specified", tn.coord)
+ else:
+ type.type = tn
+ return decl
+
+ if not typename:
+ # Functions default to returning int
+ #
+ if not isinstance(decl.type, c_ast.FuncDecl):
+ self._parse_error(
+ "Missing type in declaration", decl.coord)
+ type.type = c_ast.IdentifierType(
+ ['int'],
+ coord=decl.coord)
+ else:
+ # At this point, we know that typename is a list of IdentifierType
+ # nodes. Concatenate all the names into a single list.
+ #
+ type.type = c_ast.IdentifierType(
+ [name for id in typename for name in id.names],
+ coord=typename[0].coord)
+ return decl
+
+ def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
+ """ Declaration specifiers are represented by a dictionary
+ with the entries:
+ * qual: a list of type qualifiers
+ * storage: a list of storage type qualifiers
+ * type: a list of type specifiers
+ * function: a list of function specifiers
+ * alignment: a list of alignment specifiers
+
+ This method is given a declaration specifier, and a
+ new specifier of a given kind.
+ If `append` is True, the new specifier is added to the end of
+ the specifiers list, otherwise it's added at the beginning.
+ Returns the declaration specifier, with the new
+ specifier incorporated.
+ """
+ spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[])
+
+ if append:
+ spec[kind].append(newspec)
+ else:
+ spec[kind].insert(0, newspec)
+
+ return spec
+
+ def _build_declarations(self, spec, decls, typedef_namespace=False):
+ """ Builds a list of declarations all sharing the given specifiers.
+ If typedef_namespace is true, each declared name is added
+ to the "typedef namespace", which also includes objects,
+ functions, and enum constants.
+ """
+ is_typedef = 'typedef' in spec['storage']
+ declarations = []
+
+ # Bit-fields are allowed to be unnamed.
+ if decls[0].get('bitsize') is not None:
+ pass
+
+ # When redeclaring typedef names as identifiers in inner scopes, a
+ # problem can occur where the identifier gets grouped into
+ # spec['type'], leaving decl as None. This can only occur for the
+ # first declarator.
+ elif decls[0]['decl'] is None:
+ if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
+ not self._is_type_in_scope(spec['type'][-1].names[0]):
+ coord = '?'
+ for t in spec['type']:
+ if hasattr(t, 'coord'):
+ coord = t.coord
+ break
+ self._parse_error('Invalid declaration', coord)
+
+ # Make this look as if it came from "direct_declarator:ID"
+ decls[0]['decl'] = c_ast.TypeDecl(
+ declname=spec['type'][-1].names[0],
+ type=None,
+ quals=None,
+ align=spec['alignment'],
+ coord=spec['type'][-1].coord)
+ # Remove the "new" type's name from the end of spec['type']
+ del spec['type'][-1]
+
+ # A similar problem can occur where the declaration ends up looking
+ # like an abstract declarator. Give it a name if this is the case.
+ elif not isinstance(decls[0]['decl'], (
+ c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
+ decls_0_tail = decls[0]['decl']
+ while not isinstance(decls_0_tail, c_ast.TypeDecl):
+ decls_0_tail = decls_0_tail.type
+ if decls_0_tail.declname is None:
+ decls_0_tail.declname = spec['type'][-1].names[0]
+ del spec['type'][-1]
+
+ for decl in decls:
+ assert decl['decl'] is not None
+ if is_typedef:
+ declaration = c_ast.Typedef(
+ name=None,
+ quals=spec['qual'],
+ storage=spec['storage'],
+ type=decl['decl'],
+ coord=decl['decl'].coord)
+ else:
+ declaration = c_ast.Decl(
+ name=None,
+ quals=spec['qual'],
+ align=spec['alignment'],
+ storage=spec['storage'],
+ funcspec=spec['function'],
+ type=decl['decl'],
+ init=decl.get('init'),
+ bitsize=decl.get('bitsize'),
+ coord=decl['decl'].coord)
+
+ if isinstance(declaration.type, (
+ c_ast.Enum, c_ast.Struct, c_ast.Union,
+ c_ast.IdentifierType)):
+ fixed_decl = declaration
+ else:
+ fixed_decl = self._fix_decl_name_type(declaration, spec['type'])
+
+ # Add the type name defined by typedef to a
+ # symbol table (for usage in the lexer)
+ if typedef_namespace:
+ if is_typedef:
+ self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
+ else:
+ self._add_identifier(fixed_decl.name, fixed_decl.coord)
+
+ fixed_decl = fix_atomic_specifiers(fixed_decl)
+ declarations.append(fixed_decl)
+
+ return declarations
+
+ def _build_function_definition(self, spec, decl, param_decls, body):
+ """ Builds a function definition.
+ """
+ if 'typedef' in spec['storage']:
+ self._parse_error("Invalid typedef", decl.coord)
+
+ declaration = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=decl, init=None)],
+ typedef_namespace=True)[0]
+
+ return c_ast.FuncDef(
+ decl=declaration,
+ param_decls=param_decls,
+ body=body,
+ coord=decl.coord)
+
+ def _select_struct_union_class(self, token):
+ """ Given a token (either STRUCT or UNION), selects the
+ appropriate AST class.
+ """
+ if token == 'struct':
+ return c_ast.Struct
+ else:
+ return c_ast.Union
+
+ ##
+ ## Precedence and associativity of operators
+ ##
+ # If this changes, c_generator.CGenerator.precedence_map needs to change as
+ # well
+ precedence = (
+ ('left', 'LOR'),
+ ('left', 'LAND'),
+ ('left', 'OR'),
+ ('left', 'XOR'),
+ ('left', 'AND'),
+ ('left', 'EQ', 'NE'),
+ ('left', 'GT', 'GE', 'LT', 'LE'),
+ ('left', 'RSHIFT', 'LSHIFT'),
+ ('left', 'PLUS', 'MINUS'),
+ ('left', 'TIMES', 'DIVIDE', 'MOD')
+ )
+
+ ##
+ ## Grammar productions
+ ## Implementation of the BNF defined in K&R2 A.13
+ ##
+
+ # Wrapper around a translation unit, to allow for empty input.
+ # Not strictly part of the C99 Grammar, but useful in practice.
+ def p_translation_unit_or_empty(self, p):
+ """ translation_unit_or_empty : translation_unit
+ | empty
+ """
+ if p[1] is None:
+ p[0] = c_ast.FileAST([])
+ else:
+ p[0] = c_ast.FileAST(p[1])
+
+ def p_translation_unit_1(self, p):
+ """ translation_unit : external_declaration
+ """
+ # Note: external_declaration is already a list
+ p[0] = p[1]
+
+ def p_translation_unit_2(self, p):
+ """ translation_unit : translation_unit external_declaration
+ """
+ p[1].extend(p[2])
+ p[0] = p[1]
+
+ # Declarations always come as lists (because they can be
+ # several in one line), so we wrap the function definition
+ # into a list as well, to make the return value of
+ # external_declaration homogeneous.
+ def p_external_declaration_1(self, p):
+ """ external_declaration : function_definition
+ """
+ p[0] = [p[1]]
+
+ def p_external_declaration_2(self, p):
+ """ external_declaration : declaration
+ """
+ p[0] = p[1]
+
+ def p_external_declaration_3(self, p):
+ """ external_declaration : pp_directive
+ | pppragma_directive
+ """
+ p[0] = [p[1]]
+
+ def p_external_declaration_4(self, p):
+ """ external_declaration : SEMI
+ """
+ p[0] = []
+
+ def p_external_declaration_5(self, p):
+ """ external_declaration : static_assert
+ """
+ p[0] = p[1]
+
+ def p_static_assert_declaration(self, p):
+ """ static_assert : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
+ | _STATIC_ASSERT LPAREN constant_expression RPAREN
+ """
+ if len(p) == 5:
+ p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
+ else:
+ p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]
+
+ def p_pp_directive(self, p):
+ """ pp_directive : PPHASH
+ """
+ self._parse_error('Directives not supported yet',
+ self._token_coord(p, 1))
+
+ def p_pppragma_directive(self, p):
+ """ pppragma_directive : PPPRAGMA
+ | PPPRAGMA PPPRAGMASTR
+ """
+ if len(p) == 3:
+ p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
+ else:
+ p[0] = c_ast.Pragma("", self._token_coord(p, 1))
+
+ # In function definitions, the declarator can be followed by
+    # a declaration list, for old "K&R style" function definitions.
+ def p_function_definition_1(self, p):
+ """ function_definition : id_declarator declaration_list_opt compound_statement
+ """
+ # no declaration specifiers - 'int' becomes the default type
+ spec = dict(
+ qual=[],
+ alignment=[],
+ storage=[],
+ type=[c_ast.IdentifierType(['int'],
+ coord=self._token_coord(p, 1))],
+ function=[])
+
+ p[0] = self._build_function_definition(
+ spec=spec,
+ decl=p[1],
+ param_decls=p[2],
+ body=p[3])
+
+ def p_function_definition_2(self, p):
+ """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
+ """
+ spec = p[1]
+
+ p[0] = self._build_function_definition(
+ spec=spec,
+ decl=p[2],
+ param_decls=p[3],
+ body=p[4])
+
+    # Note: according to C18 A.2.2 (6.7.10, static_assert-declaration),
+    # _Static_assert is a declaration, not a statement. We additionally
+    # recognize it as a statement to fix parsing of _Static_assert inside
+    # functions.
+ #
+ def p_statement(self, p):
+ """ statement : labeled_statement
+ | expression_statement
+ | compound_statement
+ | selection_statement
+ | iteration_statement
+ | jump_statement
+ | pppragma_directive
+ | static_assert
+ """
+ p[0] = p[1]
+
+ # A pragma is generally considered a decorator rather than an actual
+ # statement. Still, for the purposes of analyzing an abstract syntax tree of
+    # C code, pragmas should not be ignored and were previously treated as a
+ # statement. This presents a problem for constructs that take a statement
+ # such as labeled_statements, selection_statements, and
+ # iteration_statements, causing a misleading structure in the AST. For
+ # example, consider the following C code.
+ #
+ # for (int i = 0; i < 3; i++)
+ # #pragma omp critical
+ # sum += 1;
+ #
+ # This code will compile and execute "sum += 1;" as the body of the for
+    # loop. Previous implementations of pycparser would render the AST for this
+ # block of code as follows:
+ #
+ # For:
+ # DeclList:
+ # Decl: i, [], [], []
+ # TypeDecl: i, []
+ # IdentifierType: ['int']
+ # Constant: int, 0
+ # BinaryOp: <
+ # ID: i
+ # Constant: int, 3
+ # UnaryOp: p++
+ # ID: i
+ # Pragma: omp critical
+ # Assignment: +=
+ # ID: sum
+ # Constant: int, 1
+ #
+ # This AST misleadingly takes the Pragma as the body of the loop and the
+ # assignment then becomes a sibling of the loop.
+ #
+ # To solve edge cases like these, the pragmacomp_or_statement rule groups
+ # a pragma and its following statement (which would otherwise be orphaned)
+ # using a compound block, effectively turning the above code into:
+ #
+ # for (int i = 0; i < 3; i++) {
+ # #pragma omp critical
+ # sum += 1;
+ # }
+ def p_pragmacomp_or_statement(self, p):
+ """ pragmacomp_or_statement : pppragma_directive statement
+ | statement
+ """
+ if isinstance(p[1], c_ast.Pragma) and len(p) == 3:
+ p[0] = c_ast.Compound(
+ block_items=[p[1], p[2]],
+ coord=self._token_coord(p, 1))
+ else:
+ p[0] = p[1]
+
+ # In C, declarations can come several in a line:
+ # int x, *px, romulo = 5;
+ #
+ # However, for the AST, we will split them to separate Decl
+ # nodes.
+ #
+ # This rule splits its declarations and always returns a list
+ # of Decl nodes, even if it's one element long.
+ #
+ def p_decl_body(self, p):
+ """ decl_body : declaration_specifiers init_declarator_list_opt
+ | declaration_specifiers_no_type id_init_declarator_list_opt
+ """
+ spec = p[1]
+
+ # p[2] (init_declarator_list_opt) is either a list or None
+ #
+ if p[2] is None:
+ # By the standard, you must have at least one declarator unless
+ # declaring a structure tag, a union tag, or the members of an
+ # enumeration.
+ #
+ ty = spec['type']
+ s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
+ if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
+ decls = [c_ast.Decl(
+ name=None,
+ quals=spec['qual'],
+ align=spec['alignment'],
+ storage=spec['storage'],
+ funcspec=spec['function'],
+ type=ty[0],
+ init=None,
+ bitsize=None,
+ coord=ty[0].coord)]
+
+ # However, this case can also occur on redeclared identifiers in
+ # an inner scope. The trouble is that the redeclared type's name
+ # gets grouped into declaration_specifiers; _build_declarations
+ # compensates for this.
+ #
+ else:
+ decls = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=None, init=None)],
+ typedef_namespace=True)
+
+ else:
+ decls = self._build_declarations(
+ spec=spec,
+ decls=p[2],
+ typedef_namespace=True)
+
+ p[0] = decls
+
+ # The declaration has been split to a decl_body sub-rule and
+ # SEMI, because having them in a single rule created a problem
+ # for defining typedefs.
+ #
+ # If a typedef line was directly followed by a line using the
+ # type defined with the typedef, the type would not be
+ # recognized. This is because to reduce the declaration rule,
+ # the parser's lookahead asked for the token after SEMI, which
+ # was the type from the next line, and the lexer had no chance
+ # to see the updated type symbol table.
+ #
+ # Splitting solves this problem, because after seeing SEMI,
+ # the parser reduces decl_body, which actually adds the new
+ # type into the table to be seen by the lexer before the next
+ # line is reached.
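+    #
+    # For example:
+    #
+    #   typedef int TT;
+    #   TT x;
+    #
+    # When the SEMI after the typedef is reduced, decl_body has already
+    # added 'TT' to the type symbol table, so the lexer can return TYPEID
+    # (rather than ID) for the TT on the next line.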
+ def p_declaration(self, p):
+ """ declaration : decl_body SEMI
+ """
+ p[0] = p[1]
+
+ # Since each declaration is a list of declarations, this
+ # rule will combine all the declarations and return a single
+ # list
+ #
+ def p_declaration_list(self, p):
+ """ declaration_list : declaration
+ | declaration_list declaration
+ """
+ p[0] = p[1] if len(p) == 2 else p[1] + p[2]
+
+ # To know when declaration-specifiers end and declarators begin,
+ # we require declaration-specifiers to have at least one
+ # type-specifier, and disallow typedef-names after we've seen any
+ # type-specifier. These are both required by the spec.
+ #
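+    # For example, given 'typedef int T;', a later 'unsigned T;' in an inner
+    # scope declares a variable named T of type 'unsigned': once a
+    # type-specifier has been seen, T is no longer accepted as a typedef-name.
+    #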
+ def p_declaration_specifiers_no_type_1(self, p):
+ """ declaration_specifiers_no_type : type_qualifier declaration_specifiers_no_type_opt
+ """
+ p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
+
+ def p_declaration_specifiers_no_type_2(self, p):
+ """ declaration_specifiers_no_type : storage_class_specifier declaration_specifiers_no_type_opt
+ """
+ p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')
+
+ def p_declaration_specifiers_no_type_3(self, p):
+ """ declaration_specifiers_no_type : function_specifier declaration_specifiers_no_type_opt
+ """
+ p[0] = self._add_declaration_specifier(p[2], p[1], 'function')
+
+ # Without this, `typedef _Atomic(T) U` will parse incorrectly because the
+ # _Atomic qualifier will match, instead of the specifier.
+ def p_declaration_specifiers_no_type_4(self, p):
+ """ declaration_specifiers_no_type : atomic_specifier declaration_specifiers_no_type_opt
+ """
+ p[0] = self._add_declaration_specifier(p[2], p[1], 'type')
+
+ def p_declaration_specifiers_no_type_5(self, p):
+ """ declaration_specifiers_no_type : alignment_specifier declaration_specifiers_no_type_opt
+ """
+ p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')
+
+ def p_declaration_specifiers_1(self, p):
+ """ declaration_specifiers : declaration_specifiers type_qualifier
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
+
+ def p_declaration_specifiers_2(self, p):
+ """ declaration_specifiers : declaration_specifiers storage_class_specifier
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)
+
+ def p_declaration_specifiers_3(self, p):
+ """ declaration_specifiers : declaration_specifiers function_specifier
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)
+
+ def p_declaration_specifiers_4(self, p):
+ """ declaration_specifiers : declaration_specifiers type_specifier_no_typeid
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
+
+ def p_declaration_specifiers_5(self, p):
+ """ declaration_specifiers : type_specifier
+ """
+ p[0] = self._add_declaration_specifier(None, p[1], 'type')
+
+ def p_declaration_specifiers_6(self, p):
+ """ declaration_specifiers : declaration_specifiers_no_type type_specifier
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
+
+ def p_declaration_specifiers_7(self, p):
+ """ declaration_specifiers : declaration_specifiers alignment_specifier
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)
+
+ def p_storage_class_specifier(self, p):
+ """ storage_class_specifier : AUTO
+ | REGISTER
+ | STATIC
+ | EXTERN
+ | TYPEDEF
+ | _THREAD_LOCAL
+ """
+ p[0] = p[1]
+
+ def p_function_specifier(self, p):
+ """ function_specifier : INLINE
+ | _NORETURN
+ """
+ p[0] = p[1]
+
+ def p_type_specifier_no_typeid(self, p):
+ """ type_specifier_no_typeid : VOID
+ | _BOOL
+ | CHAR
+ | SHORT
+ | INT
+ | LONG
+ | FLOAT
+ | DOUBLE
+ | _COMPLEX
+ | SIGNED
+ | UNSIGNED
+ | __INT128
+ """
+ p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
+
+ def p_type_specifier(self, p):
+ """ type_specifier : typedef_name
+ | enum_specifier
+ | struct_or_union_specifier
+ | type_specifier_no_typeid
+ | atomic_specifier
+ """
+ p[0] = p[1]
+
+ # See section 6.7.2.4 of the C11 standard.
+ def p_atomic_specifier(self, p):
+ """ atomic_specifier : _ATOMIC LPAREN type_name RPAREN
+ """
+ typ = p[3]
+ typ.quals.append('_Atomic')
+ p[0] = typ
+
+ def p_type_qualifier(self, p):
+ """ type_qualifier : CONST
+ | RESTRICT
+ | VOLATILE
+ | _ATOMIC
+ """
+ p[0] = p[1]
+
+ def p_init_declarator_list(self, p):
+ """ init_declarator_list : init_declarator
+ | init_declarator_list COMMA init_declarator
+ """
+ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
+
+ # Returns a {decl=<declarator> : init=<initializer>} dictionary
+ # If there's no initializer, uses None
+ #
+ def p_init_declarator(self, p):
+ """ init_declarator : declarator
+ | declarator EQUALS initializer
+ """
+ p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
+
+ def p_id_init_declarator_list(self, p):
+ """ id_init_declarator_list : id_init_declarator
+ | id_init_declarator_list COMMA init_declarator
+ """
+ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
+
+ def p_id_init_declarator(self, p):
+ """ id_init_declarator : id_declarator
+ | id_declarator EQUALS initializer
+ """
+ p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
+
+ # Require at least one type specifier in a specifier-qualifier-list
+ #
+ def p_specifier_qualifier_list_1(self, p):
+ """ specifier_qualifier_list : specifier_qualifier_list type_specifier_no_typeid
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
+
+ def p_specifier_qualifier_list_2(self, p):
+ """ specifier_qualifier_list : specifier_qualifier_list type_qualifier
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
+
+ def p_specifier_qualifier_list_3(self, p):
+ """ specifier_qualifier_list : type_specifier
+ """
+ p[0] = self._add_declaration_specifier(None, p[1], 'type')
+
+ def p_specifier_qualifier_list_4(self, p):
+ """ specifier_qualifier_list : type_qualifier_list type_specifier
+ """
+ p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])
+
+ def p_specifier_qualifier_list_5(self, p):
+ """ specifier_qualifier_list : alignment_specifier
+ """
+ p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])
+
+ def p_specifier_qualifier_list_6(self, p):
+ """ specifier_qualifier_list : specifier_qualifier_list alignment_specifier
+ """
+ p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')
+
+ # TYPEID is allowed here (and in other struct/enum related tag names), because
+ # struct/enum tags reside in their own namespace and can be named the same as types
+ #
+ def p_struct_or_union_specifier_1(self, p):
+ """ struct_or_union_specifier : struct_or_union ID
+ | struct_or_union TYPEID
+ """
+ klass = self._select_struct_union_class(p[1])
+ # None means no list of members
+ p[0] = klass(
+ name=p[2],
+ decls=None,
+ coord=self._token_coord(p, 2))
+
+ def p_struct_or_union_specifier_2(self, p):
+ """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
+ | struct_or_union brace_open brace_close
+ """
+ klass = self._select_struct_union_class(p[1])
+ if len(p) == 4:
+ # Empty sequence means an empty list of members
+ p[0] = klass(
+ name=None,
+ decls=[],
+ coord=self._token_coord(p, 2))
+ else:
+ p[0] = klass(
+ name=None,
+ decls=p[3],
+ coord=self._token_coord(p, 2))
+
+ def p_struct_or_union_specifier_3(self, p):
+ """ struct_or_union_specifier : struct_or_union ID brace_open struct_declaration_list brace_close
+ | struct_or_union ID brace_open brace_close
+ | struct_or_union TYPEID brace_open struct_declaration_list brace_close
+ | struct_or_union TYPEID brace_open brace_close
+ """
+ klass = self._select_struct_union_class(p[1])
+ if len(p) == 5:
+ # Empty sequence means an empty list of members
+ p[0] = klass(
+ name=p[2],
+ decls=[],
+ coord=self._token_coord(p, 2))
+ else:
+ p[0] = klass(
+ name=p[2],
+ decls=p[4],
+ coord=self._token_coord(p, 2))
+
+ def p_struct_or_union(self, p):
+ """ struct_or_union : STRUCT
+ | UNION
+ """
+ p[0] = p[1]
+
+ # Combine all declarations into a single list
+ #
+ def p_struct_declaration_list(self, p):
+ """ struct_declaration_list : struct_declaration
+ | struct_declaration_list struct_declaration
+ """
+ if len(p) == 2:
+ p[0] = p[1] or []
+ else:
+ p[0] = p[1] + (p[2] or [])
+
+ def p_struct_declaration_1(self, p):
+ """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
+ """
+ spec = p[1]
+ assert 'typedef' not in spec['storage']
+
+ if p[2] is not None:
+ decls = self._build_declarations(
+ spec=spec,
+ decls=p[2])
+
+ elif len(spec['type']) == 1:
+ # Anonymous struct/union, gcc extension, C1x feature.
+ # Although the standard only allows structs/unions here, I see no
+ # reason to disallow other types since some compilers have typedefs
+ # here, and pycparser isn't about rejecting all invalid code.
+ #
+ node = spec['type'][0]
+ if isinstance(node, c_ast.Node):
+ decl_type = node
+ else:
+ decl_type = c_ast.IdentifierType(node)
+
+ decls = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=decl_type)])
+
+ else:
+ # Structure/union members can have the same names as typedefs.
+ # The trouble is that the member's name gets grouped into
+ # specifier_qualifier_list; _build_declarations compensates.
+ #
+ decls = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=None, init=None)])
+
+ p[0] = decls
+
+ def p_struct_declaration_2(self, p):
+ """ struct_declaration : SEMI
+ """
+ p[0] = None
+
+ def p_struct_declaration_3(self, p):
+ """ struct_declaration : pppragma_directive
+ """
+ p[0] = [p[1]]
+
+ def p_struct_declarator_list(self, p):
+ """ struct_declarator_list : struct_declarator
+ | struct_declarator_list COMMA struct_declarator
+ """
+ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
+
+ # struct_declarator passes up a dict with the keys: decl (for
+ # the underlying declarator) and bitsize (for the bitsize)
+ #
+ def p_struct_declarator_1(self, p):
+ """ struct_declarator : declarator
+ """
+ p[0] = {'decl': p[1], 'bitsize': None}
+
+ def p_struct_declarator_2(self, p):
+ """ struct_declarator : declarator COLON constant_expression
+ | COLON constant_expression
+ """
+ if len(p) > 3:
+ p[0] = {'decl': p[1], 'bitsize': p[3]}
+ else:
+ p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}
+
+ def p_enum_specifier_1(self, p):
+ """ enum_specifier : ENUM ID
+ | ENUM TYPEID
+ """
+ p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))
+
+ def p_enum_specifier_2(self, p):
+ """ enum_specifier : ENUM brace_open enumerator_list brace_close
+ """
+ p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))
+
+ def p_enum_specifier_3(self, p):
+ """ enum_specifier : ENUM ID brace_open enumerator_list brace_close
+ | ENUM TYPEID brace_open enumerator_list brace_close
+ """
+ p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))
+
+ def p_enumerator_list(self, p):
+ """ enumerator_list : enumerator
+ | enumerator_list COMMA
+ | enumerator_list COMMA enumerator
+ """
+ if len(p) == 2:
+ p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
+ elif len(p) == 3:
+ p[0] = p[1]
+ else:
+ p[1].enumerators.append(p[3])
+ p[0] = p[1]
+
+ def p_alignment_specifier(self, p):
+ """ alignment_specifier : _ALIGNAS LPAREN type_name RPAREN
+ | _ALIGNAS LPAREN constant_expression RPAREN
+ """
+ p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1))
+
+ def p_enumerator(self, p):
+ """ enumerator : ID
+ | ID EQUALS constant_expression
+ """
+ if len(p) == 2:
+ enumerator = c_ast.Enumerator(
+ p[1], None,
+ self._token_coord(p, 1))
+ else:
+ enumerator = c_ast.Enumerator(
+ p[1], p[3],
+ self._token_coord(p, 1))
+ self._add_identifier(enumerator.name, enumerator.coord)
+
+ p[0] = enumerator
+
+ def p_declarator(self, p):
+ """ declarator : id_declarator
+ | typeid_declarator
+ """
+ p[0] = p[1]
+
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
+ def p_xxx_declarator_1(self, p):
+ """ xxx_declarator : direct_xxx_declarator
+ """
+ p[0] = p[1]
+
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
+ def p_xxx_declarator_2(self, p):
+ """ xxx_declarator : pointer direct_xxx_declarator
+ """
+ p[0] = self._type_modify_decl(p[2], p[1])
+
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
+ def p_direct_xxx_declarator_1(self, p):
+ """ direct_xxx_declarator : yyy
+ """
+ p[0] = c_ast.TypeDecl(
+ declname=p[1],
+ type=None,
+ quals=None,
+ align=None,
+ coord=self._token_coord(p, 1))
+
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
+ def p_direct_xxx_declarator_2(self, p):
+ """ direct_xxx_declarator : LPAREN xxx_declarator RPAREN
+ """
+ p[0] = p[2]
+
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
+ def p_direct_xxx_declarator_3(self, p):
+ """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
+ """
+ quals = (p[3] if len(p) > 5 else []) or []
+ # Accept dimension qualifiers
+ # Per C99 6.7.5.3 p7
+ arr = c_ast.ArrayDecl(
+ type=None,
+ dim=p[4] if len(p) > 5 else p[3],
+ dim_quals=quals,
+ coord=p[1].coord)
+
+ p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
+
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
+ def p_direct_xxx_declarator_4(self, p):
+ """ direct_xxx_declarator : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
+ | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
+ """
+ # Using slice notation for PLY objects doesn't work in Python 3 for the
+ # version of PLY embedded with pycparser; see PLY Google Code issue 30.
+ # Work around that here by listing the two elements separately.
+ listed_quals = [item if isinstance(item, list) else [item]
+ for item in [p[3],p[4]]]
+ dim_quals = [qual for sublist in listed_quals for qual in sublist
+ if qual is not None]
+ arr = c_ast.ArrayDecl(
+ type=None,
+ dim=p[5],
+ dim_quals=dim_quals,
+ coord=p[1].coord)
+
+ p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
+
+ # Special for VLAs
+ #
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
+ def p_direct_xxx_declarator_5(self, p):
+ """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
+ """
+ arr = c_ast.ArrayDecl(
+ type=None,
+ dim=c_ast.ID(p[4], self._token_coord(p, 4)),
+ dim_quals=p[3] if p[3] is not None else [],
+ coord=p[1].coord)
+
+ p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
+
+ @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
+ def p_direct_xxx_declarator_6(self, p):
+ """ direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN
+ | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
+ """
+ func = c_ast.FuncDecl(
+ args=p[3],
+ type=None,
+ coord=p[1].coord)
+
+ # To see why _get_yacc_lookahead_token is needed, consider:
+ # typedef char TT;
+ # void foo(int TT) { TT = 10; }
+ # Outside the function, TT is a typedef, but inside (starting and
+ # ending with the braces) it's a parameter. The trouble begins with
+ # yacc's lookahead token. We don't know if we're declaring or
+ # defining a function until we see LBRACE, but if we wait for yacc to
+ # trigger a rule on that token, then TT will have already been read
+ # and incorrectly interpreted as TYPEID. We need to add the
+ # parameters to the scope the moment the lexer sees LBRACE.
+ #
+ if self._get_yacc_lookahead_token().type == "LBRACE":
+ if func.args is not None:
+ for param in func.args.params:
+ if isinstance(param, c_ast.EllipsisParam): break
+ self._add_identifier(param.name, param.coord)
+
+ p[0] = self._type_modify_decl(decl=p[1], modifier=func)
+
+ def p_pointer(self, p):
+ """ pointer : TIMES type_qualifier_list_opt
+ | TIMES type_qualifier_list_opt pointer
+ """
+ coord = self._token_coord(p, 1)
+ # Pointer decls nest from inside out. This is important when different
+ # levels have different qualifiers. For example:
+ #
+ # char * const * p;
+ #
+ # Means "pointer to const pointer to char"
+ #
+ # While:
+ #
+ # char ** const p;
+ #
+ # Means "const pointer to pointer to char"
+ #
+ # So when we construct PtrDecl nestings, the leftmost pointer goes in
+ # as the most nested type.
+ nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
+ if len(p) > 3:
+ tail_type = p[3]
+ while tail_type.type is not None:
+ tail_type = tail_type.type
+ tail_type.type = nested_type
+ p[0] = p[3]
+ else:
+ p[0] = nested_type
+
+ def p_type_qualifier_list(self, p):
+ """ type_qualifier_list : type_qualifier
+ | type_qualifier_list type_qualifier
+ """
+ p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
+
+ def p_parameter_type_list(self, p):
+ """ parameter_type_list : parameter_list
+ | parameter_list COMMA ELLIPSIS
+ """
+ if len(p) > 2:
+ p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3)))
+
+ p[0] = p[1]
+
+ def p_parameter_list(self, p):
+ """ parameter_list : parameter_declaration
+ | parameter_list COMMA parameter_declaration
+ """
+ if len(p) == 2: # single parameter
+ p[0] = c_ast.ParamList([p[1]], p[1].coord)
+ else:
+ p[1].params.append(p[3])
+ p[0] = p[1]
+
+ # From ISO/IEC 9899:TC2, 6.7.5.3.11:
+ # "If, in a parameter declaration, an identifier can be treated either
+ # as a typedef name or as a parameter name, it shall be taken as a
+ # typedef name."
+ #
+ # Inside a parameter declaration, once we've reduced declaration specifiers,
+ # if we shift in an LPAREN and see a TYPEID, it could be either an abstract
+ # declarator or a declarator nested inside parens. This rule tells us to
+ # always treat it as an abstract declarator. Therefore, we only accept
+ # `id_declarator`s and `typeid_noparen_declarator`s.
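+    #
+    # For example, with 'typedef char T;' in scope, the parameter in
+    # 'void f(int (T));' is parsed as an abstract declarator ("function
+    # taking T, returning int"), not as a parenthesized parameter named T.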
+ def p_parameter_declaration_1(self, p):
+ """ parameter_declaration : declaration_specifiers id_declarator
+ | declaration_specifiers typeid_noparen_declarator
+ """
+ spec = p[1]
+ if not spec['type']:
+ spec['type'] = [c_ast.IdentifierType(['int'],
+ coord=self._token_coord(p, 1))]
+ p[0] = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=p[2])])[0]
+
+ def p_parameter_declaration_2(self, p):
+ """ parameter_declaration : declaration_specifiers abstract_declarator_opt
+ """
+ spec = p[1]
+ if not spec['type']:
+ spec['type'] = [c_ast.IdentifierType(['int'],
+ coord=self._token_coord(p, 1))]
+
+ # Parameters can have the same names as typedefs. The trouble is that
+ # the parameter's name gets grouped into declaration_specifiers, making
+ # it look like an old-style declaration; compensate.
+ #
+ if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
+ self._is_type_in_scope(spec['type'][-1].names[0]):
+ decl = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=p[2], init=None)])[0]
+
+ # This truly is an old-style parameter declaration
+ #
+ else:
+ decl = c_ast.Typename(
+ name='',
+ quals=spec['qual'],
+ align=None,
+ type=p[2] or c_ast.TypeDecl(None, None, None, None),
+ coord=self._token_coord(p, 2))
+ typename = spec['type']
+ decl = self._fix_decl_name_type(decl, typename)
+
+ p[0] = decl
+
+ def p_identifier_list(self, p):
+ """ identifier_list : identifier
+ | identifier_list COMMA identifier
+ """
+ if len(p) == 2: # single parameter
+ p[0] = c_ast.ParamList([p[1]], p[1].coord)
+ else:
+ p[1].params.append(p[3])
+ p[0] = p[1]
+
+ def p_initializer_1(self, p):
+ """ initializer : assignment_expression
+ """
+ p[0] = p[1]
+
+ def p_initializer_2(self, p):
+ """ initializer : brace_open initializer_list_opt brace_close
+ | brace_open initializer_list COMMA brace_close
+ """
+ if p[2] is None:
+ p[0] = c_ast.InitList([], self._token_coord(p, 1))
+ else:
+ p[0] = p[2]
+
+ def p_initializer_list(self, p):
+ """ initializer_list : designation_opt initializer
+ | initializer_list COMMA designation_opt initializer
+ """
+ if len(p) == 3: # single initializer
+ init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
+ p[0] = c_ast.InitList([init], p[2].coord)
+ else:
+ init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
+ p[1].exprs.append(init)
+ p[0] = p[1]
+
+ def p_designation(self, p):
+ """ designation : designator_list EQUALS
+ """
+ p[0] = p[1]
+
+ # Designators are represented as a list of nodes, in the order in which
+ # they're written in the code.
+ #
+ def p_designator_list(self, p):
+ """ designator_list : designator
+ | designator_list designator
+ """
+ p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
+
+ def p_designator(self, p):
+ """ designator : LBRACKET constant_expression RBRACKET
+ | PERIOD identifier
+ """
+ p[0] = p[2]
+
+ def p_type_name(self, p):
+ """ type_name : specifier_qualifier_list abstract_declarator_opt
+ """
+ typename = c_ast.Typename(
+ name='',
+ quals=p[1]['qual'][:],
+ align=None,
+ type=p[2] or c_ast.TypeDecl(None, None, None, None),
+ coord=self._token_coord(p, 2))
+
+ p[0] = self._fix_decl_name_type(typename, p[1]['type'])
+
+ def p_abstract_declarator_1(self, p):
+ """ abstract_declarator : pointer
+ """
+ dummytype = c_ast.TypeDecl(None, None, None, None)
+ p[0] = self._type_modify_decl(
+ decl=dummytype,
+ modifier=p[1])
+
+ def p_abstract_declarator_2(self, p):
+ """ abstract_declarator : pointer direct_abstract_declarator
+ """
+ p[0] = self._type_modify_decl(p[2], p[1])
+
+ def p_abstract_declarator_3(self, p):
+ """ abstract_declarator : direct_abstract_declarator
+ """
+ p[0] = p[1]
+
+ # Creating and using direct_abstract_declarator_opt here
+ # instead of listing both direct_abstract_declarator and the
+ # lack of it in the beginning of _1 and _2 caused two
+ # shift/reduce errors.
+ #
+ def p_direct_abstract_declarator_1(self, p):
+ """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """
+ p[0] = p[2]
+
+ def p_direct_abstract_declarator_2(self, p):
+ """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
+ """
+ arr = c_ast.ArrayDecl(
+ type=None,
+ dim=p[3],
+ dim_quals=[],
+ coord=p[1].coord)
+
+ p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
+
+ def p_direct_abstract_declarator_3(self, p):
+ """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
+ """
+ quals = (p[2] if len(p) > 4 else []) or []
+ p[0] = c_ast.ArrayDecl(
+ type=c_ast.TypeDecl(None, None, None, None),
+ dim=p[3] if len(p) > 4 else p[2],
+ dim_quals=quals,
+ coord=self._token_coord(p, 1))
+
+ def p_direct_abstract_declarator_4(self, p):
+ """ direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET
+ """
+ arr = c_ast.ArrayDecl(
+ type=None,
+ dim=c_ast.ID(p[3], self._token_coord(p, 3)),
+ dim_quals=[],
+ coord=p[1].coord)
+
+ p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
+
+ def p_direct_abstract_declarator_5(self, p):
+ """ direct_abstract_declarator : LBRACKET TIMES RBRACKET
+ """
+ p[0] = c_ast.ArrayDecl(
+ type=c_ast.TypeDecl(None, None, None, None),
+ dim=c_ast.ID(p[3], self._token_coord(p, 3)),
+ dim_quals=[],
+ coord=self._token_coord(p, 1))
+
+ def p_direct_abstract_declarator_6(self, p):
+ """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
+ """
+ func = c_ast.FuncDecl(
+ args=p[3],
+ type=None,
+ coord=p[1].coord)
+
+ p[0] = self._type_modify_decl(decl=p[1], modifier=func)
+
+ def p_direct_abstract_declarator_7(self, p):
+ """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN
+ """
+ p[0] = c_ast.FuncDecl(
+ args=p[2],
+ type=c_ast.TypeDecl(None, None, None, None),
+ coord=self._token_coord(p, 1))
+
+ # declaration is a list, statement isn't. To make it consistent, block_item
+ # will always be a list
+ #
+ def p_block_item(self, p):
+ """ block_item : declaration
+ | statement
+ """
+ p[0] = p[1] if isinstance(p[1], list) else [p[1]]
+
+ # Since we made block_item a list, this just combines lists
+ #
+ def p_block_item_list(self, p):
+ """ block_item_list : block_item
+ | block_item_list block_item
+ """
+ # Empty block items (plain ';') produce [None], so ignore them
+ p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]
+
+ def p_compound_statement_1(self, p):
+ """ compound_statement : brace_open block_item_list_opt brace_close """
+ p[0] = c_ast.Compound(
+ block_items=p[2],
+ coord=self._token_coord(p, 1))
+
+ def p_labeled_statement_1(self, p):
+ """ labeled_statement : ID COLON pragmacomp_or_statement """
+ p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1))
+
+ def p_labeled_statement_2(self, p):
+ """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
+ p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1))
+
+ def p_labeled_statement_3(self, p):
+ """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """
+ p[0] = c_ast.Default([p[3]], self._token_coord(p, 1))
+
+ def p_selection_statement_1(self, p):
+ """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
+ p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1))
+
+ def p_selection_statement_2(self, p):
+ """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
+ p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))
+
+ def p_selection_statement_3(self, p):
+ """ selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
+ p[0] = fix_switch_cases(
+ c_ast.Switch(p[3], p[5], self._token_coord(p, 1)))
+
+ def p_iteration_statement_1(self, p):
+ """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
+ p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1))
+
+ def p_iteration_statement_2(self, p):
+ """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
+ p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))
+
+ def p_iteration_statement_3(self, p):
+ """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
+ p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))
+
+ def p_iteration_statement_4(self, p):
+ """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
+ p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)),
+ p[4], p[6], p[8], self._token_coord(p, 1))
+
+ def p_jump_statement_1(self, p):
+ """ jump_statement : GOTO ID SEMI """
+ p[0] = c_ast.Goto(p[2], self._token_coord(p, 1))
+
+ def p_jump_statement_2(self, p):
+ """ jump_statement : BREAK SEMI """
+ p[0] = c_ast.Break(self._token_coord(p, 1))
+
+ def p_jump_statement_3(self, p):
+ """ jump_statement : CONTINUE SEMI """
+ p[0] = c_ast.Continue(self._token_coord(p, 1))
+
+ def p_jump_statement_4(self, p):
+ """ jump_statement : RETURN expression SEMI
+ | RETURN SEMI
+ """
+ p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))
+
+ def p_expression_statement(self, p):
+ """ expression_statement : expression_opt SEMI """
+ if p[1] is None:
+ p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
+ else:
+ p[0] = p[1]
+
+ def p_expression(self, p):
+ """ expression : assignment_expression
+ | expression COMMA assignment_expression
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ if not isinstance(p[1], c_ast.ExprList):
+ p[1] = c_ast.ExprList([p[1]], p[1].coord)
+
+ p[1].exprs.append(p[3])
+ p[0] = p[1]
+
+ def p_parenthesized_compound_expression(self, p):
+ """ assignment_expression : LPAREN compound_statement RPAREN """
+ p[0] = p[2]
+
+ def p_typedef_name(self, p):
+ """ typedef_name : TYPEID """
+ p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
+
+ def p_assignment_expression(self, p):
+ """ assignment_expression : conditional_expression
+ | unary_expression assignment_operator assignment_expression
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)
+
+    # K&R2 defines these as many separate rules, to encode
+    # precedence and associativity. Why work hard? I'll just use
+    # the built-in precedence/associativity specification feature
+    # of PLY. (see precedence declaration above)
+ #
+ def p_assignment_operator(self, p):
+ """ assignment_operator : EQUALS
+ | XOREQUAL
+ | TIMESEQUAL
+ | DIVEQUAL
+ | MODEQUAL
+ | PLUSEQUAL
+ | MINUSEQUAL
+ | LSHIFTEQUAL
+ | RSHIFTEQUAL
+ | ANDEQUAL
+ | OREQUAL
+ """
+ p[0] = p[1]
+
+ def p_constant_expression(self, p):
+ """ constant_expression : conditional_expression """
+ p[0] = p[1]
+
+ def p_conditional_expression(self, p):
+ """ conditional_expression : binary_expression
+ | binary_expression CONDOP expression COLON conditional_expression
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)
+
+ def p_binary_expression(self, p):
+ """ binary_expression : cast_expression
+ | binary_expression TIMES binary_expression
+ | binary_expression DIVIDE binary_expression
+ | binary_expression MOD binary_expression
+ | binary_expression PLUS binary_expression
+ | binary_expression MINUS binary_expression
+ | binary_expression RSHIFT binary_expression
+ | binary_expression LSHIFT binary_expression
+ | binary_expression LT binary_expression
+ | binary_expression LE binary_expression
+ | binary_expression GE binary_expression
+ | binary_expression GT binary_expression
+ | binary_expression EQ binary_expression
+ | binary_expression NE binary_expression
+ | binary_expression AND binary_expression
+ | binary_expression OR binary_expression
+ | binary_expression XOR binary_expression
+ | binary_expression LAND binary_expression
+ | binary_expression LOR binary_expression
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)
+
+ def p_cast_expression_1(self, p):
+ """ cast_expression : unary_expression """
+ p[0] = p[1]
+
+ def p_cast_expression_2(self, p):
+ """ cast_expression : LPAREN type_name RPAREN cast_expression """
+ p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1))
+
+ def p_unary_expression_1(self, p):
+ """ unary_expression : postfix_expression """
+ p[0] = p[1]
+
+ def p_unary_expression_2(self, p):
+ """ unary_expression : PLUSPLUS unary_expression
+ | MINUSMINUS unary_expression
+ | unary_operator cast_expression
+ """
+ p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)
+
+ def p_unary_expression_3(self, p):
+ """ unary_expression : SIZEOF unary_expression
+ | SIZEOF LPAREN type_name RPAREN
+ | _ALIGNOF LPAREN type_name RPAREN
+ """
+ p[0] = c_ast.UnaryOp(
+ p[1],
+ p[2] if len(p) == 3 else p[3],
+ self._token_coord(p, 1))
+
+ def p_unary_operator(self, p):
+ """ unary_operator : AND
+ | TIMES
+ | PLUS
+ | MINUS
+ | NOT
+ | LNOT
+ """
+ p[0] = p[1]
+
+ def p_postfix_expression_1(self, p):
+ """ postfix_expression : primary_expression """
+ p[0] = p[1]
+
+ def p_postfix_expression_2(self, p):
+ """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """
+ p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
+
+ def p_postfix_expression_3(self, p):
+ """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN
+ | postfix_expression LPAREN RPAREN
+ """
+ p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)
+
+ def p_postfix_expression_4(self, p):
+ """ postfix_expression : postfix_expression PERIOD ID
+ | postfix_expression PERIOD TYPEID
+ | postfix_expression ARROW ID
+ | postfix_expression ARROW TYPEID
+ """
+ field = c_ast.ID(p[3], self._token_coord(p, 3))
+ p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)
+
+ def p_postfix_expression_5(self, p):
+ """ postfix_expression : postfix_expression PLUSPLUS
+ | postfix_expression MINUSMINUS
+ """
+ p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)
+
+ def p_postfix_expression_6(self, p):
+ """ postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close
+ | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
+ """
+ p[0] = c_ast.CompoundLiteral(p[2], p[5])
+
+ def p_primary_expression_1(self, p):
+ """ primary_expression : identifier """
+ p[0] = p[1]
+
+ def p_primary_expression_2(self, p):
+ """ primary_expression : constant """
+ p[0] = p[1]
+
+ def p_primary_expression_3(self, p):
+ """ primary_expression : unified_string_literal
+ | unified_wstring_literal
+ """
+ p[0] = p[1]
+
+ def p_primary_expression_4(self, p):
+ """ primary_expression : LPAREN expression RPAREN """
+ p[0] = p[2]
+
+ def p_primary_expression_5(self, p):
+ """ primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
+ """
+ coord = self._token_coord(p, 1)
+ p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
+ c_ast.ExprList([p[3], p[5]], coord),
+ coord)
+
+ def p_offsetof_member_designator(self, p):
+ """ offsetof_member_designator : identifier
+ | offsetof_member_designator PERIOD identifier
+ | offsetof_member_designator LBRACKET expression RBRACKET
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ elif len(p) == 4:
+ p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
+ elif len(p) == 5:
+ p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
+ else:
+ raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p))
+
+ def p_argument_expression_list(self, p):
+ """ argument_expression_list : assignment_expression
+ | argument_expression_list COMMA assignment_expression
+ """
+ if len(p) == 2: # single expr
+ p[0] = c_ast.ExprList([p[1]], p[1].coord)
+ else:
+ p[1].exprs.append(p[3])
+ p[0] = p[1]
+
+ def p_identifier(self, p):
+ """ identifier : ID """
+ p[0] = c_ast.ID(p[1], self._token_coord(p, 1))
+
+ def p_constant_1(self, p):
+ """ constant : INT_CONST_DEC
+ | INT_CONST_OCT
+ | INT_CONST_HEX
+ | INT_CONST_BIN
+ | INT_CONST_CHAR
+ """
+ uCount = 0
+ lCount = 0
+ for x in p[1][-3:]:
+ if x in ('l', 'L'):
+ lCount += 1
+ elif x in ('u', 'U'):
+ uCount += 1
+        if uCount > 1:
+            raise ValueError('Constant cannot have more than one u/U suffix.')
+        elif lCount > 2:
+            raise ValueError('Constant cannot have more than two l/L suffixes.')
+ prefix = 'unsigned ' * uCount + 'long ' * lCount
+ p[0] = c_ast.Constant(
+ prefix + 'int', p[1], self._token_coord(p, 1))
+
+ def p_constant_2(self, p):
+ """ constant : FLOAT_CONST
+ | HEX_FLOAT_CONST
+ """
+ if 'x' in p[1].lower():
+ t = 'float'
+ else:
+ if p[1][-1] in ('f', 'F'):
+ t = 'float'
+ elif p[1][-1] in ('l', 'L'):
+ t = 'long double'
+ else:
+ t = 'double'
+
+ p[0] = c_ast.Constant(
+ t, p[1], self._token_coord(p, 1))
+
+ def p_constant_3(self, p):
+ """ constant : CHAR_CONST
+ | WCHAR_CONST
+ | U8CHAR_CONST
+ | U16CHAR_CONST
+ | U32CHAR_CONST
+ """
+ p[0] = c_ast.Constant(
+ 'char', p[1], self._token_coord(p, 1))
+
+ # The "unified" string and wstring literal rules are for supporting
+ # concatenation of adjacent string literals.
+ # I.e. "hello " "world" is seen by the C compiler as a single string literal
+ # with the value "hello world"
+ #
+ def p_unified_string_literal(self, p):
+ """ unified_string_literal : STRING_LITERAL
+ | unified_string_literal STRING_LITERAL
+ """
+ if len(p) == 2: # single literal
+ p[0] = c_ast.Constant(
+ 'string', p[1], self._token_coord(p, 1))
+ else:
+ p[1].value = p[1].value[:-1] + p[2][1:]
+ p[0] = p[1]
+
+ def p_unified_wstring_literal(self, p):
+ """ unified_wstring_literal : WSTRING_LITERAL
+ | U8STRING_LITERAL
+ | U16STRING_LITERAL
+ | U32STRING_LITERAL
+ | unified_wstring_literal WSTRING_LITERAL
+ | unified_wstring_literal U8STRING_LITERAL
+ | unified_wstring_literal U16STRING_LITERAL
+ | unified_wstring_literal U32STRING_LITERAL
+ """
+ if len(p) == 2: # single literal
+ p[0] = c_ast.Constant(
+ 'string', p[1], self._token_coord(p, 1))
+ else:
+ p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
+ p[0] = p[1]
+
+ def p_brace_open(self, p):
+ """ brace_open : LBRACE
+ """
+ p[0] = p[1]
+ p.set_lineno(0, p.lineno(1))
+
+ def p_brace_close(self, p):
+ """ brace_close : RBRACE
+ """
+ p[0] = p[1]
+ p.set_lineno(0, p.lineno(1))
+
+ def p_empty(self, p):
+ 'empty : '
+ p[0] = None
+
+ def p_error(self, p):
+ # If error recovery is added here in the future, make sure
+ # _get_yacc_lookahead_token still works!
+ #
+ if p:
+ self._parse_error(
+ 'before: %s' % p.value,
+ self._coord(lineno=p.lineno,
+ column=self.clex.find_tok_column(p)))
+ else:
+ self._parse_error('At end of input', self.clex.filename)