C语言的词法语法分析lex&yacc 经典收藏

2024-04-20 00:22•数据库•阅读 3796

http://www.quut.com/c/ANSI-C-grammar-l-1998.html

http://www.quut.com/c/ANSI-C-grammar-y-1998.html

In 1985, Jeff Lee published this Lex specification together with a Yacc grammar for the April 30, 1985 ANSI C draft. Tom Stockfisch reposted both to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.

The version you see here has been updated based on an 1998 draft of the standards document. It allows for restricted pointers, variable arrays, "inline", and designated initializers. The previous version's lex and yacc files (ANSI C as of ca 1995) are still around as archived copies.

I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.

Jutta Degener, 2008

D                  [0-9]
L                       [a-zA-Z_]
H                       [a-fA-F0-9]
E                       [Ee][+-]?{D}+
P                       [Pp][+-]?{D}+
FS                      (f|F|l|L)
IS                      ((u|U)|(u|U)?(l|L|ll|LL)|(l|L|ll|LL)(u|U))

%{
#include <stdio.h>
#include "y.tab.h"

void count(void);
%}

%%
"/*"                    { comment(); }
"//"[^\n]*              { /* consume //-comment */ }


"auto"                  { count(); return(AUTO); }
"_Bool"                 { count(); return(BOOL); }
"break"                 { count(); return(BREAK); }
"case"                  { count(); return(CASE); }
"char"                  { count(); return(CHAR); }
"_Complex"              { count(); return(COMPLEX); }
"const"                 { count(); return(CONST); }
"continue"              { count(); return(CONTINUE); }
"default"               { count(); return(DEFAULT); }
"do"                    { count(); return(DO); }
"double"                { count(); return(DOUBLE); }
"else"                  { count(); return(ELSE); }
"enum"                  { count(); return(ENUM); }
"extern"                { count(); return(EXTERN); }
"float"                 { count(); return(FLOAT); }
"for"                   { count(); return(FOR); }
"goto"                  { count(); return(GOTO); }
"if"                    { count(); return(IF); }
"_Imaginary"            { count(); return(IMAGINARY); }
"inline"                { count(); return(INLINE); }
"int"                   { count(); return(INT); }
"long"                  { count(); return(LONG); }
"register"              { count(); return(REGISTER); }
"restrict"              { count(); return(RESTRICT); }
"return"                { count(); return(RETURN); }
"short"                 { count(); return(SHORT); }
"signed"                { count(); return(SIGNED); }
"sizeof"                { count(); return(SIZEOF); }
"static"                { count(); return(STATIC); }
"struct"                { count(); return(STRUCT); }
"switch"                { count(); return(SWITCH); }
"typedef"               { count(); return(TYPEDEF); }
"union"                 { count(); return(UNION); }
"unsigned"              { count(); return(UNSIGNED); }
"void"                  { count(); return(VOID); }
"volatile"              { count(); return(VOLATILE); }
"while"                 { count(); return(WHILE); }

{L}({L}|{D})*           { count(); return(check_type()); }

0[xX]{H}+{IS}?          { count(); return(CONSTANT); }
0{D}+{IS}?              { count(); return(CONSTANT); }
{D}+{IS}?               { count(); return(CONSTANT); }
L?'(\\.|[^\\'\n])+'     { count(); return(CONSTANT); }

{D}+{E}{FS}?            { count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { count(); return(CONSTANT); }
0[xX]{H}+{P}{FS}?               { count(); return(CONSTANT); }
0[xX]{H}*"."{H}+({P})?{FS}?     { count(); return(CONSTANT); }
0[xX]{H}+"."{H}*({P})?{FS}?     { count(); return(CONSTANT); }


L?\"(\\.|[^\\"\n])*\"   { count(); return(STRING_LITERAL); }

"..."                   { count(); return(ELLIPSIS); }
">>="                     { count(); return(RIGHT_ASSIGN); }
"<<="                     { count(); return(LEFT_ASSIGN); }
"+="                    { count(); return(ADD_ASSIGN); }
"-="                    { count(); return(SUB_ASSIGN); }
"*="                    { count(); return(MUL_ASSIGN); }
"/="                    { count(); return(DIV_ASSIGN); }
"%="                    { count(); return(MOD_ASSIGN); }
"&="                        { count(); return(AND_ASSIGN); }
"^="                    { count(); return(XOR_ASSIGN); }
"|="                    { count(); return(OR_ASSIGN); }
">>"                      { count(); return(RIGHT_OP); }
"<<"                      { count(); return(LEFT_OP); }
"++"                    { count(); return(INC_OP); }
"--"                    { count(); return(DEC_OP); }
"->"                 { count(); return(PTR_OP); }
"&&"                    { count(); return(AND_OP); }
"||"                    { count(); return(OR_OP); }
"<="                 { count(); return(LE_OP); }
">="                 { count(); return(GE_OP); }
"=="                    { count(); return(EQ_OP); }
"!="                    { count(); return(NE_OP); }
";"                     { count(); return(';'); }
("{"|"<%")           { count(); return('{'); }
("}"|"%>")           { count(); return('}'); }
","                     { count(); return(','); }
":"                     { count(); return(':'); }
"="                     { count(); return('='); }
"("                     { count(); return('('); }
")"                     { count(); return(')'); }
("["|"<:")           { count(); return('['); }
("]"|":>")           { count(); return(']'); }
"."                     { count(); return('.'); }
"&"                 { count(); return('&'); }
"!"                     { count(); return('!'); }
"~"                     { count(); return('~'); }
"-"                     { count(); return('-'); }
"+"                     { count(); return('+'); }
"*"                     { count(); return('*'); }
"/"                     { count(); return('/'); }
"%"                     { count(); return('%'); }
"<"                  { count(); return('<'); }
">"                  { count(); return('>'); }
"^"                     { count(); return('^'); }
"|"                     { count(); return('|'); }
"?"                     { count(); return('?'); }

[ \t\v\n\f]             { count(); }
.                       { /* Add code to complain about unmatched characters */ }

%%

int yywrap(void)
{
        return 1;
}


void comment(void)
{
        char c, prev = 0;
  
        while ((c = input()) != 0)      /* (EOF maps to 0) */
        {
                if (c == '/' && prev == '*')
                        return;
                prev = c;
        }
        error("unterminated comment");
}
int column = 0;

void count(void)
{
        int i;

        for (i = 0; yytext[i] != '\0'; i++)
                if (yytext[i] == '\n')
                        column = 0;
                else if (yytext[i] == '\t')
                        column += 8 - (column % 8);
                else
                        column++;

        ECHO;
}
int check_type(void)
{
/*
* pseudo code --- this is what it should check
*
*       if (yytext == type_name)
*               return TYPE_NAME;
*
*       return IDENTIFIER;
*/

/*
*       it actually will only return IDENTIFIER
*/

        return IDENTIFIER;
}

ANSI C Yacc grammar

In 1985, Jeff Lee published his Yacc grammar (which is accompanied by a matching Lex specification) for the April 30, 1985 draft version of the ANSI C standard. Tom Stockfisch reposted it to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z. 
The version you see here has been updated based on an 1998 draft of the standards document. It allows for restricted pointers, variable arrays, "inline", and designated initializers. The previous version's lex and yacc files (ANSI C as of ca 1995) are still around as archived copies. 
I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.
(If you feel like it, read the FAQ first.) 
Jutta Degener, November 2008

%token IDENTIFIERCONSTANTSTRING_LITERALSIZEOF
%token PTR_OPINC_OPDEC_OPLEFT_OPRIGHT_OPLE_OPGE_OPEQ_OPNE_OP
%token AND_OPOR_OPMUL_ASSIGNDIV_ASSIGNMOD_ASSIGNADD_ASSIGN
%token SUB_ASSIGNLEFT_ASSIGNRIGHT_ASSIGNAND_ASSIGN
%token XOR_ASSIGNOR_ASSIGNTYPE_NAME%token TYPEDEFEXTERNSTATICAUTOREGISTERINLINERESTRICT
%token CHARSHORTINTLONGSIGNEDUNSIGNEDFLOATDOUBLECONSTVOLATILEVOID
%token BOOLCOMPLEXIMAGINARY
%token STRUCTUNIONENUMELLIPSIS%token CASEDEFAULTIFELSESWITCHWHILEDOFORGOTOCONTINUEBREAKRETURN%start translation_unit
%%

primary_expression
        : IDENTIFIER
        | CONSTANT
        | STRING_LITERAL
        | '(' expression ')'
        ;

postfix_expression
        : primary_expression
        | postfix_expression '[' expression ']'
        | postfix_expression '(' ')'
        | postfix_expression '(' argument_expression_list ')'
        | postfix_expression '.' IDENTIFIER
        | postfix_expression PTR_OPIDENTIFIER
        | postfix_expression INC_OP
        | postfix_expression DEC_OP
        | '(' type_name ')' '{' initializer_list '}'
        | '(' type_name ')' '{' initializer_list ',' '}'
        ;

argument_expression_list
        : assignment_expression
        | argument_expression_list ',' assignment_expression
        ;

unary_expression
        : postfix_expression
        | INC_OP unary_expression
        | DEC_OP unary_expression
        | unary_operatorcast_expression
        | SIZEOF unary_expression
        | SIZEOF '(' type_name ')'
        ;

unary_operator
        : '&'
        | '*'
        | '+'
        | '-'
        | '~'
        | '!'
        ;

cast_expression
        : unary_expression
        | '(' type_name ')' cast_expression
        ;

multiplicative_expression
        : cast_expression
        | multiplicative_expression '*' cast_expression
        | multiplicative_expression '/' cast_expression
        | multiplicative_expression '%' cast_expression
        ;

additive_expression
        : multiplicative_expression
        | additive_expression '+' multiplicative_expression
        | additive_expression '-' multiplicative_expression
        ;

shift_expression
        : additive_expression
        | shift_expression LEFT_OPadditive_expression
        | shift_expression RIGHT_OPadditive_expression
        ;

relational_expression
        : shift_expression
        | relational_expression '<' shift_expression
        | relational_expression '>' shift_expression
        | relational_expression LE_OPshift_expression
        | relational_expression GE_OPshift_expression
        ;

equality_expression
        : relational_expression
        | equality_expression EQ_OPrelational_expression
        | equality_expression NE_OPrelational_expression
        ;

and_expression
        : equality_expression
        | and_expression '&' equality_expression
        ;

exclusive_or_expression
        : and_expression
        | exclusive_or_expression '^' and_expression
        ;

inclusive_or_expression
        : exclusive_or_expression
        | inclusive_or_expression '|' exclusive_or_expression
        ;

logical_and_expression
        : inclusive_or_expression
        | logical_and_expression AND_OPinclusive_or_expression
        ;

logical_or_expression
        : logical_and_expression
        | logical_or_expression OR_OPlogical_and_expression
        ;

conditional_expression
        : logical_or_expression
        | logical_or_expression '?' expression ':' conditional_expression
        ;

assignment_expression
        : conditional_expression
        | unary_expressionassignment_operator assignment_expression
        ;

assignment_operator
        : '='
        | MUL_ASSIGN
        | DIV_ASSIGN
        | MOD_ASSIGN
        | ADD_ASSIGN
        | SUB_ASSIGN
        | LEFT_ASSIGN
        | RIGHT_ASSIGN
        | AND_ASSIGN
        | XOR_ASSIGN
        | OR_ASSIGN
        ;

expression
        : assignment_expression
        | expression ',' assignment_expression
        ;

constant_expression
        : conditional_expression
        ;

declaration
        : declaration_specifiers ';'
        | declaration_specifiersinit_declarator_list ';'
        ;

declaration_specifiers
        : storage_class_specifier
        | storage_class_specifier declaration_specifiers
        | type_specifier
        | type_specifier declaration_specifiers
        | type_qualifier
        | type_qualifier declaration_specifiers
        | function_specifier
        | function_specifier declaration_specifiers
        ;

init_declarator_list
        : init_declarator
        | init_declarator_list ',' init_declarator
        ;

init_declarator
        : declarator
        | declarator '=' initializer
        ;

storage_class_specifier
        : TYPEDEF
        | EXTERN
        | STATIC
        | AUTO
        | REGISTER
        ;

type_specifier
        : VOID
        | CHAR
        | SHORT
        | INT
        | LONG
        | FLOAT
        | DOUBLE
        | SIGNED
        | UNSIGNED
        | BOOL
        | COMPLEX
        | IMAGINARY
        | struct_or_union_specifier
        | enum_specifier
        | TYPE_NAME
        ;

struct_or_union_specifier
        : struct_or_unionIDENTIFIER '{' struct_declaration_list '}'
        | struct_or_union '{' struct_declaration_list '}'
        | struct_or_unionIDENTIFIER
        ;

struct_or_union
        : STRUCT
        | UNION
        ;

struct_declaration_list
        : struct_declaration
        | struct_declaration_list struct_declaration
        ;

struct_declaration
        : specifier_qualifier_liststruct_declarator_list ';'
        ;

specifier_qualifier_list
        : type_specifier specifier_qualifier_list
        | type_specifier
        | type_qualifier specifier_qualifier_list
        | type_qualifier
        ;

struct_declarator_list
        : struct_declarator
        | struct_declarator_list ',' struct_declarator
        ;

struct_declarator
        : declarator
        | ':' constant_expression
        | declarator ':' constant_expression
        ;

enum_specifier
        : ENUM '{' enumerator_list '}'
        | ENUMIDENTIFIER '{' enumerator_list '}'
        | ENUM '{' enumerator_list ',' '}'
        | ENUMIDENTIFIER '{' enumerator_list ',' '}'
        | ENUMIDENTIFIER
        ;

enumerator_list
        : enumerator
        | enumerator_list ',' enumerator
        ;

enumerator
        : IDENTIFIER
        | IDENTIFIER '=' constant_expression
        ;

type_qualifier
        : CONST
        | RESTRICT
        | VOLATILE
        ;

function_specifier
        : INLINE
        ;

declarator
        : pointerdirect_declarator
        | direct_declarator
        ;


direct_declarator
        : IDENTIFIER
        | '(' declarator ')'
        | direct_declarator '[' type_qualifier_listassignment_expression ']'
        | direct_declarator '[' type_qualifier_list ']'
        | direct_declarator '[' assignment_expression ']'
        | direct_declarator '[' STATICtype_qualifier_listassignment_expression ']'
        | direct_declarator '[' type_qualifier_listSTATICassignment_expression ']'
        | direct_declarator '[' type_qualifier_list '*' ']'
        | direct_declarator '[' '*' ']'
        | direct_declarator '[' ']'
        | direct_declarator '(' parameter_type_list ')'
        | direct_declarator '(' identifier_list ')'
        | direct_declarator '(' ')'
        ;

pointer
        : '*'
        | '*' type_qualifier_list
        | '*' pointer
        | '*' type_qualifier_list pointer
        ;

type_qualifier_list
        : type_qualifier
        | type_qualifier_list type_qualifier
        ;


parameter_type_list
        : parameter_list
        | parameter_list ',' ELLIPSIS
        ;

parameter_list
        : parameter_declaration
        | parameter_list ',' parameter_declaration
        ;

parameter_declaration
        : declaration_specifiersdeclarator
        | declaration_specifiersabstract_declarator
        | declaration_specifiers
        ;

identifier_list
        : IDENTIFIER
        | identifier_list ',' IDENTIFIER
        ;

type_name
        : specifier_qualifier_list
        | specifier_qualifier_listabstract_declarator
        ;

abstract_declarator
        : pointer
        | direct_abstract_declarator
        | pointerdirect_abstract_declarator
        ;

direct_abstract_declarator
        : '(' abstract_declarator ')'
        | '[' ']'
        | '[' assignment_expression ']'
        | direct_abstract_declarator '[' ']'
        | direct_abstract_declarator '[' assignment_expression ']'
        | '[' '*' ']'
        | direct_abstract_declarator '[' '*' ']'
        | '(' ')'
        | '(' parameter_type_list ')'
        | direct_abstract_declarator '(' ')'
        | direct_abstract_declarator '(' parameter_type_list ')'
        ;

initializer
        : assignment_expression
        | '{' initializer_list '}'
        | '{' initializer_list ',' '}'
        ;

initializer_list
        : initializer
        | designationinitializer
        | initializer_list ',' initializer
        | initializer_list ',' designationinitializer
        ;

designation
        : designator_list '='
        ;

designator_list
        : designator
        | designator_listdesignator
        ;

designator
        : '[' constant_expression ']'
        | '.' IDENTIFIER
        ;

statement
        : labeled_statement
        | compound_statement
        | expression_statement
        | selection_statement
        | iteration_statement
        | jump_statement
        ;

labeled_statement
        : IDENTIFIER ':' statement
        | CASEconstant_expression ':' statement
        | DEFAULT ':' statement
        ;

compound_statement
        : '{' '}'
        | '{' block_item_list '}'
        ;

block_item_list
        : block_item
        | block_item_list block_item
        ;

block_item
        : declaration
        | statement
        ;

expression_statement
        : ';'
        | expression ';'
        ;

selection_statement
        : IF '(' expression ')' statement
        | IF '(' expression ')' statementELSEstatement
        | SWITCH '(' expression ')' statement
        ;

iteration_statement
        : WHILE '(' expression ')' statement
        | DOstatementWHILE '(' expression ')' ';'
        | FOR '(' expression_statementexpression_statement ')' statement
        | FOR '(' expression_statementexpression_statementexpression ')' statement
        | FOR '(' declarationexpression_statement ')' statement
        | FOR '(' declarationexpression_statementexpression ')' statement
        ;

jump_statement
        : GOTOIDENTIFIER ';'
        | CONTINUE ';'
        | BREAK ';'
        | RETURN ';'
        | RETURNexpression ';'
        ;

translation_unit
        : external_declaration
        | translation_unit external_declaration
        ;

external_declaration
        : function_definition
        | declaration
        ;

function_definition
        : declaration_specifiersdeclaratordeclaration_listcompound_statement
        | declaration_specifiersdeclaratorcompound_statement
        ;

declaration_list
        : declaration
        | declaration_list declaration
        ;


%%
#include <stdio.h>

extern char yytext[];
extern int column;

void yyerror(char const *s)
{
        fflush(stdout);
        printf("\n%*s\n%*s\n", column, "^", column, s);
}

上一篇 »编译原理词法分析 java简单实现
下一篇 »javascript中的闭包、模块与模块加载

C语言的词法语法分析lex&yacc 经典收藏

ANSI C Yacc grammar

相关推荐

java 简单的词法分析

详解JavaScript实现简单的词法分析器示例

JavaScript预编译原理分析

javascript函数作用域链之词法作用域

javascript的词法作用域，转

经典常用的javascript代码收藏

《你不知道的JavaScript上卷》知识点笔记

JavaScript语法分析，预编译及执行过程