# C/C++ BNF language specification
#
# Copyright (C) 1999, 2000, 2001, 2002 Eric M. Ludlam
#
# Author: Eric M. Ludlam
# X-RCS: $Id: c.bnf,v 1.59.2.10 2003/04/04 12:19:38 berndl Exp $
#
# c.bnf is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#

# ----------------------------------------------------------------------
# Grammar settings
# ----------------------------------------------------------------------
%start         declaration
%scopestart    codeblock
%outputfile    semantic-c.el
%parsetable    semantic-toplevel-c-bovine-table
%keywordtable  semantic-c-keyword-table
%languagemode  (c-mode c++-mode)
%setupfunction semantic-default-c-setup

# Lisp evaluated by the setup function named above.
%(setq semantic-expand-nonterminal 'semantic-expand-c-nonterminal
       semantic-flex-extensions semantic-flex-c-extensions
       semantic-dependency-include-path semantic-default-c-path
       semantic-orphaned-member-metaparent-type "struct"
       semantic-symbol->name-assoc-list
       '((type . "Types")
         (variable . "Variables")
         (function . "Functions")
         (include . "Includes"))
       semantic-symbol->name-assoc-list-for-type-parts
       '((type . "Types")
         (variable . "Attributes")
         (function . "Methods")
         (label . "Labels")
         )
       imenu-create-index-function 'semantic-create-imenu-index
       semantic-type-relation-separator-character '("." "->")
       semantic-command-separation-character ";"
       document-comment-start "/*"
       document-comment-line-prefix " *"
       document-comment-end " */"
       ;; Semantic navigation inside 'type children
       senator-step-at-token-ids '(function variable)
       )%

# ----------------------------------------------------------------------
# Tokens
# ----------------------------------------------------------------------
# NOTE(review): several summary strings below read as if a "<placeholder>"
# was stripped from them (e.g. "typedef ;", "using ;", "if () { code }").
# They are reproduced unchanged here; confirm against the upstream file.

%token INCLUDE "include"
%token DEFINE "define"

%token HASH punctuation "#"
%token PERIOD punctuation "."
%token COLON punctuation ":"
%token SEMICOLON punctuation ";"
%token STAR punctuation "*"
%token AMPERSAND punctuation "&"
%token DIVIDE punctuation "/"
%token PLUS punctuation "+"
%token MINUS punctuation "-"
%token BANG punctuation "!"
%token EQUAL punctuation "="
%token LESS punctuation "<"
%token GREATER punctuation ">"
%token COMA punctuation ","
%token TILDE punctuation "~"

%token EXTERN "extern"
%put EXTERN summary "Declaration Modifier: extern ..."
%token STATIC "static"
%put STATIC summary "Declaration Modifier: static ..."
%token CONST "const"
%put CONST summary "Declaration Modifier: const ..."
%token VOLATILE "volatile"
%put VOLATILE summary "Declaration Modifier: volatile ..."
%token REGISTER "register"
%put REGISTER summary "Declaration Modifier: register ..."
%token SIGNED "signed"
%put SIGNED summary "Numeric Type Modifier: signed ..."
%token UNSIGNED "unsigned"
%put UNSIGNED summary "Numeric Type Modifier: unsigned ..."

%token INLINE "inline"
%put INLINE summary "Function Modifier: inline (...) {...};"
%token VIRTUAL "virtual"
%put VIRTUAL summary "Method Modifier: virtual (...) ..."
%token MUTABLE "mutable"
%put MUTABLE summary "Member Declaration Modifier: mutable ..."

%token STRUCT "struct"
%put STRUCT summary "Structure Type Declaration: struct [name] { ... };"
%token UNION "union"
%put UNION summary "Union Type Declaration: union [name] { ... };"
%token ENUM "enum"
%put ENUM summary "Enumeration Type Declaration: enum [name] { ... };"
%token TYPEDEF "typedef"
%put TYPEDEF summary "Arbitrary Type Declaration: typedef ;"
%token CLASS "class"
%put CLASS summary "Class Declaration: class [:parents] { ... };"
%token TYPENAME "typename"
%put TYPENAME summary "typename is used to handle a qualified name as a typename;"
%token NAMESPACE "namespace"
%put NAMESPACE summary "Namespace Declaration: namespace { ... };"
%token USING "using"
%put USING summary "using ;"

# Despite this, this parser can find templates by ignoring the TEMPLATE
# keyword, and finding the class/method being templateized.
%token TEMPLATE "template"
%put TEMPLATE summary "template TYPE_OR_FUNCTION"

%token THROW "throw"
%put THROW summary " () throw () ..."
%token REENTRANT "reentrant"
%put REENTRANT summary " () reentrant ..."

# Leave these alone for now.
%token OPERATOR "operator"
%token PUBLIC "public"
%token PRIVATE "private"
%token PROTECTED "protected"
%token FRIEND "friend"

# These aren't used for parsing, but this is a useful place to describe
# the keywords.
%token IF "if"
%token ELSE "else"
%put {IF ELSE} summary "if () { code } [ else { code } ]"

%token DO "do"
%token WHILE "while"
%put DO summary " do { code } while ();"
%put WHILE summary "do { code } while (); or while () { code };"

%token FOR "for"
%put FOR summary "for(; ; ) { code }"

%token SWITCH "switch"
%token CASE "case"
%token DEFAULT "default"
%put {SWITCH CASE DEFAULT} summary "switch () { case : code; ... default: code; }"

%token RETURN "return"
%put RETURN summary "return ;"

%token BREAK "break"
%put BREAK summary "Non-local exit within a loop or switch (for, do/while, switch): break;"
%token CONTINUE "continue"
# Fixed typo in the user-visible summary: "lool" -> "loop".
%put CONTINUE summary "Non-local continue within a loop (for, do/while): continue;"

%token SIZEOF "sizeof"
%put SIZEOF summary "Compile time macro: sizeof() // size in bytes"

# Types
%token VOID "void"
%put VOID summary "Built in typeless type: void"
%token CHAR "char"
# Fixed upper bound: an 8-bit character value tops out at 255, not 256.
%put CHAR summary "Integral Character Type: (0 to 255)"
%token WCHAR "wchar_t"
%put WCHAR summary "Wide Character Type"
%token SHORT "short"
%put SHORT summary "Integral Primitive Type: (-32768 to 32767)"
%token INT "int"
%put INT summary "Integral Primitive Type: (-2147483648 to 2147483647)"
%token LONG "long"
%put LONG summary "Integral primitive type (-9223372036854775808 to 9223372036854775807)"
%token FLOAT "float"
%put FLOAT summary "Primitive floating-point type (single-precision 32-bit IEEE 754)"
%token DOUBLE "double"
%put DOUBLE summary "Primitive floating-point type (double-precision 64-bit IEEE 754)"

%token UNDERP "_P"
%token UNDERUNDERP "__P"
%put UNDERP summary "Common macro to eliminate prototype compatibility on some compilers"
%put UNDERUNDERP summary "Common macro to eliminate prototype compatibility on some compilers"

# ----------------------------------------------------------------------
# Top-level rules
# ----------------------------------------------------------------------

# Start symbol (see %start): one top-level declaration.
declaration
  : macro
  | type
  # TODO: Klaus Berndl: Is the define here necessary or even wrong?
  # Is this part not already covered by macro??
  | define
  | var-or-fun
  | extern-c
  | template
  | using
  ;

# Scope start symbol (see %scopestart): one entry inside a code block.
codeblock
  : define
  | codeblock-var-or-fun
  | type # type is less likely to be used here.
  ;

# Contents of the brace list following extern "C".
extern-c-contents
  : open-paren
    ( nil )
  | bovine-toplevel
  | close-paren
    ( nil )
  ;

extern-c
  : EXTERN string "\"C\"" semantic-list
    # Extern C commands which contain a list need to have the
    # entries of the list extracted, and spliced into the main
    # list of entries.  This must be done via the function
    # that expands singular nonterminals, such as int x,y;
    ( extern (EXPANDFULL $3 extern-c-contents) )
  | EXTERN string "\"C\""
    # A plain extern "C" call should add something to the token,
    # but just strip it from the buffer here for now.
    ( nil )
  ;

# ----------------------------------------------------------------------
# Preprocessor
# ----------------------------------------------------------------------

# Klaus Berndl: At least one-liners should be parsed correctly!
# Multi-line macros (every line ends with a backslash) that are not one
# code block cannot be parsed, because backslash+newline is flexed as
# nothing and therefore we cannot distinguish whether the lines after
# the first belong to the macro or are separate statements.  Maybe we
# need special backslash recognition and then better macro parsing.
# For the moment it's good enough.
macro-expression-list
  : expression macro-expression-list SEMICOLON
    ( nil )
  | expression
    ( nil )
  ;

macro-def
  : macro-expression-list
    ( nil )
  | expression
    ( nil )
  | EMPTY
  ;

macro
  : HASH macro-or-include
    ( ,$2 )
  ;

macro-or-include
  : DEFINE symbol opt-define-arglist macro-def
    ( $2 variable nil $3 (ASSOC const t) nil )
  | INCLUDE system-include
    # Strip the first and last character of the matched include text.
    ( (substring $2 1 (1- (length $2))) include t nil )
  | INCLUDE string
    ( (read $2) include nil nil )
  ;

opt-define-arglist
  : semantic-list
    ( nil )
  | EMPTY
  ;

# This is used in struct parts.
define
  : HASH DEFINE symbol opt-define-arglist macro-def
    ( $3 variable nil $4 (ASSOC const t) nil )
  ;

# ----------------------------------------------------------------------
# Struct / union / class / namespace / enum bodies
# ----------------------------------------------------------------------

# In C++, structures can have the same things as classes.
# So delete this someday in the future.
#
#structparts : semantic-list
#              (EXPANDFULL $1 structsubparts)
#            ;
#
#structsubparts : open-paren "{"
#                 ( nil )
#               | close-paren "}"
#                 ( nil )
#               | var-or-fun
#               | define
#               # sometimes there are defines in structs.
#               ;

unionparts
  : semantic-list
    (EXPANDFULL $1 classsubparts)
  ;

opt-symbol
  : symbol
  | EMPTY
  ;

classsubparts
  : open-paren "{"
    ( nil )
  | close-paren "}"
    ( nil )
  | class-protection opt-symbol COLON
    # For QT, they may put a `slot' keyword between the protection
    # and the COLON.
    ( ,$1 label )
  | var-or-fun
  | type
  # NOTE(review): the action below appends `protection' to a define
  # match, which looks unintended -- confirm before changing.
  | define
    ( ,$1 protection )
  | template
  | EMPTY
  ;

opt-class-parents
  : COLON class-parents opt-template-specifier
    ( $2 )
  | EMPTY
    ( )
  ;

class-parents
  : opt-class-protection opt-class-declmods namespace-symbol COMA class-parents
    ( ,(cons $3 $5 ) )
  | opt-class-protection opt-class-declmods namespace-symbol
    ( ,$3 )
  ;

# Zero or more class-declmods in front of a parent class name.
opt-class-declmods
  : class-declmods opt-class-declmods
    ( nil )
  | EMPTY
  ;

class-declmods
  : VIRTUAL
  ;

class-protection
  : PUBLIC
  | PRIVATE
  | PROTECTED
  ;

opt-class-protection
  : class-protection
    ( ,$1 )
  | EMPTY
  ;

namespaceparts
  : semantic-list
    (EXPANDFULL $1 namespacesubparts)
  ;

namespacesubparts
  : open-paren "{"
    ( nil )
  | close-paren "}"
    ( nil )
  | type
  | var-or-fun
  | define
  | class-protection COLON
    ( $1 protection )
    # In C++, this label in a classsubpart represents
    # PUBLIC or PRIVATE bits.  Ignore them for now.
  | template
  | using
  | EMPTY
  ;

enumparts
  : semantic-list
    (EXPANDFULL $1 enumsubparts)
  ;

enumsubparts
  : symbol opt-assign
    ( $1 variable "int" ,$2 (ASSOC const t) nil)
  | open-paren "{"
    ( nil )
  | close-paren "}"
    ( nil )
  | COMA
    ( nil )
  ;

opt-name
  : symbol
  | EMPTY
    ( "" )
  ;

# A second definition of `opt-class-declmods' used to live here.  It
# duplicated the nonterminal already defined for class-parents above and
# referenced `declespec', which is not defined in this grammar as
# visible here.  It is kept commented out for reference.
# NOTE(review): this assumes rule lookup is first-match, so that the
# earlier definition was the effective one -- confirm.
#
#opt-class-declmods : symbol declespec semantic-list
#                   | symbol
#                   | EMPTY
#                   ;

# ----------------------------------------------------------------------
# Types
# ----------------------------------------------------------------------

typesimple
  : struct-or-class opt-name opt-template-specifier opt-class-parents semantic-list
    ( ,$2 type ,$1
      (let ((semantic-c-classname (cons (car ,$2) (car ,$1))))
        (EXPANDFULL $5 classsubparts))
      $4 (ASSOC 'template-specifier $3) nil )
  | struct-or-class opt-name opt-template-specifier opt-class-parents
    ( ,$2 type ,$1 nil $4 (ASSOC 'template-specifier $3) nil )
  | UNION opt-name unionparts
    ( ,$2 type $1 $3 nil nil nil )
  | ENUM opt-name enumparts
    ( ,$2 type $1 $3 nil nil nil )
  # Klaus Berndl: a typedef can be a typeformbase with all this
  # declmods stuff.
  | TYPEDEF declmods typeformbase cv-declmods typedef-symbol-list
    ## We put the type this typedef renames into PARENT
    ## but will move it in the expand function.
    ( $5 type $1 nil $3 nil nil )
  ;

typedef-symbol-list
  : typedefname COMA typedef-symbol-list
    ( ,(cons $1 $3) )
  | typedefname
    ( $1 )
  ;

# TODO: Klaus Berndl: symbol -> namespace-symbol?!
# Answer: Probably symbol is correct here!
typedefname
  : opt-stars symbol opt-bits opt-array
    ( $1 $2 )
  ;

struct-or-class
  : STRUCT
  | CLASS
  ;

type
  : typesimple SEMICOLON
    ( ,$1 )
  # named namespaces like "namespace XXX {"
  | NAMESPACE symbol namespaceparts
    ( $2 type $1 $3 nil nil nil )
  # unnamed namespaces like "namespace {"
  | NAMESPACE namespaceparts
    ( "unnamed" type $1 $2 nil nil nil )
  ;

# Klaus Berndl:
# We must parse "using namespace XXX" too.
# Using is vaguely like an include statement in the named portions
# of the code.  We should probably specify a new token type for this.
using
  : USING typeformbase SEMICOLON
    ( nil )
  | USING NAMESPACE typeformbase SEMICOLON
    ( nil )
  ;

# ----------------------------------------------------------------------
# Templates
# ----------------------------------------------------------------------

template
  : TEMPLATE template-specifier opt-friend template-definition
    ( ,(semantic-c-reconstitute-template $4 ,$2) )
  ;

opt-friend
  : FRIEND
  | EMPTY
  ;

opt-template-specifier
  : template-specifier
    ( ,$1 )
  | EMPTY
    ()
  ;

template-specifier
  : LESS template-specifier-types GREATER
    ( ,$2 )
  ;

template-specifier-types
  : template-var template-specifier-type-list
    ( ,(cons ,$1 ,$2 ) )
  | EMPTY
  ;

template-specifier-type-list
  : COMA template-specifier-types
    ( ,$2 )
  | EMPTY
    ( )
  ;

# NOTE(review): the "Example:" lines in the comments below appear to
# have lost their <...> template arguments; they are kept as found.

#template-var : template-type opt-stars opt-template-equal
#               ( ,(cons (concat (car $1) (make-string (car ,$2) ?*))
#                        (cdr $1)))
## Klaus Berndl: for template-types the
## template-var can also be literals or constants.
## Example: map map_size10_var; This parses also
## template which is nonsense but who cares....
#             | string
#               ( $1 )
#             | number
#               ( $1 )
#             ;

template-var
  : # Klaus Berndl: The following handles all
    # template-vars of template-definitions
    template-type opt-template-equal
    ( ,(cons (car $1) (cdr $1)) )
  # Klaus Berndl: for template-types the
  # template-var can also be literals or constants.
  # Example: map map_size10_var; This parses also
  # template which is nonsense but who cares....
  | string
    ( $1 )
  | number
    ( $1 )
  # Klaus Berndl: In template-types arguments can
  # be any symbols with optional address-operator (&) and optional
  # dereferencing operator (*)
  # Example map sized_map_var.
  | opt-stars opt-ref namespace-symbol
    ( ,$3 )
  ;

opt-template-equal
  : EQUAL symbol LESS template-specifier-types GREATER
    ( $2 )
  | EMPTY
  ;

template-type
  : CLASS symbol
    ( $2 type "class" nil nil)
  | STRUCT symbol
    ( $2 type "struct" nil nil)
  # TODO: Klaus Berndl: For the moment it is ok
  # that we parse the C++ keyword typename as a class....
  | TYPENAME symbol
    ( $2 type "class" nil nil)
  # Klaus Berndl: template-types can be all
  # flavors of variable-args but here the argument is ignored, only the
  # type stuff is needed.
  | declmods typeformbase cv-declmods opt-stars opt-ref variablearg-opt-name
    ( (car $2) type nil nil
      (ASSOC const (if (member "const" (append $1 $3)) t nil)
             typemodifiers (delete "const" (append $1 $3))
             reference (car ,$5)
             pointer (car $4)
             )
      )
  ;

template-definition
  : type
    ( ,$1 )
  | var-or-fun
    ( ,$1 )
  ;

# ----------------------------------------------------------------------
# Pointer stars, modifiers and references
# ----------------------------------------------------------------------

# Counts leading STARs: each STAR adds one to the count of the rest.
opt-stars
  : STAR opt-starmod opt-stars
    ( (1+ (car $3)) )
  | EMPTY
    ( 0 )
  ;

opt-starmod
  : STARMOD opt-starmod
    ( ,(cons (,car ,$1) $2) )
  | EMPTY
    ()
  ;

STARMOD
  : CONST
  ;

declmods
  : DECLMOD declmods
    ( ,(cons ,(car ,$1) $2 ) )
  | DECLMOD
    ( ,$1 )
  | EMPTY
    ()
  ;

DECLMOD
  : EXTERN
  | STATIC
  | CVDECLMOD
  # Klaus Berndl: IMHO signed and unsigned are not
  # decl-modes but these are only valid for some builtin types like
  # short, int etc... whereas "real" declmods are valid for all types,
  # builtin and user-defined!
  # | SIGNED
  # | UNSIGNED
  | INLINE
  | REGISTER
  | FRIEND
  # Klaus Berndl: There can be a few cases where
  # TYPENAME is not allowed in C++-syntax but better than not
  # recognizing the allowed situations.
  | TYPENAME
  | METADECLMOD
  # This is a hack in case we are in a class.
  | VIRTUAL
  ;

metadeclmod
  : METADECLMOD
    ()
  | EMPTY
    ()
  ;

CVDECLMOD
  : CONST
  | VOLATILE
  ;

cv-declmods
  : CVDECLMOD cv-declmods
    ( ,(cons ,(car ,$1) $2 ) )
  | CVDECLMOD
    ( ,$1 )
  | EMPTY
    ()
  ;

METADECLMOD
  : VIRTUAL
  | MUTABLE
  ;

# C++: A type can be modified into a reference by "&"
opt-ref
  : AMPERSAND
    ( 1 )
  | EMPTY
    ( 0 )
  ;

# ----------------------------------------------------------------------
# Type forms and builtin types
# ----------------------------------------------------------------------

typeformbase
  : typesimple
    ( ,$1 )
  | STRUCT symbol
    ( $2 type $1 )
  | UNION symbol
    ( $2 type $1 )
  | ENUM symbol
    ( $2 type $1 )
  | builtintype
    ( ,$1 )
  # | symbol template-specifier
  #   ( $1 type "class" )
  # | namespace-symbol opt-stars opt-template-specifier
  # | namespace-symbol opt-template-specifier
  | namespace-symbol
  # ( ,$1 )
    ( ,$1 type "class" )
  | symbol
    ( $1 )
  ;

signedmod
  : UNSIGNED
  | SIGNED
  ;

# Klaus Berndl: builtintype-types was builtintype
builtintype-types
  : VOID
  | CHAR
  # Klaus Berndl: Added WCHAR
  | WCHAR
  | SHORT
  | INT
  | LONG INT
    ( (concat $1 " " $2) )
  | FLOAT
  | DOUBLE
  | LONG DOUBLE
    ( (concat $1 " " $2) )
  # TODO: Klaus Berndl: Is there a long long, I think so?!
  | LONG LONG
    ( (concat $1 " " $2) )
  | LONG
  ;

builtintype
  : signedmod builtintype-types
    ( (concat (car $1) " " (car $2)) )
  | builtintype-types
    ( ,$1 )
  # Klaus Berndl: unsigned is a synonym for unsigned
  # int and signed for signed int.  To make this confusing stuff clear we
  # add here the int.
  | signedmod
    ( (concat (car $1) " int") )
  ;

# Klaus Berndl: This parses also nonsense like
# "const volatile int const volatile const const volatile a ..." but
# IMHO nobody writes such code.  Normally we should define a rule like
# typeformbase-mode which exactly defines the different allowed cases
# and combinations of declmods (minus the CVDECLMOD) typeformbase and
# cv-declmods so we could recognize more invalid code but IMHO this is
# not worth the effort...
# A typed variable or function declaration; this form is also the one
# used inside code blocks.
codeblock-var-or-fun
  : declmods typeformbase declmods opt-ref var-or-func-decl
    ( ,(semantic-c-reconstitute-token ,$5 $1 $2 ) )
  ;

var-or-fun
  : codeblock-var-or-fun
    ( ,$1 )
  # It is possible for a function to not have a type, and
  # it is then assumed to be an int.  How annoying.
  # In C++, this could be a constructor or a destructor.
  # Even more annoying.  Only ever do this for regular
  # top-level items.  Ignore this problem in code blocks
  # so that we don't have to deal with regular code
  # being erroneously converted into types.
  | declmods var-or-func-decl
    ( ,(semantic-c-reconstitute-token ,$2 $1 nil ) )
  ;

# Either a function declaration or a variable declaration.
var-or-func-decl
  : func-decl
    ( ,$1 )
  | var-decl
    ( ,$1 )
  ;

func-decl
  : opt-stars opt-class opt-destructor functionname
    opt-template-specifier opt-under-p arg-list
    opt-post-fcn-modifiers opt-throw opt-initializers
    fun-or-proto-end
    ( ,$4 'function
      ;; Extra stuff goes in here.
      ;; Continue with the stuff we found in
      ;; this definition
      $2 $3 $7 $9 $8 ,$1 ,$11)
  ;

# A list of variable names terminated by a semicolon.
var-decl
  : varnamelist SEMICOLON
    ( $1 'variable )
  ;

# Optional _P / __P prototype macro in front of the argument list.
opt-under-p
  : UNDERP
    (nil)
  | UNDERUNDERP
    (nil)
  | EMPTY
  ;

# Klaus Berndl: symbol -> namespace-symbol
opt-initializers
  : COLON namespace-symbol semantic-list opt-initializers
  | COMA namespace-symbol semantic-list opt-initializers
  | EMPTY
  ;

# Zero or more modifiers following the argument list.
opt-post-fcn-modifiers
  : post-fcn-modifiers opt-post-fcn-modifiers
    ( ,(cons ,$1 $2) )
  | EMPTY
    ( nil )
  ;

post-fcn-modifiers
  : REENTRANT
  | CONST
  ;

# Optional throw clause; its list is expanded by throw-exception-list.
opt-throw
  : THROW semantic-list
    ( EXPAND $2 throw-exception-list )
  | EMPTY
  ;

# Is this true?  I don't actually know.
# Walks the parenthesized list of a throw clause.
throw-exception-list
  : namespace-symbol COMA throw-exception-list
    ( ,(cons (car $1) $3) )
  | namespace-symbol close-paren ")"
    ( ,$1 )
  | open-paren "(" throw-exception-list
    ( ,$2 )
  | close-paren ")"
    ( )
  ;

# Optional ": number" after a name.
opt-bits
  : COLON number
    ( $2 )
  | EMPTY
    ( nil )
  ;

# Zero or more bracketed lists after a name.
opt-array
  : semantic-list "\\[.*\\]$" opt-array
    # Eventually we want to replace the 1 below with a size
    # (if available)
    ( (cons 1 (car ,$2) ) )
  | EMPTY
    ( nil )
  ;

opt-assign
  : EQUAL expression
    ( $2 )
  | EMPTY
    ( nil )
  ;

# Optional restrict / __restrict symbol.
opt-restrict
  : symbol "\\<\\(__\\)?restrict\\>"
  | EMPTY
  ;

# Klaus Berndl: symbol -> namespace-symbol?!  I think so.  It may be
# that some invalid C++ syntax is then parsed as well, but this is
# better than not parsing valid syntax.
varname
  : opt-stars opt-restrict namespace-symbol opt-bits opt-array opt-assign
    ( ,$3 ,$1 ,$4 ,$5 ,$6 )
  ;

# I should store more in this def, but leave it simple for now.
# Klaus Berndl: const and volatile can be written after the type!
variablearg
  : declmods typeformbase cv-declmods opt-ref variablearg-opt-name
    ( (list $5) variable $2 nil
      (ASSOC const (if (member "const" (append $1 $3)) t nil)
             typemodifiers (delete "const" (append $1 $3))
             reference (car ,$4)
             )
      nil
      )
  ;

variablearg-opt-name
  : varname
    ( ,$1)
  # Klaus Berndl: This allows variableargs without an arg-name to be
  # parsed correctly even if there are several pointers (*)
  | opt-stars
    ( "" ,$1 nil nil nil )
  ;

varnamelist
  : varname COMA varnamelist
    ( ,(cons $1 $3) )
  | varname
    ( $1 )
  ;

# Klaus Berndl:
# Is necessary to parse stuff like
#   class list_of_facts : public list, public entity
# and
#   list >::const_iterator l;
# Parses also invalid(?) and senseless(?) c++-syntax like
#   symbol::symbol1::test_iterator
# but better parsing too much than too little.
namespace-symbol
  : symbol opt-template-specifier COLON COLON namespace-symbol
    ( (concat $1 "::" (car $5)) )
  | symbol opt-template-specifier
    ( $1 )
  ;

#namespace-symbol : symbol COLON COLON namespace-symbol
#                   ( (concat $1 "::" (car $4)) )
#                 | symbol
#                   ( $1 )
#                 ;

namespace-opt-class
  : symbol COLON COLON namespace-opt-class
    ( (concat $1 "::" (car $4)) )
  # Klaus Berndl: We must recognize
  # template-specifiers here so we can parse correctly the
  # method-implementations of template-classes outside the
  # template-class-declaration
  # Example: TemplateClass1::method_1(...)
  | symbol opt-template-specifier COLON COLON
    ( $1 )
  ;

# Klaus Berndl: The opt-class of a func-decl
# must be able to recognize opt-classes with namespaces, e.g.
# Test1::Test2::classname::
opt-class
  : namespace-opt-class
    ( ,$1 )
  | EMPTY
    ( nil )
  ;

opt-destructor
  : TILDE
    ( t )
  | EMPTY
    ( nil )
  ;

arg-list
  : semantic-list "^(" knr-arguments
    ( ,$2 )
  | semantic-list "^("
    (EXPANDFULL $1 arg-sub-list)
  | semantic-list "^(void)$"
    ( )
  ;

# Semicolon-terminated argument declarations following the
# parenthesized list.
knr-arguments
  : variablearg SEMICOLON knr-arguments
    ( ,(cons (car (semantic-expand-c-nonterminal ,$1) ) ,$3) )
  | variablearg SEMICOLON
    ( (car (semantic-expand-c-nonterminal ,$1)) )
  ;

# One element inside a parenthesized argument list.
arg-sub-list
  : variablearg
    ( ,$1 )
  | PERIOD PERIOD PERIOD close-paren ")"
    ( "..." )
  | COMA
    ( nil )
  | open-paren "("
    ( nil )
  | close-paren ")"
    ( nil )
  ;

# Symbols that may follow the OPERATOR keyword.
operatorsym
  : LESS LESS
    ( "<<" )
  | GREATER GREATER
    ( ">>" )
  | EQUAL EQUAL
    ( "==" )
  | LESS EQUAL
    ( "<=" )
  | GREATER EQUAL
    ( ">=" )
  | BANG EQUAL
    ( "!=" )
  | MINUS GREATER
    ( "->" )
  # Klaus Berndl: We have to parse also
  # operator() and operator[]
  | semantic-list "()"
    ( "()" )
  | semantic-list "\\[\\]"
    ( "[]" )
  | LESS
  | GREATER
  | STAR
  | PLUS
  | MINUS
  | DIVIDE
  | EQUAL
  | BANG
  ;

functionname
  : OPERATOR operatorsym
    ( ,$2 )
  | symbol
    ( $1 )
  ;

# What ends a function declaration: a semicolon, a list, or "= 0;".
fun-or-proto-end
  : SEMICOLON
    ( t )
  | semantic-list
    ( nil )
  # Here is an annoying feature of C++ pure virtual methods
  | EQUAL number "^0$" SEMICOLON
    ( 'pure-virtual )
  ;

type-cast
  : semantic-list
    ( EXPAND $1 type-cast-list )
  ;

type-cast-list
  : open-paren typeformbase close-paren
  ;

# Klaus Berndl: symbol -> namespace-symbol!
function-call
  : namespace-symbol semantic-list
  ;

# One or more adjacent strings, concatenated into one.
string-seq
  : string string-seq
    ( (concat $1 (car $2)) )
  | string
    ( $1 )
  ;

# Use expression for parsing only.  Don't actually return anything
# for now.  Hopefully we can fix this later.
expression
  : number
    ( (identity start) (identity end) )
  | function-call
    ( (identity start) (identity end) )
  # Klaus Berndl: symbol -> namespace-symbol!
  | namespace-symbol
    ( (identity start) (identity end) )
  # Klaus Berndl: C/C++ allows sequences of
  # strings which are concatenated by the precompiler to one string
  | string-seq
    ( (identity start) (identity end))
  | type-cast expression # A cast to some other type
    ( (identity start) (identity end) )
  | semantic-list
    ( (identity start) (identity end) )
  | punctuation "[-+*/%^|&]" expression
    ( (identity start) (identity end) )
  ;