1 ;;; wisent-python.wy -- LALR grammar for Python
3 ;; Copyright (C) 2002, 2003, 2004, 2007 Richard Kim
5 ;; Author: Richard Kim <ryk@dspwiz.com>
6 ;; Maintainer: Richard Kim <ryk@dspwiz.com>
9 ;; X-RCS: $Id: wisent-python.wy,v 1.1 2007-11-26 15:12:35 michaels Exp $
11 ;; This file is not part of GNU Emacs.
13 ;; This program is free software; you can redistribute it and/or
14 ;; modify it under the terms of the GNU General Public License as
15 ;; published by the Free Software Foundation; either version 2, or (at
16 ;; your option) any later version.
18 ;; This software is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 ;; General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
30 ;; This is an LALR python parser that follows the official python
31 ;; grammar closely with very few exceptions.
35 ;; * Verify that semantic-lex-python-number regexp is correct.
41 ;;%package wisent-python-wy
43 %languagemode python-mode
45 ;; The default start symbol
47 ;; Alternate entry points
48 ;; - Needed by partial re-parse
;; Additional start symbols, declared with %start like any other entry point.
49 %start function_parameter
52 ;; - Needed by EXPANDFULL clauses
;; function_parameters is the nonterminal named by
;; (EXPANDFULL $1 function_parameters) in the action that follows
;; function_parameter_list.
53 %start function_parameters
;; indented_block_body is the nonterminal named by
;; (EXPANDFULL $1 indented_block_body) later in this grammar.
55 %start indented_block_body
57 ;; -------------------------------
58 ;; Misc. Python specific terminals
59 ;; -------------------------------
60 ;; The values of these tokens are for documentation only; they are not
;; Declaration shape: %token <lexical-type> NAME "value".
;; BACKSLASH is typed <charquote>, NEWLINE is typed <newline>, and the three
;; indentation-related tokens (INDENT, DEDENT, INDENT_BLOCK) share the
;; <indentation> type.
62 %token <charquote> BACKSLASH "\\"
63 %token <newline> NEWLINE "\n"
64 %token <indentation> INDENT "^\\s-+"
65 %token <indentation> DEDENT "[^:INDENT:]"
;; INDENT_BLOCK's value names the INDENT/DEDENT pair in an "(OPEN CLOSE)" form.
66 %token <indentation> INDENT_BLOCK "(INDENT DEDENT)"
68 ;; -----------------------------
69 ;; Block & Parenthesis terminals
70 ;; -----------------------------
;; Everything after `;;` on the %type line is a comment, so only the bare
;; `%type <block>` declaration is active.
71 %type <block> ;;syntax "\\s(\\|\\s)" matchdatatype block
;; Each *_BLOCK token's value names the open/close token pair that delimits it.
73 %token <block> PAREN_BLOCK "(LPAREN RPAREN)"
74 %token <block> BRACE_BLOCK "(LBRACE RBRACE)"
75 %token <block> BRACK_BLOCK "(LBRACK RBRACK)"
;; Individual delimiters: openers are typed <open-paren>, closers <close-paren>.
77 %token <open-paren> LPAREN "("
78 %token <close-paren> RPAREN ")"
79 %token <open-paren> LBRACE "{"
80 %token <close-paren> RBRACE "}"
81 %token <open-paren> LBRACK "["
82 %token <close-paren> RBRACK "]"
;; Everything after `;;` on the %type line is a comment, so only the bare
;; `%type <punctuation>` declaration is active.
;; Multi-character operators are declared before any shorter operator that is
;; a prefix of them ("<<=" before "<<" before "<", "**=" before "**" before "*").
;; NOTE(review): presumably this ordering matters to how the lexer matches
;; punctuation -- confirm before reordering.
87 %type <punctuation> ;;syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string
;; Three-character operators.
89 %token <punctuation> LTLTEQ "<<="
90 %token <punctuation> GTGTEQ ">>="
91 %token <punctuation> EXPEQ "**="
92 %token <punctuation> DIVDIVEQ "//="
;; Two-character operators.
93 %token <punctuation> DIVDIV "//"
94 %token <punctuation> LTLT "<<"
95 %token <punctuation> GTGT ">>"
96 %token <punctuation> EXPONENT "**"
97 %token <punctuation> EQ "=="
98 %token <punctuation> GE ">="
99 %token <punctuation> LE "<="
100 %token <punctuation> PLUSEQ "+="
101 %token <punctuation> MINUSEQ "-="
102 %token <punctuation> MULTEQ "*="
103 %token <punctuation> DIVEQ "/="
104 %token <punctuation> MODEQ "%="
105 %token <punctuation> AMPEQ "&="
106 %token <punctuation> OREQ "|="
107 %token <punctuation> HATEQ "^="
108 %token <punctuation> LTGT "<>"
109 %token <punctuation> NE "!="
;; Single-character operators and separators.
110 %token <punctuation> HAT "^"
111 %token <punctuation> LT "<"
112 %token <punctuation> GT ">"
113 %token <punctuation> AMP "&"
114 %token <punctuation> MULT "*"
115 %token <punctuation> DIV "/"
116 %token <punctuation> MOD "%"
117 %token <punctuation> PLUS "+"
118 %token <punctuation> MINUS "-"
119 %token <punctuation> PERIOD "."
120 %token <punctuation> TILDE "~"
121 %token <punctuation> BAR "|"
122 %token <punctuation> COLON ":"
123 %token <punctuation> SEMICOLON ";"
124 %token <punctuation> COMMA ","
125 %token <punctuation> ASSIGN "="
126 %token <punctuation> BACKQUOTE "`"
;; Lexical types for string literals, numbers, symbols and keywords.
;; On each %type line the text after `;;` is a comment, so only the bare
;; `%type <...>` declaration is active.
;; STRING_LITERAL and NUMBER_LITERAL are declared without a value string.
132 %token <string> STRING_LITERAL
134 %type <number> ;;syntax semantic-lex-number-expression
135 %token <number> NUMBER_LITERAL
137 %type <symbol> ;;syntax "\\(\\sw\\|\\s_\\)+"
143 %type <keyword> ;;syntax "\\(\\sw\\|\\s_\\)+" matchdatatype keyword
147 "Logical AND binary operator ... "
149 %keyword ASSERT "assert"
151 "Raise AssertionError exception if <expr> is false"
;; Keyword token for `break`, followed by its human-readable description.
153 %keyword BREAK "break"
155 "Terminate 'for' or 'while' loop"
157 %keyword CLASS "class"
;; Keyword token for `continue`; %put attaches the string below to it as
;; the `summary` property.
161 %keyword CONTINUE "continue"
162 %put CONTINUE summary
163 "Skip to the next iteration of enclosing for or while loop"
167 "Define a new function"
171 "Delete specified objects, i.e., undo what assignment did"
175 "Shorthand for 'else if' following an 'if' statement"
179 "Start the 'else' clause following an 'if' statement"
181 %keyword EXCEPT "except"
183 "Specify exception handlers along with 'try' keyword"
187 "Dynamically execute python code"
;; Keyword token for `finally`, followed by its human-readable description.
189 %keyword FINALLY "finally"
191 "Specify code to be executed after 'try' statements whether or not an exception occurred"
199 "Modify behavior of 'import' statement"
201 %keyword GLOBAL "global"
203 "Declare one or more symbols as global symbols"
207 "Start 'if' conditional statement"
209 %keyword IMPORT "import"
211 "Load specified modules"
215 "Part of 'for' statement "
219 "Binary operator that tests for object equality"
221 %keyword LAMBDA "lambda"
223 "Create anonymous function"
227 "Unary boolean negation operator"
231 "Binary logical 'or' operator"
235 "Statement that does nothing"
237 %keyword PRINT "print"
239 "Print each argument to standard output"
241 %keyword RAISE "raise"
245 %keyword RETURN "return"
247 "Return from a function"
251 "Start of statements protected by exception handlers"
253 %keyword WHILE "while"
255 "Start a 'while' loop"
257 %keyword YIELD "yield"
259 "Create a generator function"
263 ;;;****************************************************************************
265 ;;;****************************************************************************
267 ;; simple_stmt are statements that do not involve INDENT tokens
268 ;; compound_stmt are statements that involve INDENT tokens
275 ;;;****************************************************************************
277 ;;;****************************************************************************
279 ;; simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
281 : small_stmt_list semicolon_opt NEWLINE
284 ;; small_stmt (';' small_stmt)*
287 | small_stmt_list SEMICOLON small_stmt
302 ;;;============================================================================
304 ;;;============================================================================
306 ;; print_stmt: 'print' [ test (',' test)* [','] ]
307 ;; | '>>' test [ (',' test)+ [','] ]
309 : PRINT print_stmt_trailer
313 ;; [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ]
317 | GTGT test trailing_test_list_with_opt_comma_opt
321 ;; [ (',' test)+ [','] ]
322 trailing_test_list_with_opt_comma_opt
324 | trailing_test_list comma_opt
332 | trailing_test_list COMMA test
336 ;;;============================================================================
338 ;;;============================================================================
340 ;; expr_stmt: testlist (augassign testlist | ('=' testlist)*)
342 : testlist expr_stmt_trailer
343 (if (and $2 (stringp $1) (string-match "^\\(\\sw\\|\\s_\\)+$" $1))
344 ;; If this is an assignment statement and left side is a symbol,
345 ;; then generate a 'variable token, else return 'code token.
346 (VARIABLE-TAG $1 nil nil)
350 ;; Could be EMPTY because of eq_testlist_zom.
351 ;; (augassign testlist | ('=' testlist)*)
361 | eq_testlist_zom ASSIGN testlist
365 ;; augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
366 ;; | '<<=' | '>>=' | '**=' | '//='
368 : PLUSEQ | MINUSEQ | MULTEQ | DIVEQ | MODEQ
369 | AMPEQ | OREQ | HATEQ | LTLTEQ
370 | GTGTEQ | EXPEQ | DIVDIVEQ
373 ;;;============================================================================
375 ;;;============================================================================
377 ;; del_stmt: 'del' exprlist
383 ;; exprlist: expr (',' expr)* [',']
385 : expr_list comma_opt
393 | expr_list COMMA expr
397 ;;;============================================================================
399 ;;;============================================================================
407 ;;;============================================================================
409 ;;;============================================================================
419 ;; break_stmt: 'break'
425 ;; continue_stmt: 'continue'
431 ;; return_stmt: 'return' [testlist]
433 : RETURN testlist_opt
444 ;; yield_stmt: 'yield' testlist
450 ;; raise_stmt: 'raise' [test [',' test [',' test]]]
452 : RAISE zero_one_two_or_three_tests
456 ;; [test [',' test [',' test]]]
457 zero_one_two_or_three_tests
459 | test zero_one_or_two_tests
463 ;; [',' test [',' test]]
464 zero_one_or_two_tests
466 | COMMA test zero_or_one_comma_test
471 zero_or_one_comma_test
477 ;;;============================================================================
479 ;;;============================================================================
481 ;; import_stmt : 'import' dotted_as_name (',' dotted_as_name)*
482 ;; | 'from' dotted_name 'import'
483 ;; ('*' | import_as_name (',' import_as_name)*)
485 : IMPORT dotted_as_name_list
487 | FROM dotted_name IMPORT star_or_import_as_name_list
491 ;; dotted_as_name (',' dotted_as_name)*
494 | dotted_as_name_list COMMA dotted_as_name
497 ;; ('*' | import_as_name (',' import_as_name)*)
498 star_or_import_as_name_list
501 | import_as_name_list
505 ;; import_as_name (',' import_as_name)*
509 | import_as_name_list COMMA import_as_name
513 ;; import_as_name: NAME [NAME NAME]
519 ;; dotted_as_name: dotted_name [NAME NAME]
521 : dotted_name name_name_opt
531 ;; dotted_name: NAME ('.' NAME)*
534 | dotted_name PERIOD NAME
535 (format "%s.%s" $1 $3)
538 ;;;============================================================================
540 ;;;============================================================================
542 ;; global_stmt: 'global' NAME (',' NAME)*
544 : GLOBAL comma_sep_name_list
551 | comma_sep_name_list COMMA NAME
554 ;;;============================================================================
556 ;;;============================================================================
558 ;; exec_stmt: 'exec' expr ['in' test [',' test]]
560 : EXEC expr exec_trailer
564 ;; ['in' test [',' test]]
567 | IN test comma_test_opt
578 ;;;============================================================================
580 ;;;============================================================================
582 ;; assert_stmt: 'assert' test [',' test]
584 : ASSERT test comma_test_opt
588 ;;;****************************************************************************
590 ;;;****************************************************************************
601 ;;;============================================================================
603 ;;;============================================================================
605 ;; if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
607 : IF test COLON suite elif_suite_pair_list else_suite_pair_opt
611 ;; ('elif' test ':' suite)*
614 | elif_suite_pair_list ELIF test COLON suite
618 ;; ['else' ':' suite]
625 ;; This NT follows the COLON token for most compound statements.
626 ;; suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
630 | NEWLINE indented_block
636 (EXPANDFULL $1 indented_block_body)
648 ;;;============================================================================
650 ;;;============================================================================
652 ;; while_stmt: 'while' test ':' suite ['else' ':' suite]
654 : WHILE test COLON suite else_suite_pair_opt
658 ;;;============================================================================
660 ;;;============================================================================
662 ;; for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
664 : FOR exprlist IN testlist COLON suite else_suite_pair_opt
668 ;;;============================================================================
670 ;;;============================================================================
672 ;; try_stmt: ('try' ':' suite (except_clause ':' suite)+ #diagram:break
673 ;; ['else' ':' suite] | 'try' ':' suite 'finally' ':' suite)
675 : TRY COLON suite except_clause_suite_pair_list else_suite_pair_opt
677 | TRY COLON suite FINALLY COLON suite
681 ;; (except_clause ':' suite)+
682 except_clause_suite_pair_list
683 : except_clause COLON suite
685 | except_clause_suite_pair_list except_clause COLON suite
689 ;; # NB compile.c makes sure that the default except clause is last
690 ;; except_clause: 'except' [test [',' test]]
692 : EXCEPT zero_one_or_two_test
699 | test zero_or_one_comma_test
703 ;;;============================================================================
705 ;;;============================================================================
707 ;; funcdef: 'def' NAME parameters ':' suite
709 : DEF NAME function_parameter_list COLON suite
710 (FUNCTION-TAG $2 nil $3)
713 function_parameter_list
715 (let ((wisent-python-EXPANDING-block t))
716 (EXPANDFULL $1 function_parameters))
719 ;; parameters: '(' [varargslist] ')'
725 | function_parameter COMMA
726 | function_parameter RPAREN
732 ;; (VARIABLE-TAG $1 nil nil)
734 (VARIABLE-TAG $2 nil nil)
736 (VARIABLE-TAG $2 nil nil)
739 ;;;============================================================================
740 ;;;@@ class_declaration
741 ;;;============================================================================
743 ;; classdef: 'class' NAME ['(' testlist ')'] ':' suite
745 : CLASS NAME paren_class_list_opt COLON suite
746 (TYPE-TAG $2 $1 ;; Name "class"
748 (cons $3 nil) ;; (SUPERCLASSES . INTERFACES)
752 ;; ['(' testlist ')']
760 (let ((wisent-python-EXPANDING-block t))
761 (mapcar 'semantic-tag-name (EXPANDFULL $1 paren_classes)))
764 ;; parameters: '(' [varargslist] ')'
771 (VARIABLE-TAG $1 nil nil)
773 (VARIABLE-TAG $1 nil nil)
776 ;; In general, the base class can be specified by a general expression
777 ;; which evaluates to a class object, i.e., base classes are not just names!
778 ;; However base classes are names in most cases. Thus the
779 ;; non-terminals below work only with simple names. Even if the
780 ;; parser can parse general expressions, I don't see much benefit in
781 ;; generating a string of expression as base class "name".
786 ;;;****************************************************************************
788 ;;;****************************************************************************
790 ;; test: and_test ('or' and_test)* | lambdef
796 ;; and_test ('or' and_test)*
799 | test_test OR and_test
803 ;; and_test: not_test ('and' not_test)*
806 | and_test AND not_test
810 ;; not_test: 'not' not_test | comparison
817 ;; comparison: expr (comp_op expr)*
820 | comparison comp_op expr
824 ;; comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
826 : LT | GT | EQ | GE | LE | LTGT | NE | IN | NOT IN | IS | IS NOT
829 ;; expr: xor_expr ('|' xor_expr)*
836 ;; xor_expr: and_expr ('^' and_expr)*
839 | xor_expr HAT and_expr
843 ;; and_expr: shift_expr ('&' shift_expr)*
846 | and_expr AMP shift_expr
850 ;; shift_expr: arith_expr (('<<'|'>>') arith_expr)*
853 | shift_expr shift_expr_operators arith_expr
863 ;; arith_expr: term (('+'|'-') term)*
866 | arith_expr plus_or_minus term
876 ;; term: factor (('*'|'/'|'%'|'//') factor)*
879 | term term_operator factor
890 ;; factor: ('+'|'-'|'~') factor | power
892 : prefix_operators factor
904 ;; power: atom trailer* ('**' factor)*
906 : atom trailer_zom exponent_zom
908 (if $2 (concat " " $2 " ") "")
909 (if $3 (concat " " $3) "")
915 | trailer_zom trailer
921 | exponent_zom EXPONENT factor
925 ;; trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
935 ;; atom: '(' [testlist] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}'
936 ;; | '`' testlist '`' | NAME | NUMBER | STRING+
944 | BACKQUOTE testlist BACKQUOTE
957 ;; testlist: test (',' test)* [',']
959 : comma_sep_test_list comma_opt
965 | comma_sep_test_list COMMA test
966 (format "%s, %s" $1 $3)
969 ;; (read $1) and (read $2) were done before to peel away the double quotes.
970 ;; However that does not work for single quotes, so it was taken out.
973 | one_or_more_string STRING_LITERAL
977 ;;;****************************************************************************
979 ;;;****************************************************************************
981 ;; lambdef: 'lambda' [varargslist] ':' test
983 : LAMBDA varargslist_opt COLON test
984 (format "%s %s" $1 (or $2 ""))
993 ;; varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME)
994 ;; | fpdef ['=' test] (',' fpdef ['=' test])* [',']
996 : fpdef_opt_test_list_comma_zom rest_args
998 | fpdef_opt_test_list comma_opt
1001 ;; ('*' NAME [',' '**' NAME] | '**' NAME)
1003 : MULT NAME multmult_name_opt
1004 () ;;(VARIABLE-TAG $2 nil nil)
1006 () ;;(VARIABLE-TAG $2 nil nil)
1012 | COMMA EXPONENT NAME
1013 (VARIABLE-TAG $3 nil nil)
1016 fpdef_opt_test_list_comma_zom
1018 | fpdef_opt_test_list_comma_zom fpdef_opt_test COMMA
1022 ;; fpdef ['=' test] (',' fpdef ['=' test])*
1025 | fpdef_opt_test_list COMMA fpdef_opt_test
1034 ;; fpdef: NAME | '(' fplist ')'
1037 (VARIABLE-TAG $1 nil nil)
1038 ;; Below breaks the parser. Don't know why, but my guess is that
1039 ;; LPAREN/RPAREN clashes with the ones in function_parameters.
1040 ;; | LPAREN fplist RPAREN
1044 ;; fplist: fpdef (',' fpdef)* [',']
1046 : fpdef_list comma_opt
1049 ;; fpdef (',' fpdef)*
1052 | fpdef_list COMMA fpdef
1062 ;;;****************************************************************************
1064 ;;;****************************************************************************
1078 ;;; wisent-python.wy ends here