/*
 [The 'BSD licence']
 Copyright (c) 2004 Terence Parr and Loring Craymer
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** Python 2.3.3 Grammar
 *
 *  Terence Parr and Loring Craymer
 *  February 2004
 *
 *  Converted to ANTLR v3 November 2005 by Terence Parr.
 *
 *  This grammar was derived automatically from the Python 2.3.3
 *  parser grammar to get a syntactically correct ANTLR grammar
 *  for Python.  Then Terence hand tweaked it to be semantically
 *  correct; i.e., removed lookahead issues etc...  It is LL(1)
 *  except for the (sometimes optional) trailing commas and semi-colons.
 *  It needs two symbols of lookahead in this case.
 *
 *  Starting with Loring's preliminary lexer for Python, I modified it
 *  to do my version of the whole nasty INDENT/DEDENT issue just so I
 *  could understand the problem better.  This grammar requires
 *  PythonTokenStream.java to work.  Also I used some rules from the
 *  semi-formal grammar on the web for Python (automatically
 *  translated to ANTLR format by an ANTLR grammar, naturally <grin>).
 *  The lexical rules for python are particularly nasty and it took me
 *  a long time to get it 'right'; i.e., think about it in the proper
 *  way.  Resist changing the lexer unless you've used ANTLR a lot. ;)
 *
 *  I (Terence) tested this by running it on the jython-2.1/Lib
 *  directory of 40k lines of Python.
 *
 *  REQUIRES ANTLR v3
 */
grammar Python;

// Generate the recognizers for the JavaScript target; every embedded action
// and predicate in this grammar is therefore written in JavaScript.
options {language=JavaScript;}

// Token types declared without a lexer rule of their own.
// NOTE(review): presumably synthesized from LEADING_WS by the external token
// stream filter mentioned in the file header -- confirm against the host code.
tokens {
    INDENT;
    DEDENT;
}

@lexer::members {
/** Handles context-sensitive lexing of implicit line joining such as
 *  the case where newline is ignored in cases like this:
 *  a = [3,
 *       4]
 *  Raised by LPAREN, LBRACK and LCURLY and lowered by the matching closers.
 */
this.implicitLineJoiningLevel = 0;

/** Consulted by the NEWLINE, WS, LEADING_WS and COMMENT rules, where 0 is
 *  treated as the left edge of a line.
 *  NOTE(review): nothing in this grammar assigns it after initialisation;
 *  presumably the host updates it before each token is matched -- confirm.
 */
this.startPos = -1;
}

/** Start rule: a lone newline, one simple statement, or one compound
 *  statement followed by a newline.  Not referenced by any other rule here.
 */
single_input
    : NEWLINE
    | simple_stmt
    | compound_stmt NEWLINE
    ;

/** Start rule: any sequence of newlines and statements. */
file_input
    : (NEWLINE | stmt)*
    ;

/** Start rule: one testlist with optional newlines on either side. */
eval_input
    : (NEWLINE)* testlist (NEWLINE)*
    ;

/** Function definition: 'def' NAME parameters ':' suite.
 *
 *  Once the whole definition has matched, the action reports the function
 *  name through xlog.  xlog is not defined anywhere in this grammar, so the
 *  call is made only when the host environment supplies such a function;
 *  otherwise parsing a def would fail with a ReferenceError.
 */
funcdef
    : 'def' NAME parameters COLON suite
      {if (typeof xlog === "function") { xlog("found method def "+$NAME.text); }}
    ;

/** Parenthesised, possibly empty, formal parameter list. */
parameters
    : LPAREN (varargslist)? RPAREN
    ;

/** Formal parameters: ordinary (optionally defaulted) parameters, then an
 *  optional STAR NAME and/or DOUBLESTAR NAME.  A trailing comma is allowed
 *  after the ordinary parameters.
 */
varargslist
    : defparameter (options {greedy=true;}:COMMA defparameter)*
      (COMMA
          ( STAR NAME (COMMA DOUBLESTAR NAME)?
          | DOUBLESTAR NAME
          )?
      )?
    | STAR NAME (COMMA DOUBLESTAR NAME)?
    | DOUBLESTAR NAME
    ;

/** One parameter with an optional default value. */
defparameter
    : fpdef (ASSIGN test)?
    ;

/** A parameter is a plain name or a parenthesised (nested) parameter list. */
fpdef
    : NAME
    | LPAREN fplist RPAREN
    ;

/** Comma-separated fpdefs with an optional trailing comma. */
fplist
    : fpdef (options {greedy=true;}:COMMA fpdef)* (COMMA)?
    ;

/** A statement is either a simple (one line) or a compound statement. */
stmt
    : simple_stmt
    | compound_stmt
    ;

/** One or more small statements separated by semicolons, with an optional
 *  trailing semicolon, terminated by NEWLINE.
 */
simple_stmt
    : small_stmt (options {greedy=true;}:SEMI small_stmt)* (SEMI)? NEWLINE
    ;

/** The statement kinds that may appear inside a simple_stmt. */
small_stmt
    : expr_stmt
    | print_stmt
    | del_stmt
    | pass_stmt
    | flow_stmt
    | import_stmt
    | global_stmt
    | exec_stmt
    | assert_stmt
    ;

/** Expression statement: a bare testlist, an augmented assignment, or a
 *  chain of one or more plain assignments.
 */
expr_stmt
    : testlist
      ( augassign testlist
      | (ASSIGN testlist)+
      )?
    ;

/** The augmented assignment operators. */
augassign
    : PLUSEQUAL
    | MINUSEQUAL
    | STAREQUAL
    | SLASHEQUAL
    | PERCENTEQUAL
    | AMPEREQUAL
    | VBAREQUAL
    | CIRCUMFLEXEQUAL
    | LEFTSHIFTEQUAL
    | RIGHTSHIFTEQUAL
    | DOUBLESTAREQUAL
    | DOUBLESLASHEQUAL
    ;

/** 'print' with an optional testlist, optionally redirected with '>>'. */
print_stmt
    : 'print'
      ( testlist
      | RIGHTSHIFT testlist
      )?
    ;

del_stmt
    : 'del' exprlist
    ;

pass_stmt
    : 'pass'
    ;

/** Statements that transfer control. */
flow_stmt
    : break_stmt
    | continue_stmt
    | return_stmt
    | raise_stmt
    | yield_stmt
    ;

break_stmt
    : 'break'
    ;

continue_stmt
    : 'continue'
    ;

/** 'return' with an optional value list. */
return_stmt
    : 'return' (testlist)?
    ;

yield_stmt
    : 'yield' testlist
    ;

/** 'raise' followed by zero to three comma-separated tests. */
raise_stmt
    : 'raise' (test (COMMA test (COMMA test)?)?)?
    ;

/** 'import a.b, c.d ...' or 'from a.b import *' / 'from a.b import x, y'. */
import_stmt
    : 'import' dotted_as_name (COMMA dotted_as_name)*
    | 'from' dotted_name 'import'
      (STAR | import_as_name (COMMA import_as_name)*)
    ;

/** Imported name with an optional rename.  The rename is matched as two
 *  plain NAME tokens, so the word in the 'as' position is not checked here.
 */
import_as_name
    : NAME (NAME NAME)?
    ;

/** Dotted module name with an optional rename, matched as NAME NAME
 *  (see import_as_name).
 */
dotted_as_name
    : dotted_name (NAME NAME)?
    ;

dotted_name
    : NAME (DOT NAME)*
    ;

global_stmt
    : 'global' NAME (COMMA NAME)*
    ;

/** 'exec' expr, optionally followed by 'in' and one or two tests. */
exec_stmt
    : 'exec' expr ('in' test (COMMA test)?)?
    ;

/** 'assert' test with an optional second test after a comma. */
assert_stmt
    : 'assert' test (COMMA test)?
    ;

/** Statements that contain a suite of other statements. */
compound_stmt
    : if_stmt
    | while_stmt
    | for_stmt
    | try_stmt
    | funcdef
    | classdef
    ;

if_stmt
    : 'if' test COLON suite ('elif' test COLON suite)* ('else' COLON suite)?
    ;

while_stmt
    : 'while' test COLON suite ('else' COLON suite)?
    ;

for_stmt
    : 'for' exprlist 'in' testlist COLON suite ('else' COLON suite)?
    ;

/** 'try' with either one or more except clauses (plus optional else) or a
 *  single 'finally'; the two forms cannot be combined in this grammar.
 */
try_stmt
    : 'try' COLON suite
      ( (except_clause COLON suite)+ ('else' COLON suite)?
      | 'finally' COLON suite
      )
    ;

/** 'except' with zero, one or two comma-separated tests. */
except_clause
    : 'except' (test (COMMA test)?)?
    ;

/** Body of a compound statement: a simple statement on the same line, or
 *  an indented block of one or more statements.
 */
suite
    : simple_stmt
    | NEWLINE INDENT (stmt)+ DEDENT
    ;

/** Boolean 'or' chain, or a lambda expression. */
test
    : and_test ('or' and_test)*
    | lambdef
    ;

and_test
    : not_test ('and' not_test)*
    ;

not_test
    : 'not' not_test
    | comparison
    ;

/** Chain of exprs joined by comparison operators. */
comparison
    : expr (comp_op expr)*
    ;

/** Comparison operators, including the two-word forms 'not in' and 'is not'. */
comp_op
    : LESS
    | GREATER
    | EQUAL
    | GREATEREQUAL
    | LESSEQUAL
    | ALT_NOTEQUAL
    | NOTEQUAL
    | 'in'
    | 'not' 'in'
    | 'is'
    | 'is' 'not'
    ;

// Binary operator rules from the outermost (expr) to the innermost (power):
// each rule is a chain of the next rule joined by its own operators.

expr
    : xor_expr (VBAR xor_expr)*
    ;

xor_expr
    : and_expr (CIRCUMFLEX and_expr)*
    ;

and_expr
    : shift_expr (AMPER shift_expr)*
    ;

shift_expr
    : arith_expr ((LEFTSHIFT|RIGHTSHIFT) arith_expr)*
    ;

arith_expr
    : term ((PLUS|MINUS) term)*
    ;

term
    : factor ((STAR | SLASH | PERCENT | DOUBLESLASH ) factor)*
    ;

/** Unary plus, minus or tilde applied to a factor, or a power expression. */
factor
    : (PLUS|MINUS|TILDE) factor
    | power
    ;

/** An atom with any number of trailers and an optional '**' factor. */
power
    : atom (trailer)* (options {greedy=true;}:DOUBLESTAR factor)?
    ;

/** Primary expressions: parenthesised/bracketed/braced displays, backquoted
 *  testlists, names, numeric literals, and one or more adjacent strings.
 */
atom
    : LPAREN (testlist)? RPAREN
    | LBRACK (listmaker)? RBRACK
    | LCURLY (dictmaker)? RCURLY
    | BACKQUOTE testlist BACKQUOTE
    | NAME
    | INT
    | LONGINT
    | FLOAT
    | COMPLEX
    | (STRING)+
    ;

/** Contents of a list display: a list comprehension or comma-separated
 *  tests, with an optional trailing comma.
 */
listmaker
    : test ( list_for | (options {greedy=true;}:COMMA test)* ) (COMMA)?
    ;

lambdef
    : 'lambda' (varargslist)? COLON test
    ;

/** Postfix forms applied to an atom: call, subscription, attribute access. */
trailer
    : LPAREN (arglist)? RPAREN
    | LBRACK subscriptlist RBRACK
    | DOT NAME
    ;

subscriptlist
    : subscript (options {greedy=true;}:COMMA subscript)* (COMMA)?
    ;

/** A single subscript: an ellipsis (three DOT tokens), a plain test, or a
 *  slice with optional bounds and optional sliceop.
 */
subscript
    : DOT DOT DOT
    | test (COLON (test)? (sliceop)?)?
    | COLON (test)? (sliceop)?
    ;

sliceop
    : COLON (test)?
    ;

// The three list rules below use k=2 on their loops so that a comma that
// continues the list can be told apart from the optional trailing comma.

exprlist
    : expr (options {k=2;}:COMMA expr)* (COMMA)?
    ;

testlist
    : test (options {k=2;}: COMMA test)* (COMMA)?
    ;

/** Contents of a dictionary display: 'key : value' pairs. */
dictmaker
    : test COLON test
      (options {k=2;}:COMMA test COLON test)* (COMMA)?
    ;

/** Class definition: 'class' NAME, optional parenthesised testlist, ':' suite.
 *
 *  Once the whole definition has matched, the action reports the class name
 *  through xlog.  xlog is not defined anywhere in this grammar, so the call
 *  is made only when the host environment supplies such a function;
 *  otherwise parsing a class would fail with a ReferenceError.
 */
classdef
    : 'class' NAME (LPAREN testlist RPAREN)? COLON suite
      {if (typeof xlog === "function") { xlog("found class def "+$NAME.text); }}
    ;

/** Call arguments: ordinary arguments, then an optional STAR test and/or
 *  DOUBLESTAR test.  A trailing comma is allowed after ordinary arguments.
 */
arglist
    : argument (COMMA argument)*
      ( COMMA
          ( STAR test (COMMA DOUBLESTAR test)?
          | DOUBLESTAR test
          )?
      )?
    | STAR test (COMMA DOUBLESTAR test)?
    | DOUBLESTAR test
    ;

/** A positional argument, or 'test = test' for a keyword argument. */
argument
    : test (ASSIGN test)?
    ;

/** Continuation of a list comprehension: another 'for' or an 'if'. */
list_iter
    : list_for
    | list_if
    ;

list_for
    : 'for' exprlist 'in' testlist (list_iter)?
    ;

list_if
    : 'if' test (list_iter)?
    ;

// Punctuation and operator tokens.  The three opening brackets raise
// implicitLineJoiningLevel and the three closing brackets lower it; NEWLINE
// and LEADING_WS hide themselves while that level is above zero.

LPAREN              : '(' {this.implicitLineJoiningLevel++;} ;

RPAREN              : ')' {this.implicitLineJoiningLevel--;} ;

LBRACK              : '[' {this.implicitLineJoiningLevel++;} ;

RBRACK              : ']' {this.implicitLineJoiningLevel--;} ;

COLON               : ':' ;

COMMA               : ',' ;

SEMI                : ';' ;

PLUS                : '+' ;

MINUS               : '-' ;

STAR                : '*' ;

SLASH               : '/' ;

VBAR                : '|' ;

AMPER               : '&' ;

LESS                : '<' ;

GREATER             : '>' ;

ASSIGN              : '=' ;

PERCENT             : '%' ;

BACKQUOTE           : '`' ;

LCURLY              : '{' {this.implicitLineJoiningLevel++;} ;

RCURLY              : '}' {this.implicitLineJoiningLevel--;} ;

CIRCUMFLEX          : '^' ;

TILDE               : '~' ;

EQUAL               : '==' ;

NOTEQUAL            : '!=' ;

ALT_NOTEQUAL        : '<>' ;

LESSEQUAL           : '<=' ;

LEFTSHIFT           : '<<' ;

GREATEREQUAL        : '>=' ;

RIGHTSHIFT          : '>>' ;

PLUSEQUAL           : '+=' ;

MINUSEQUAL          : '-=' ;

DOUBLESTAR          : '**' ;

STAREQUAL           : '*=' ;

DOUBLESLASH         : '//' ;

SLASHEQUAL          : '/=' ;

VBAREQUAL           : '|=' ;

PERCENTEQUAL        : '%=' ;

AMPEREQUAL          : '&=' ;

CIRCUMFLEXEQUAL     : '^=' ;

LEFTSHIFTEQUAL      : '<<=' ;

RIGHTSHIFTEQUAL     : '>>=' ;

DOUBLESTAREQUAL     : '**=' ;

DOUBLESLASHEQUAL    : '//=' ;

DOT                 : '.' ;

/** Floating point literal.
 *
 *  Alternatives, in order:
 *    .5  .5e3        leading dot, digits, optional exponent
 *    1.e5            digits, dot, exponent with no fraction digits
 *    1.  1.5  1.5e3  1e3
 *
 *  The second alternative covers the "digits '.' exponent" form, which the
 *  third one cannot match because its exponent is only reachable after
 *  fraction digits.
 */
FLOAT
    : '.' DIGITS (Exponent)?
    | DIGITS '.' Exponent
    | DIGITS ('.' (DIGITS (Exponent)?)? | Exponent)
    ;

/** An INT immediately followed by 'l' or 'L'. */
LONGINT
    : INT ('l'|'L')
    ;

/** Exponent part of a FLOAT: e or E, optional sign, digits. */
fragment
Exponent
    : ('e' | 'E') ( '+' | '-' )? DIGITS
    ;

/** Integer literal: hexadecimal (which may itself end in 'l' or 'L'),
 *  a literal starting with '0', or a literal starting with '1'..'9'.
 */
INT : // Hex
      '0' ('x' | 'X') ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+
      ('l' | 'L')?
    | // Octal
      '0' DIGITS*
    | '1'..'9' DIGITS*
    ;

/** An INT or FLOAT immediately followed by 'j' or 'J'. */
COMPLEX
    : INT ('j'|'J')
    | FLOAT ('j'|'J')
    ;

/** One or more decimal digits. */
fragment
DIGITS
    : ( '0' .. '9' )+
    ;

/** Identifier: a letter or underscore followed by letters, digits or
 *  underscores.
 */
NAME
    : ( 'a' .. 'z' | 'A' .. 'Z' | '_')
      ( 'a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9' )*
    ;

/** Match various string types.  Note that greedy=false implies '''
 *  should make us exit loop not continue.
 *
 *  The optional prefix accepts r, u and ur in every upper/lower case
 *  combination that Python allows (r R u U ur UR Ur uR).
 *  Single-line strings may not contain a raw newline or an unescaped
 *  closing quote; a backslash always consumes the following character.
 */
STRING
    : ('r'|'R'|'u'|'U'|'ur'|'UR'|'Ur'|'uR')?
      ( '\'\'\'' (options {greedy=false;}:.)* '\'\'\''
      | '"""' (options {greedy=false;}:.)* '"""'
      | '"' (ESC|~('\\'|'\n'|'"'))* '"'
      | '\'' (ESC|~('\\'|'\n'|'\''))* '\''
      )
    ;

/** A backslash followed by any single character. */
fragment
ESC
    : '\\' .
    ;

/** Consume a newline and any whitespace at start of next line.
 *  Matches a backslash, an optional carriage return, the newline, and any
 *  following spaces or tabs, and sends the whole token to the hidden channel.
 */
CONTINUED_LINE
    : '\\' ('\r')? '\n' (' '|'\t')* { $channel=HIDDEN; }
    ;

/** Treat a sequence of blank lines as a single blank line.  If
 *  nested within a (..), {..}, or [..], then ignore newlines.
 *  If the first newline starts in column one, they are to be ignored.
 *
 *  "Ignored" means the token is still matched but placed on the hidden
 *  channel.
 */
NEWLINE
    : (('\r')? '\n' )+
      {if ( this.startPos==0 || this.implicitLineJoiningLevel>0 )
          $channel=HIDDEN;
      }
    ;

/** Spaces and tabs that do not start at the left edge (startPos > 0) are
 *  sent to the hidden channel.  Whitespace at the left edge is left to
 *  LEADING_WS.
 */
WS  : {this.startPos>0}?=> (' '|'\t')+ {$channel=HIDDEN;}
    ;

/** Grab everything before a real symbol.  Then if newline, kill it
 *  as this is a blank line.  If whitespace followed by comment, kill it
 *  as it's a comment on a line by itself.
 *
 *  Ignore leading whitespace when nested in [..], (..), {..}.
 *
 *  Outside brackets the indentation width is counted in the init-declared
 *  variable "spaces": a space adds one and a tab advances to the next
 *  multiple of eight.  A LEADING_WS token whose text is that many spaces is
 *  then emitted explicitly.
 */
LEADING_WS
@init {
    var spaces = 0;
}
    : {this.startPos==0}?=>
      ( // inside brackets: indentation carries no meaning, hide it
        {this.implicitLineJoiningLevel>0}? ( ' ' | '\t' )+ {$channel=HIDDEN;}
      | ( ' '  { spaces++; }
        | '\t' { spaces += 8; spaces -= (spaces \% 8); }
        )+
        {
            // make a string of n spaces where n is column number - 1
            var indentation = new Array(spaces);
            for (var i=0; i<spaces; i++) {
                indentation[i] = ' ';
            }
            var s = indentation.join("");
            this.emit(new org.antlr.runtime.CommonToken(this.LEADING_WS,s));
        }
        // kill trailing newline if present and then ignore
        ( ('\r')? '\n' {if (this.state.token!=null) this.state.token.setChannel(HIDDEN); else $channel=HIDDEN;})*
        // {this.token.setChannel(99); }
      )
    ;

/** Comments not on line by themselves are turned into newlines.

    b = a # end of line comment

    or

    a = [1, # weird
         2]

    This rule is invoked directly by nextToken when the comment is in
    first column or when comment is on end of nonwhitespace line.

    Only match \n here if we started on the left edge; otherwise let
    NEWLINE return that.
    Kill the newlines if we live on a line by ourselves.

    Consume any leading whitespace if it starts on left edge.

    The token is always placed on the hidden channel.
 */
COMMENT
@init {
    $channel=HIDDEN;
}
    : {this.startPos==0}?=> (' '|'\t')* '#' (~'\n')* '\n'+
    | {this.startPos>0}?=> '#' (~'\n')* // let NEWLINE handle \n unless char pos==0 for '#'
    ;