You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1133 lines
40 KiB

//
// BaseRecognizer.m
// ANTLR
//
// Created by Alan Condit on 6/16/10.
// [The "BSD licence"]
// Copyright (c) 2010 Alan Condit
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#import "ACNumber.h"
#import "BaseRecognizer.h"
#import "HashRule.h"
#import "RuleMemo.h"
#import "CommonToken.h"
#import "Map.h"
#import "NoViableAltException.h"
extern NSInteger debug;
@implementation BaseRecognizer
// Class-wide defaults; the init methods copy (and retain) these into each
// new recognizer's tokenNames / grammarFileName ivars.
static AMutableArray *_tokenNames;
static NSString *_grammarFileName;
// Set once in +initialize; compared against stack entries in
// getRuleInvocationStack:Recognizer:.
static NSString *NEXT_TOKEN_RULE_NAME;
@synthesize state;
@synthesize grammarFileName;
//@synthesize failed;
@synthesize sourceName;
//@synthesize numberOfSyntaxErrors;
@synthesize tokenNames;
/** One-time class setup: creates the shared NEXT_TOKEN_RULE_NAME constant. */
+ (void) initialize
{
    // The runtime re-sends +initialize to this implementation for every
    // subclass that does not override it; only do the setup once, for
    // BaseRecognizer itself, so the constant is not re-created each time.
    if ( self != [BaseRecognizer class] ) {
        return;
    }
    NEXT_TOKEN_RULE_NAME = [@"nextToken" retain];
}
/** Factory: allocates a recognizer and initializes it with -init. */
+ (BaseRecognizer *) newBaseRecognizer
{
    BaseRecognizer *recognizer = [BaseRecognizer alloc];
    return [recognizer init];
}
/** Factory: allocates a recognizer and initializes it with -initWithLen:. */
+ (BaseRecognizer *) newBaseRecognizerWithRuleLen:(NSInteger)aLen
{
    BaseRecognizer *recognizer = [BaseRecognizer alloc];
    return [recognizer initWithLen:aLen];
}
/** Factory: allocates a recognizer and initializes it with -initWithState:. */
+ (BaseRecognizer *) newBaseRecognizer:(RecognizerSharedState *)aState
{
    BaseRecognizer *recognizer = [BaseRecognizer alloc];
    return [recognizer initWithState:aState];
}
/** Returns the class-wide token name array (may be nil if never set). */
+ (AMutableArray *)getTokenNames
{
    AMutableArray *sharedNames = _tokenNames;
    return sharedNames;
}
/** Replaces the class-wide token name array, releasing the old one and
 *  retaining the new one. Setting the same object again is a no-op.
 */
+ (void)setTokenNames:(AMutableArray *)theTokNams
{
    if ( _tokenNames == theTokNams ) {
        return;                 // same object: nothing to swap
    }
    [_tokenNames release];      // messaging nil is harmless
    _tokenNames = [theTokNams retain];
}
/** Replaces the class-wide grammar file name.
 *
 *  The previous value is released and the new one retained. The earlier
 *  implementation never stored aFileName and instead sent -retain to the
 *  pointer it had just released.
 */
+ (void)setGrammarFileName:(NSString *)aFileName
{
    if ( _grammarFileName != aFileName ) {
        [aFileName retain];             // take ownership of the new value first
        [_grammarFileName release];     // then drop the old one (nil-safe)
        _grammarFileName = aFileName;
    }
}
/** Default initializer: creates a fresh shared state, copies the class-wide
 *  token names / grammar file name, and puts the state into its initial
 *  (no error, no backtracking) configuration.
 */
- (id) init
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        // NOTE(review): the factory result is retained again here; confirm
        // newRecognizerSharedState does not already return an owned object.
        state = [[RecognizerSharedState newRecognizerSharedState] retain];
    }
    // -retain returns its receiver, and messaging nil yields nil
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;
    state.errorRecovery = NO;       // not currently recovering
    state.lastErrorIndex = -1;
    state.failed = NO;              // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;         // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}
/** Like -init, but the shared state is created with
 *  newRecognizerSharedStateWithRuleLen:aLen.
 */
- (id) initWithLen:(NSInteger)aLen
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        // NOTE(review): the factory result is retained again here; confirm
        // the factory does not already return an owned object.
        state = [[RecognizerSharedState newRecognizerSharedStateWithRuleLen:aLen] retain];
    }
    // -retain returns its receiver, and messaging nil yields nil
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;
    state.errorRecovery = NO;       // not currently recovering
    state.lastErrorIndex = -1;
    state.failed = NO;              // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;         // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}
/** Initializes the recognizer around an existing shared state. When aState
 *  is nil a new state is created. Either way the state is retained and then
 *  reset to its initial configuration.
 */
- (id) initWithState:(RecognizerSharedState *)aState
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    state = ( aState != nil ) ? aState : [RecognizerSharedState newRecognizerSharedState];
    [state retain];
    // -retain returns its receiver, and messaging nil yields nil
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;
    state.errorRecovery = NO;       // not currently recovering
    state.lastErrorIndex = -1;
    state.failed = NO;              // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;         // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}
/** Releases the ivars retained by the initializers. */
- (void)dealloc
{
#ifdef DEBUG_DEALLOC
    NSLog( @"called dealloc in BaseRecognizer" );
#endif
    // Sending -release to nil is a no-op, so no nil checks are needed.
    [grammarFileName release];
    [tokenNames release];
    [state release];
    [super dealloc];
}
// reset the recognizer to the initial state. does not touch the token source!
// this can be extended by the grammar writer to reset custom ivars
- (void) reset
{
    if ( state != nil ) {
        // empty the follow stack
        if ( state.following != nil && [state.following count] ) {
            [state.following removeAllObjects];
        }
        state._fsp = -1;
        state.errorRecovery = NO;       // not currently recovering
        state.lastErrorIndex = -1;
        state.failed = NO;              // no match has failed yet
        state.syntaxErrors = 0;
        state.backtracking = 0;         // backtracking depth
        state.tokenStartCharIndex = -1;
        // drop any memoized rule results
        if ( state.ruleMemo != nil && [state.ruleMemo count] ) {
            [state.ruleMemo removeAllObjects];
        }
    }
}
/** Returns the shared state's failed flag. */
- (BOOL) getFailed
{
    BOOL didFail = [state getFailed];
    return didFail;
}
/** Forwards the failed flag to the shared state. */
- (void) setFailed:(BOOL)flag
{
    RecognizerSharedState *sharedState = state;
    [sharedState setFailed:flag];
}
/** Returns the shared recognizer state (not retained for the caller). */
- (RecognizerSharedState *) getState
{
    RecognizerSharedState *current = state;
    return current;
}
/** Replaces the shared state, releasing the old object and retaining the
 *  new one. Setting the same object again is a no-op.
 */
- (void) setState:(RecognizerSharedState *) theState
{
    if ( state == theState ) {
        return;
    }
    [state release];            // nil-safe
    state = [theState retain];
}
/** Base implementation has no input stream and returns nil; subclasses
 *  are expected to override this.
 */
- (id)input
{
    id noInput = nil;
    return noInput;
}
/** No-op in the base class; intended to be overridden by subclasses. */
- (void)skip
{
    // intentionally empty
}
/** Matches the current input symbol against ttype.
 *
 *  On success the symbol is consumed and the error flags are cleared. On a
 *  mismatch while backtracking, the state is marked failed and the current
 *  symbol is returned. Otherwise recovery is delegated to
 *  recoverFromMismatchedToken:TokenType:Follow:.
 */
-(id) match:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
{
    id currentSymbol = [self getCurrentInputSymbol:anInput];
    if ( [anInput LA:1] != ttype ) {
        if ( state.backtracking > 0 ) {
            state.failed = YES;
            return currentSymbol;
        }
        return [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
    }
    [anInput consume];
    state.errorRecovery = NO;
    state.failed = NO;
    return currentSymbol;
}
/** Accepts whatever symbol is next: clears the error flags, then consumes
 *  one symbol from anInput.
 */
-(void) matchAny:(id<IntStream>)anInput
{
    // a wildcard match always succeeds, so leave recovery/failed mode first
    state.errorRecovery = NO;
    state.failed = NO;

    [anInput consume];
}
/** YES when the symbol after the current one (lookahead 2) has type ttype,
 *  i.e. the current symbol looks like a single extra token.
 */
-(BOOL) mismatchIsUnwantedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype
{
    if ( [anInput LA:2] == ttype ) {
        return YES;
    }
    return NO;
}
/** Decides whether a mismatch can be treated as a single missing token:
 *  YES when the current lookahead symbol (or EOR) is in the set of tokens
 *  that may follow the element being matched.
 */
-(BOOL) mismatchIsMissingToken:(id<IntStream>)anInput Follow:(ANTLRBitSet *) follow
{
    // With no follow information all we could do is consume one token and
    // hope, so report "not a missing token".
    if ( follow == nil )
        return NO;

    // If the end of the rule is visible, widen the set with whatever can
    // follow this rule in the current call chain.
    if ( [follow member:TokenTypeEOR] ) {
        // NOTE(review): combineFollows: returns a retained set that is not
        // released here; confirm ownership before changing.
        ANTLRBitSet *contextFollow = [self computeContextSensitiveRuleFOLLOW];
        follow = [follow or:contextFollow];
        if ( state._fsp >= 0 ) {
            // not the start symbol: EOR itself is not a viable token
            [follow remove:(TokenTypeEOR)];
        }
    }

    // The bit set cannot hold negative values such as EOF (-1), so EOR left
    // in the set stands for "EOF may follow the start symbol".
    if ( [follow member:[anInput LA:1]] )
        return YES;
    if ( [follow member:TokenTypeEOR] )
        return YES;
    return NO;
}
/** Report a recognition problem.
*
* This method sets errorRecovery to indicate the parser is recovering
* not parsing. Once in recovery mode, no errors are generated.
* To get out of recovery mode, the parser must successfully match
* a token (after a resync). So it will go:
*
* 1. error occurs
* 2. enter recovery mode, report error
* 3. consume until token found in resynch set
* 4. try to resume parsing
* 5. next match() will reset errorRecovery mode
*
* If you override, make sure to update syntaxErrors if you care about that.
*/
-(void) reportError:(RecognitionException *) e
{
    // While still recovering from an earlier error (no token has matched
    // since), further errors are spurious: neither counted nor shown.
    if ( !state.errorRecovery ) {
        state.syntaxErrors++;
        state.errorRecovery = YES;
        [self displayRecognitionError:[self getTokenNames] Exception:e];
    }
}
/** Builds " <header> <message>" for the exception and hands it to
 *  emitErrorMessage:.
 */
-(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(RecognitionException *)e
{
    NSString *location = [self getErrorHeader:e];
    NSString *description = [self getErrorMessage:e TokenNames:theTokNams];
    NSString *fullText = [NSString stringWithFormat:@" %@ %@", location, description];
    [self emitErrorMessage:fullText];
}
/** What error message should be generated for the various
* exception types?
*
* Not very object-oriented code, but I like having all error message
* generation within one method rather than spread among all of the
* exception classes. This also makes it much easier for the exception
* handling because the exception classes do not have to have pointers back
* to this object to access utility routines and so on. Also, changing
* the message for an exception type would be difficult because you
* would have to subclassing exception, but then somehow get ANTLR
* to make those kinds of exception objects instead of the default.
* This looks weird, but trust me--it makes the most sense in terms
* of flexibility.
*
* For grammar debugging, you will want to override this to add
* more information such as the stack frame with
* getRuleInvocationStack(e, this.getClass().getName()) and,
* for no viable alts, the decision description and state etc...
*
* Override this to change the message generated for one or more
* exception types.
*/
- (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)theTokNams
{
    // Returns the human-readable text for e. theTokNams is indexed by token
    // type to name the expected token; EOF is special-cased because it is
    // not looked up in that array. The branch order is significant and is
    // kept exactly as before.
    NSString *msg;
    if ( [e isKindOfClass:[UnwantedTokenException class]] ) {
        UnwantedTokenException *ute = (UnwantedTokenException *)e;
        NSString *tokenName=@"<unknown>";
        if ( ute.expecting == TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting];
        }
        msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]],
               tokenName];
    }
    else if ( [e isKindOfClass:[MissingTokenException class] ] ) {
        MissingTokenException *mte = (MissingTokenException *)e;
        NSString *tokenName=@"<unknown>";
        if ( mte.expecting== TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = [theTokNams objectAtIndex:mte.expecting];
        }
        msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
    }
    else if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
        MismatchedTokenException *mte = (MismatchedTokenException *)e;
        NSString *tokenName=@"<unknown>";
        if ( mte.expecting== TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = [theTokNams objectAtIndex:mte.expecting];
        }
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
    }
    else if ( [e isKindOfClass:[MismatchedTreeNodeException class]] ) {
        MismatchedTreeNodeException *mtne = (MismatchedTreeNodeException *)e;
        NSString *tokenName=@"<unknown>";
        if ( mtne.expecting==TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = [theTokNams objectAtIndex:mtne.expecting];
        }
        msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
    }
    else if ( [e isKindOfClass:[NoViableAltException class]] ) {
        NoViableAltException *nvae = (NoViableAltException *)e;
        // The arguments now follow the order of the "decision:" / "state:"
        // labels (they used to be swapped). The casts make the specifiers
        // correct whatever integer width the properties have.
        msg = [NSString stringWithFormat:@"no viable alternative decision:%ld state:%ld at input %@",
               (long)nvae.decisionNumber, (long)nvae.stateNumber, [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[EarlyExitException class]] ) {
        //EarlyExitException *eee = (EarlyExitException *)e;
        // for development, can add "(decision="+eee.decisionNumber+")"
        // The format previously had no %@, so the offending token was never shown.
        msg =[NSString stringWithFormat: @"required (...)+ loop did not match anything at input %@", [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
        MismatchedSetException *mse = (MismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
#pragma warning NotSet not yet implemented.
    else if ( [e isKindOfClass:[MismatchedNotSetException class] ] ) {
        MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
    else if ( [e isKindOfClass:[FailedPredicateException class]] ) {
        FailedPredicateException *fpe = (FailedPredicateException *)e;
        msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
    }
    else {
        // unknown exception type: fall back to the exception's name
        msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
    }
    return msg;
}
/** Get number of recognition errors (lexer, parser, tree parser). Each
* recognizer tracks its own number. So parser and lexer each have
* separate count. Does not count the spurious errors found between
* an error and next valid token match
*
* See also reportError()
*/
- (NSInteger) getNumberOfSyntaxErrors
{
    // the count is kept on the shared state and bumped by reportError:
    NSInteger errorCount = state.syntaxErrors;
    return errorCount;
}
/** What is the error header, normally line/character position information? */
- (NSString *)getErrorHeader:(RecognitionException *)e
{
    // NOTE(review): %d assumes int-sized line/charPositionInLine; confirm
    // the property types in RecognitionException.
    NSString *header = [NSString stringWithFormat:@"line %d:%d", e.line, e.charPositionInLine];
    return header;
}
/** How should a token be displayed in an error message? The default
* is to display just the text, but during development you might
* want to have a lot of information spit out. Override in that case
* to use t.toString() (which, for CommonToken, dumps everything about
* the token). This is better than forcing you to override a method in
* your token objects because you don't have to go modify your lexer
* so that it creates a new Java type.
*/
- (NSString *)getTokenErrorDisplay:(id<Token>)t
{
    // Returns the token's text wrapped in single quotes, with newline,
    // carriage return and tab shown as \n, \r and \t. When the token has
    // no text, "<EOF>" or "<type-number>" is used instead.
    NSString *s = t.text;
    if ( s == nil ) {
        if ( t.type == TokenTypeEOF ) {
            s = @"<EOF>";
        }
        else {
            // t.type is an integer; formatting it with %@ would treat the
            // number as an object pointer.
            s = [NSString stringWithFormat:@"<%ld>", (long)t.type];
        }
    }
    // stringByReplacingOccurrencesOfString: is a literal replacement, so one
    // escaped backslash is enough. The former @"\\\\n" (regex-replacement
    // escaping) produced two backslashes in the output.
    s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\n"];
    s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\r"];
    s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\t"];
    return [NSString stringWithFormat:@"\'%@\'", s];
}
/** Override this method to change where error messages go */
- (void) emitErrorMessage:(NSString *) msg
{
    // The default sink is NSLog; the message is passed as an argument so
    // any '%' characters in it are not interpreted as format specifiers.
    NSString *text = msg;
    NSLog(@"%@", text);
}
/** Recover from an error found on the input stream. This is
* for NoViableAlt and mismatched symbol exceptions. If you enable
* single token insertion and deletion, this will usually not
* handle mismatched symbol exceptions but there could be a mismatched
* token that the match() routine could not recover from.
*/
- (void)recover:(id<IntStream>)anInput Exception:(RecognitionException *)re
{
    if ( state.lastErrorIndex == anInput.index ) {
        // A second error at the same input index means the previous
        // recovery consumed nothing (LT(1) was already in the recovery
        // set). Consume one symbol as a failsafe against looping forever.
        [anInput consume];
    }
    state.lastErrorIndex = anInput.index;
    ANTLRBitSet *followSet = [self computeErrorRecoverySet];
    [self beginResync];
    [self consumeUntilFollow:anInput Follow:followSet];
    [self endResync];
    // computeErrorRecoverySet returns the set built by combineFollows:,
    // which explicitly retains it on behalf of the caller. Balance that
    // retain here so the set is not leaked on every recovery.
    [followSet release];
}
/** Hook invoked just before tokens are consumed during error recovery
 *  (see recover:Exception: and recoverFromMismatchedToken:). Empty here.
 */
- (void) beginResync
{
}
/** Hook invoked just after tokens are consumed during error recovery
 *  (see recover:Exception: and recoverFromMismatchedToken:). Empty here.
 */
- (void) endResync
{
}
/* Compute the error recovery set for the current rule. During
* rule invocation, the parser pushes the set of tokens that can
* follow that rule reference on the stack; this amounts to
* computing FIRST of what follows the rule reference in the
* enclosing rule. This local follow set only includes tokens
* from within the rule; i.e., the FIRST computation done by
* ANTLR stops at the end of a rule.
*
* EXAMPLE
*
* When you find a "no viable alt exception", the input is not
* consistent with any of the alternatives for rule r. The best
* thing to do is to consume tokens until you see something that
* can legally follow a call to r *or* any rule that called r.
* You don't want the exact set of viable next tokens because the
* input might just be missing a token--you might consume the
* rest of the input looking for one of the missing tokens.
*
* Consider grammar:
*
* a : '[' b ']'
* | '(' b ')'
* ;
* b : c '^' INT ;
* c : ID
* | INT
* ;
*
* At each rule invocation, the set of tokens that could follow
* that rule is pushed on a stack. Here are the various "local"
* follow sets:
*
* FOLLOW(b1_in_a) = FIRST(']') = ']'
* FOLLOW(b2_in_a) = FIRST(')') = ')'
* FOLLOW(c_in_b) = FIRST('^') = '^'
*
* Upon erroneous input "[]", the call chain is
*
* a -> b -> c
*
* and, hence, the follow context stack is:
*
* depth local follow set after call to rule
* 0 <EOF> a (from main())
* 1 ']' b
* 3 '^' c
*
* Notice that ')' is not included, because b would have to have
* been called from a different context in rule a for ')' to be
* included.
*
* For error recovery, we cannot consider FOLLOW(c)
* (context-sensitive or otherwise). We need the combined set of
* all context-sensitive FOLLOW sets--the set of all tokens that
* could follow any reference in the call chain. We need to
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
* we resync'd to that token, we'd consume until EOF. We need to
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
* In this case, for input "[]", LA(1) is in this set so we would
* not consume anything and after printing an error rule c would
* return normally. It would not find the required '^' though.
* At this point, it gets a mismatched token error and throws an
* exception (since LA(1) is not in the viable following token
* set). The rule exception handler tries to recover, but finds
* the same recovery set and doesn't consume anything. Rule b
* exits normally returning to rule a. Now it finds the ']' (and
* with the successful match exits errorRecovery mode).
*
* So, you can see that the parser walks up call chain looking
* for the token that was a member of the recovery set.
*
* Errors are not generated in errorRecovery mode.
*
* ANTLR's error recovery mechanism is based upon original ideas:
*
* "Algorithms + Data Structures = Programs" by Niklaus Wirth
*
* and
*
* "A note on error recovery in recursive descent parsers":
* http://portal.acm.org/citation.cfm?id=947902.947905
*
* Later, Josef Grosch had some good ideas:
*
* "Efficient and Comfortable Error Recovery in Recursive Descent
* Parsers":
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
*
* Like Grosch I implemented local FOLLOW sets that are combined
* at run-time upon error to avoid overhead during parsing.
*/
- (ANTLRBitSet *) computeErrorRecoverySet
{
    // non-exact: union of every follow set on the stack
    ANTLRBitSet *recoverySet = [self combineFollows:NO];
    return recoverySet;
}
/** Compute the context-sensitive FOLLOW set for current rule.
* This is set of token types that can follow a specific rule
* reference given a specific call chain. You get the set of
* viable tokens that can possibly come next (lookahead depth 1)
* given the current call chain. Contrast this with the
* definition of plain FOLLOW for rule r:
*
* FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
*
* where x in T* and alpha, beta in V*; T is set of terminals and
* V is the set of terminals and nonterminals. In other words,
* FOLLOW(r) is the set of all tokens that can possibly follow
* references to r in *any* sentential form (context). At
* runtime, however, we know precisely which context applies as
* we have the call chain. We may compute the exact (rather
* than covering superset) set of following tokens.
*
* For example, consider grammar:
*
* stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
* | "return" expr '.'
* ;
* expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
* atom : INT // FOLLOW(atom)=={'+',')',';','.'}
* | '(' expr ')'
* ;
*
* The FOLLOW sets are all inclusive whereas context-sensitive
* FOLLOW sets are precisely what could follow a rule reference.
* For input "i=(3);", here is the derivation:
*
* stat => ID '=' expr ';'
* => ID '=' atom ('+' atom)* ';'
* => ID '=' '(' expr ')' ('+' atom)* ';'
* => ID '=' '(' atom ')' ('+' atom)* ';'
* => ID '=' '(' INT ')' ('+' atom)* ';'
* => ID '=' '(' INT ')' ';'
*
* At the "3" token, you'd have a call chain of
*
* stat -> expr -> atom -> expr -> atom
*
* What can follow that specific nested ref to atom? Exactly ')'
* as you can see by looking at the derivation of this specific
* input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
*
* You want the exact viable token set when recovering from a
* token mismatch. Upon token mismatch, if LA(1) is member of
* the viable next token set, then you know there is most likely
* a missing token in the input stream. "Insert" one by just not
* throwing an exception.
*/
- (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
{
    // exact: stop combining at the first follow set that cannot see EOR
    ANTLRBitSet *viable = [self combineFollows:YES];
    return viable;
}
// what is exact? it seems to only add sets from above on stack
// if EOR is in set i. When it sees a set w/o EOR, it stops adding.
// Why would we ever want them all? Maybe no viable alt instead of
// mismatched token?
- (ANTLRBitSet *)combineFollows:(BOOL) exact
{
    // Walks the follow stack from the top (state._fsp) down to 0, OR-ing
    // each local follow set into the result. The returned set carries the
    // explicit retain taken below.
    NSInteger top = state._fsp;
    ANTLRBitSet *combined = [[ANTLRBitSet newBitSet] retain];
    int depth = top;
    while ( depth >= 0 ) {
        ANTLRBitSet *local = (ANTLRBitSet *)[state.following objectAtIndex:depth];
        [combined orInPlace:local];
        if ( exact ) {
            if ( ![local member:TokenTypeEOR] ) {
                // this level cannot see the end of its rule: stop here
                break;
            }
            // EOR only stays in the result at depth 0 (the start rule),
            // where it signals that EOF may follow
            if ( depth > 0 ) {
                [combined remove:TokenTypeEOR];
            }
        }
        depth--;
    }
    return combined;
}
/** Attempt to recover from a single missing or extra token.
*
* EXTRA TOKEN
*
* LA(1) is not what we are looking for. If LA(2) has the right token,
* however, then assume LA(1) is some extra spurious token. Delete it
* and LA(2) as if we were doing a normal match(), which advances the
* input.
*
* MISSING TOKEN
*
* If current token is consistent with what could come after
* ttype then it is ok to "insert" the missing token, else throw
* exception For example, Input "i=(3;" is clearly missing the
* ')'. When the parser returns from the nested call to expr, it
* will have call chain:
*
* stat -> expr -> atom
*
* and it will be trying to match the ')' at this point in the
* derivation:
*
* => ID '=' '(' INT ')' ('+' atom)* ';'
* ^
* match() will see that ';' doesn't match ')' and report a
* mismatched token error. To recover, it sees that LA(1)==';'
* is in the set of tokens that can follow the ')' token
* reference in rule atom. It can assume that you forgot the ')'.
*/
- (id<Token>)recoverFromMismatchedToken:(id<IntStream>)anInput
                              TokenType:(NSInteger)ttype
                                 Follow:(ANTLRBitSet *)follow
{
    // Case 1: the token after the current one is what we want, so treat
    // the current token as extra and delete it.
    if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
        // built before consuming, so it is created at the offending position
        RecognitionException *unwanted = [UnwantedTokenException newException:ttype Stream:anInput];
        [self beginResync];
        [anInput consume];              // drop the extra token
        [self endResync];
        [self reportError:unwanted];    // reported only after the deletion
        // return the token we were actually trying to match
        id wanted = [self getCurrentInputSymbol:anInput];
        [anInput consume];              // step past it as if all were fine
        return wanted;
    }

    // Case 2: neither deletion nor insertion applies; the caller must handle it.
    if ( ![self mismatchIsMissingToken:anInput Follow:follow] ) {
        @throw [MismatchedTokenException newException:ttype Stream:anInput];
    }

    // Case 3: the current token is consistent with what may follow, so act
    // as though the expected token had been present.
    id<Token> conjured = [self getMissingSymbol:anInput Exception:nil TokenType:ttype Follow:follow];
    RecognitionException *missing = [MissingTokenException newException:ttype Stream:anInput With:conjured];
    [self reportError:missing];         // reported after the insertion
    return conjured;
}
/** Not currently used */
-(id) recoverFromMismatchedSet:(id<IntStream>)anInput
                     Exception:(RecognitionException *)e
                        Follow:(ANTLRBitSet *) follow
{
    // TODO do single token deletion like the token-mismatch recovery does
    if ( ![self mismatchIsMissingToken:anInput Follow:follow] ) {
        @throw e;
    }
    [self reportError:e];
    // there is no way yet to conjure a token for a set, so ask for an
    // invalid-typed placeholder
    id placeholder = [self getMissingSymbol:anInput Exception:e TokenType:TokenTypeInvalid Follow:follow];
    return placeholder;
}
/** Match needs to return the current input symbol, which gets put
* into the label for the associated token ref; e.g., x=ID. Token
* and tree parsers need to return different objects. Rather than test
* for input stream type or change the IntStream interface, I use
* a simple method to ask the recognizer to tell me what the current
* input symbol is.
*
* This is ignored for lexers.
*/
- (id) getCurrentInputSymbol:(id<IntStream>)anInput
{
    // the base class has no notion of a current symbol
    id noSymbol = nil;
    return noSymbol;
}
/** Conjure up a missing token during error recovery.
*
* The recognizer attempts to recover from single missing
* symbols. But, actions might refer to that missing symbol.
* For example, x=ID {f($x);}. The action clearly assumes
* that there has been an identifier matched previously and that
* $x points at that token. If that token is missing, but
* the next token in the stream is what we want we assume that
* this token is missing and we keep going. Because we
* have to return some token to replace the missing token,
* we have to conjure one up. This method gives the user control
* over the tokens returned for missing tokens. Mostly,
* you will want to create something special for identifier
* tokens. For literals such as '{' and ',', the default
* action in the parser or tree parser works. It simply creates
* a CommonToken of the appropriate type. The text will be the token.
* If you change what tokens must be created by the lexer,
* override this method to create the appropriate tokens.
*/
- (id)getMissingSymbol:(id<IntStream>)anInput
             Exception:(RecognitionException *)e
             TokenType:(NSInteger)expectedTokenType
                Follow:(ANTLRBitSet *)follow
{
    // the base class conjures nothing; all arguments are ignored
    id noSymbol = nil;
    return noSymbol;
}
/** Consume symbols until the lookahead is tokenType or EOF. */
-(void) consumeUntilTType:(id<IntStream>)anInput TokenType:(NSInteger)tokenType
{
    for ( int la = [anInput LA:1];
          la != TokenTypeEOF && la != tokenType;
          la = [anInput LA:1] ) {
        [anInput consume];
    }
}
/** Consume tokens until one matches the given token set */
-(void) consumeUntilFollow:(id<IntStream>)anInput Follow:(ANTLRBitSet *)set
{
    for (;;) {
        int la = [anInput LA:1];
        // stop at end of input or at the first symbol contained in the set
        if ( la == TokenTypeEOF || [set member:la] ) {
            break;
        }
        [anInput consume];
    }
}
/** Push a rule's follow set using our own hardcoded stack */
- (void)pushFollow:(ANTLRBitSet *)fset
{
    // A slot already exists above the current top: overwrite it.
    if ( (state._fsp +1) < [state.following count] ) {
        [state.following replaceObjectAtIndex:++state._fsp withObject:fset];
        return;
    }
    // Otherwise grow the stack by one entry.
    [state.following addObject:fset];
    // NOTE(review): only this branch takes an extra retain on fset; confirm
    // whether the array already retains its elements.
    [fset retain];
    state._fsp++;
}
/** Pops the top follow set off the stack and returns it, or logs and
 *  returns nil when the stack is empty.
 */
- (ANTLRBitSet *)popFollow
{
    if ( !(state._fsp >= 0 && [state.following count] > 0) ) {
        NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
        return nil;
    }
    // Removing the entry may drop the collection's reference to it, so keep
    // the set alive until the caller's autorelease pool drains. The
    // retain/autorelease pair is balanced and harmless either way.
    ANTLRBitSet *fset = [[[state.following objectAtIndex:state._fsp--] retain] autorelease];
    [state.following removeLastObject];
    return fset;
}
/** Return List<String> of the rules in your parser instance
* leading up to a call to this method. You could override if
* you want more details such as the file/line info of where
* in the parser java code a rule is invoked.
*
* This is very useful for error messages and for context-sensitive
* error recovery.
*/
- (AMutableArray *)getRuleInvocationStack
{
    // The class name is only needed for the duration of the call below, so
    // it is no longer retained; the previous retain was never balanced.
    NSString *parserClassName = [self className];
    // NOTE(review): ownership of the object returned by newException is
    // not visible from here; confirm whether it needs releasing.
    return [self getRuleInvocationStack:[RecognitionException newException] Recognizer:parserClassName];
}
/** A more general version of getRuleInvocationStack where you can
* pass in, for example, a RecognitionException to get it's rule
* stack trace. This routine is shared with all recognizers, hence,
* static.
*
* TODO: move to a utility class or something; weird having lexer call this
*/
- (AMutableArray *)getRuleInvocationStack:(RecognitionException *)e
                               Recognizer:(NSString *)recognizerClassName
{
    // Filters e's call stack symbols, outermost frame first, keeping only
    // entries equal to recognizerClassName. The returned array carries the
    // explicit retain taken below (unchanged from before).
    AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];
    NSArray *stack = [e callStackSymbols];
    // signed index so an empty or nil stack skips the loop cleanly
    for (NSInteger i = (NSInteger)[stack count] - 1; i >= 0; i--) {
        NSString *t = [stack objectAtIndex:i];
        // NSLog(@"stack %d = %@\n", i, t);
        // -commonPrefixWithString:options: returns a (possibly empty) string,
        // never nil, so using it as a condition skipped every frame.
        // -hasPrefix: is the intended boolean test.
        if ( [t hasPrefix:@"org.antlr.runtime."] ) {
            // id aClass = objc_getClass( [t UTF8String] );
            continue; // skip support code such as this method
        }
        if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) {
            // name = sel_getName(method_getName(method));
            // NSString *aMethod = [NSString stringWithFormat:@"%s", name];
            continue;
        }
        if ( ![t isEqualTo:recognizerClassName] ) {
            // name = class_getName( [t UTF8String] );
            continue; // must not be part of this parser
        }
        [rules addObject:t];
    }
#ifdef DONTUSEYET
    StackTraceElement[] stack = e.getStackTrace();
    int i = 0;
    for (i=stack.length-1; i>=0; i--) {
        StackTraceElement t = stack[i];
        if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
            continue; // skip support code such as this method
        }
        if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
            continue;
        }
        if ( ![[t getClassName] equals:recognizerClassName] ) {
            continue; // must not be part of this parser
        }
        [rules addObject:[t getMethodName]];
    }
#endif
    // stack came from -callStackSymbols and is not owned by this method, so
    // it must not be released here (the old [stack release] over-released).
    return rules;
}
/** Returns the backtracking level stored on the shared state. */
- (NSInteger) getBacktrackingLevel
{
    NSInteger level = [state getBacktracking];
    return level;
}
/** Stores the backtracking level on the shared state. */
- (void) setBacktrackingLevel:(NSInteger)level
{
    RecognizerSharedState *sharedState = state;
    [sharedState setBacktracking:level];
}
/** Used to print out token names like ID during debugging and
* error reporting. The generated parsers implement a method
* that overrides this to point to their String[] tokenNames.
*/
- (NSArray *)getTokenNames
{
    // per-instance copy taken from the class-wide default at init time
    NSArray *names = tokenNames;
    return names;
}
/** For debugging and other purposes, might want the grammar name.
* Have ANTLR generate an implementation for this method.
*/
- (NSString *)getGrammarFileName
{
    // per-instance copy taken from the class-wide default at init time
    NSString *fileName = grammarFileName;
    return fileName;
}
/** Always nil in the base class. */
- (NSString *)getSourceName
{
    NSString *noName = nil;
    return noName;
}
/** A convenience method for use most often with template rewrites.
* Convert a List<Token> to List<String>
*/
- (AMutableArray *)toStrings:(AMutableArray *)tokens
{
    // nil in, nil out
    if ( tokens == nil ) {
        return nil;
    }
    AMutableArray *texts = [AMutableArray arrayWithCapacity:[tokens count]];
    for (id token in tokens) {
        // collect each token's text, in order
        [texts addObject:[token text]];
    }
    return texts;
}
/** Given a rule number and a start token index number, return
* ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
* start index. If this rule has parsed input starting from the
* start index before, then return where the rule stopped parsing.
* It returns the index of the last token matched by the rule.
*
* For now we use a hashtable and just the slow Object-based one.
* Later, we can make a special one for ints and also one that
* tosses out data after we commit past input position i.
*/
- (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
{
    // Look up the per-rule table, creating and installing one on first use.
    HashRule *ruleTable = [state.ruleMemo objectAtIndex:ruleIndex];
    if ( ruleTable == nil ) {
        ruleTable = [HashRule newHashRuleWithLen:17];
        [state.ruleMemo insertObject:ruleTable atIndex:ruleIndex];
    }
    // No entry for this start index means the rule was never tried there.
    ACNumber *recordedStop = [ruleTable getRuleMemoStopIndex:ruleStartIndex];
    if ( recordedStop != nil ) {
        return [recordedStop integerValue];
    }
    return ANTLR_MEMO_RULE_UNKNOWN;
}
/** Has this rule already parsed input at the current index in the
* input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
* If we attempted but failed to parse properly before, return
* MEMO_RULE_FAILED.
*
* This method has a side-effect: if we have seen this input for
* this rule and successfully parsed before, then seek ahead to
* 1 past the stop token matched for this rule last time.
*/
- (BOOL)alreadyParsedRule:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex
{
    // Returns NO when nothing is memoized for this rule at the current
    // input index. Otherwise returns YES after either marking the state
    // failed (memoized failure) or seeking one past the memoized stop index.
    NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
    if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
        // NSLog(@"rule %d not yet encountered\n", ruleIndex);
        return NO;
    }
    if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
        // NSInteger is wider than int on 64-bit targets: cast to long and
        // use %ld so the format matches its arguments.
        if (debug) NSLog(@"rule %ld will never succeed\n", (long)ruleIndex);
        state.failed = YES;
    }
    else {
        if (debug) NSLog(@"seen rule %ld before; skipping ahead to %ld failed = %@\n", (long)ruleIndex, (long)(aStopIndex+1), state.failed?@"YES":@"NO");
        [anInput seek:(aStopIndex+1)]; // jump to one past stop token
    }
    return YES;
}
/** Record whether or not this rule parsed the input at this position
 *  successfully.
 *
 *  The value stored is ANTLR_MEMO_RULE_FAILED when state.failed is set,
 *  otherwise the index just before the input's current index.
 *
 *  Nothing is recorded when state.ruleMemo is nil, when ruleIndex is not
 *  below the memo's length, or when no entry exists at ruleIndex.
 *
 *  @param anInput        Stream whose current index marks where the rule stopped.
 *  @param ruleIndex      Index of the rule in state.ruleMemo.
 *  @param ruleStartIndex Token index at which the rule began parsing.
 */
- (void)memoize:(id<IntStream>)anInput
      RuleIndex:(NSInteger)ruleIndex
     StartIndex:(NSInteger)ruleStartIndex
{
    RuleStack *aRuleStack;
    NSInteger stopTokenIndex;
    aRuleStack = state.ruleMemo;
    stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
    if ( aRuleStack == nil ) {
        if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
        return;
    }
    if ( ruleIndex >= [aRuleStack length] ) {
        // Cast to long so the arguments match the %ld specifiers on 64-bit targets.
        if (debug) NSLog(@"!!!!!!!!! memo size is %ld, but rule index is %ld", (long)[state.ruleMemo length], (long)ruleIndex);
        return;
    }
    if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
        [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
    }
    return;
}
/** Return how many rule/input-index pairs there are in total.
 *  TODO: this includes synpreds. :(
 *
 *  Sums -count over every non-nil entry of state.ruleMemo.
 *
 *  @return The total, or 0 when state.ruleMemo is nil.
 */
- (NSInteger)getRuleMemoizationCacheSize
{
    RuleStack *aRuleStack = state.ruleMemo;
    // Accumulate in NSInteger so the sum is not narrowed before being returned.
    NSInteger aCnt = 0;
    if ( aRuleStack == nil ) {
        return aCnt;
    }
    for (NSUInteger i = 0; i < [aRuleStack length]; i++) {
        HashRule *aHashRule = [aRuleStack objectAtIndex:i];
        if ( aHashRule != nil ) {
            aCnt += [aHashRule count]; // how many input indexes are recorded?
        }
    }
    return aCnt;
}
#pragma warning Have to fix traceIn and traceOut.
/** Log entry into a rule.
 *
 *  Writes "enter <ruleName> <inputSymbol>" and, while state.backtracking
 *  is greater than zero, a second line reporting that backtracking is
 *  active.
 *
 *  @param ruleName    Name of the rule being entered.
 *  @param ruleIndex   Not used by this implementation.
 *  @param inputSymbol The current input symbol, logged with %@.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"enter %@ %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        // state.backtracking is a depth counter, so test it for non-zero
        // rather than comparing it with YES (which only matches depth 1).
        NSLog(@" backtracking=%s", (state.backtracking ? "YES" : "NO"));
    }
    NSLog(@"\n");
}
/** Log exit from a rule.
 *
 *  Writes "exit <ruleName> -- <inputSymbol>" and, while
 *  state.backtracking is greater than zero, a second line reporting the
 *  backtracking flag and whether state.failed is set.
 *
 *  @param ruleName    Name of the rule being left.
 *  @param ruleIndex   Not used by this implementation.
 *  @param inputSymbol The current input symbol, logged with %@.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        const char *backtrackingText = state.backtracking ? "YES" : "NO";
        const char *outcomeText = state.failed ? "failed" : "succeeded";
        NSLog(@" backtracking=%s %s", backtrackingText, outcomeText);
    }
    NSLog(@"\n");
}
/** Call a syntactic predicate method using its selector; this way
 *  arbitrary synpreds can be supported.
 *
 *  The predicate runs with state.backtracking incremented. Afterwards
 *  the input is rewound to the position marked before the call,
 *  state.backtracking is decremented again and state.failed is cleared.
 *  A RecognitionException raised by the predicate is logged and
 *  otherwise ignored.
 *
 *  @param synpredFragment Selector of the predicate method on self.
 *  @return YES if state.failed was not set after running the predicate.
 */
- (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment
{
    state.backtracking++;
    id<IntStream> stream = self.input;
    // NOTE(review): -mark's declared return type is not visible here; if it
    // is NSInteger this narrows to int -- confirm against IntStream.
    int marker = [stream mark];

    @try {
        [self performSelector:synpredFragment];
    }
    @catch (RecognitionException *unexpected) {
        NSLog(@"impossible synpred: %@", unexpected.name);
    }

    // Capture the outcome before the state is reset below.
    BOOL matched = !state.failed;
    [stream rewind:marker];
    state.backtracking--;
    state.failed = NO;
    return matched;
}
@end