You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
618 lines
17 KiB
618 lines
17 KiB
package ANTLR::Runtime::BaseRecognizer;
|
|
|
|
use Readonly;
|
|
use Carp;
|
|
|
|
use ANTLR::Runtime::RecognizerSharedState;
|
|
use ANTLR::Runtime::Token;
|
|
use ANTLR::Runtime::UnwantedTokenException;
|
|
use ANTLR::Runtime::MissingTokenException;
|
|
use ANTLR::Runtime::MismatchedTokenException;
|
|
|
|
use Moose;
|
|
|
|
# Memoization sentinels.  memoize() records MEMO_RULE_FAILED for a rule that
# failed; get_rule_memoization() answers MEMO_RULE_UNKNOWN when nothing has
# been recorded for a (rule, start index) pair.
Readonly my $MEMO_RULE_FAILED  => -2;
Readonly my $MEMO_RULE_UNKNOWN => -1;

# NOTE(review): not referenced anywhere in this file; presumably a sizing
# hint carried over from another runtime -- confirm before relying on it.
Readonly my $INITIAL_FOLLOW_STACK_SIZE => 100;

# copies from Token object for convenience in actions
Readonly my $DEFAULT_TOKEN_CHANNEL => ANTLR::Runtime::Token->DEFAULT_CHANNEL;
Readonly my $HIDDEN                => ANTLR::Runtime::Token->HIDDEN_CHANNEL;

# Rule name that get_rule_invocation_stack() filters out of its result.
Readonly my $NEXT_TOKEN_RULE_NAME => 'next_token';

# Accessors, so each value is reachable both as a plain function and as a
# class/instance method (e.g. $self->MEMO_RULE_FAILED).
sub MEMO_RULE_FAILED          { return $MEMO_RULE_FAILED }
sub MEMO_RULE_UNKNOWN         { return $MEMO_RULE_UNKNOWN }
sub INITIAL_FOLLOW_STACK_SIZE { return $INITIAL_FOLLOW_STACK_SIZE }
sub DEFAULT_TOKEN_CHANNEL     { return $DEFAULT_TOKEN_CHANNEL }
sub HIDDEN                    { return $HIDDEN }
sub NEXT_TOKEN_RULE_NAME      { return $NEXT_TOKEN_RULE_NAME }
|
|
|
|
# The state of a lexer, parser, or tree parser is collected into a separate
# state object so that it can be shared.  Sharing is what lets one grammar
# import others while using the same error variables and other state
# variables: a kind of explicit multiple inheritance via delegation of
# methods and shared state.
#
# Read/write; a fresh RecognizerSharedState is created when none is supplied.
has 'state' => (
    is      => 'rw',
    isa     => 'ANTLR::Runtime::RecognizerSharedState',
    default => sub { return ANTLR::Runtime::RecognizerSharedState->new },
);
|
|
|
|
# Put the shared state back to its initial values so the recognizer can be
# reused.  Does nothing when no state object is attached.
sub reset {
    my ($self) = @_;

    my $shared = $self->state;
    return if !defined $shared;

    # follow-stack pointer and error bookkeeping
    $shared->_fsp(-1);
    $shared->error_recovery(0);
    $shared->last_error_index(-1);
    $shared->failed(0);
    $shared->syntax_errors(0);

    # whack everything related to backtracking and memoization
    $shared->backtracking(0);

    # wipe the memoization cache
    $shared->rule_memo([]);
}
|
|
|
|
# Match the current input symbol against token type $ttype.
#
# On a match the symbol is consumed, the error-recovery and failed flags are
# cleared, and the matched symbol is returned.  On a mismatch while
# backtracking, the failed flag is set and the symbol is returned without
# consuming.  Otherwise the result of recover_from_mismatched_token() is
# returned.  Croaks with the usage string on a wrong argument count.
sub match {
    croak 'void match(IntStream input, int ttype, BitSet follow)' if @_ != 4;
    my ($self, $input, $ttype, $follow) = @_;

    my $symbol = $self->get_current_input_symbol($input);

    if ($input->LA(1) ne $ttype) {
        if ($self->state->backtracking > 0) {
            # speculative parse: just record the failure
            $self->state->failed(1);
            return $symbol;
        }

        return $self->recover_from_mismatched_token($input, $ttype, $follow);
    }

    $input->consume();
    $self->state->error_recovery(0);
    $self->state->failed(0);

    return $symbol;
}
|
|
|
|
# Accept whatever symbol is next: clear the error-recovery and failed flags
# and consume one symbol from $input.  Croaks with the usage string on a
# wrong argument count.
sub match_any {
    croak 'void match_any(IntStream input)' if @_ != 2;
    my ($self, $input) = @_;

    my $shared = $self->state;
    $shared->error_recovery(0);
    $shared->failed(0);

    return $input->consume();
}
|
|
|
|
# True when the symbol *after* the current one has type $ttype, i.e. the
# current symbol looks like a single extra token in front of the expected one.
sub mismatch_is_unwanted_token {
    my ($self, $input, $ttype) = @_;

    my $type_after_next = $input->LA(2);
    return $type_after_next == $ttype;
}
|
|
|
|
# Decide whether the current mismatch can be explained by a missing token.
#
# $follow is the set of token types that may come next; undef means "unknown"
# and yields 0.  When $follow contains the end-of-rule marker it is widened
# with the context-sensitive FOLLOW set, and the marker is then removed unless
# the follow-stack pointer is negative.  Returns 1 when the current input
# symbol (or the end-of-rule marker) is in the resulting set, else 0.
sub mismatch_is_missing_token {
    my ($self, $input, $follow) = @_;

    return 0 if !defined $follow;

    my $eor = ANTLR::Runtime::Token->EOR_TOKEN_TYPE;

    if ($follow->member($eor)) {
        my $context_follow = $self->compute_context_sensitive_rule_FOLLOW();
        $follow = $follow->or($context_follow);
        $follow->remove($eor) if $self->state->_fsp >= 0;
    }

    my $can_resume_here = $follow->member($input->LA(1)) || $follow->member($eor);
    return $can_resume_here ? 1 : 0;
}
|
|
|
|
# Classify a token mismatch and throw the matching exception:
# UnwantedTokenException, MissingTokenException, or the generic
# MismatchedTokenException, checked in that order.  Croaks with the usage
# string on a wrong argument count.
sub mismatch {
    croak 'void mismatch(IntStream input, int ttype, BitSet follow)' if @_ != 4;
    my ($self, $input, $ttype, $follow) = @_;

    my $exception_class;
    if ($self->mismatch_is_unwanted_token($input, $ttype)) {
        $exception_class = 'ANTLR::Runtime::UnwantedTokenException';
    }
    elsif ($self->mismatch_is_missing_token($input, $follow)) {
        $exception_class = 'ANTLR::Runtime::MissingTokenException';
    }
    else {
        $exception_class = 'ANTLR::Runtime::MismatchedTokenException';
    }

    $exception_class->new({
        expecting => $ttype,
        input     => $input
    })->throw();
}
|
|
|
|
# Report a recognition error, unless the recognizer is already in error
# recovery (which suppresses follow-on reports).  Otherwise bumps the syntax
# error count, enters error recovery, and displays the error.  Croaks with
# the usage string on a wrong argument count.
sub report_error {
    croak 'void report_error(RecognitionException e)' if @_ != 2;
    my ($self, $e) = @_;

    return if $self->state->error_recovery;

    my $shared = $self->state;
    $shared->syntax_errors($shared->syntax_errors + 1);
    $shared->error_recovery(1);

    $self->display_recognition_error($self->get_token_names(), $e);
    return;
}
|
|
|
|
# Format an error as "<header> <message>" and hand it to emit_error_message().
# Croaks with the usage string on a wrong argument count.
sub display_recognition_error {
    croak 'void display_recognition_error(String[] token_names, RecognitionException e)'
        if @_ != 3;
    my ($self, $token_names, $e) = @_;

    my $header  = $self->get_error_header($e);
    my $message = $self->get_error_message($e, $token_names);

    return $self->emit_error_message($header . ' ' . $message);
}
|
|
|
|
# Build the message text for a recognition exception.
#
#   $e            - the exception object; its class selects the wording
#   $token_names  - array ref indexed by token type, used to name the
#                   expected token
#
# Returns the message string, or undef when the exception class is not one
# of those handled here.  Croaks with the usage string on a wrong argument
# count.
sub get_error_message {
    croak 'String get_error_message(RecognitionException e, String[] token_names)'
        if @_ != 3;
    my ($self, $e, $token_names) = @_;

    # Display name of the token type the exception says it was expecting.
    my $expected_name = sub {
        my $expecting = $e->get_expecting;
        return $expecting == ANTLR::Runtime::Token->EOF
            ? 'EOF'
            : $token_names->[$expecting];
    };

    if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) {
        my $token_name = $expected_name->();
        return 'mismatched input ' . $self->get_token_error_display($e->get_token)
            . ' expecting ' . $token_name;
    }
    elsif ($e->isa('ANTLR::Runtime::MismatchedTreeNodeException')) {
        my $token_name = $expected_name->();
        return 'mismatched tree node: ' . $e->node
            . ' expecting ' . $token_name;
    }
    elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) {
        return 'no viable alternative at input '
            . $self->get_token_error_display($e->get_token);
    }
    elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) {
        # Must be a method call: get_token_error_display() insists on
        # receiving the invocant plus the token.
        return 'required (...)+ loop did not match anything at input '
            . $self->get_token_error_display($e->get_token);
    }
    elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')
        || $e->isa('ANTLR::Runtime::MismatchedNotSetException')) {
        return 'mismatched input ' . $self->get_token_error_display($e->get_token)
            . ' expecting set ' . $e->get_expecting;
    }
    elsif ($e->isa('ANTLR::Runtime::FailedPredicateException')) {
        return 'rule ' . $e->rule_name . ' failed predicate: {'
            . $e->predicate_text . '}?';
    }

    # Unknown exception class.  An explicit undef (not a bare return) keeps
    # the result a single value for callers in list context.
    return undef;
}
|
|
|
|
# Number of syntax errors counted in the shared state (report_error()
# increments it once per reported error).
sub get_number_of_syntax_errors {
    my $self = shift;

    my $shared = $self->state;
    return $shared->syntax_errors;
}
|
|
|
|
# Location prefix for an error message, in the form "line LINE:COLUMN", taken
# from the exception's line and character position.  Croaks with the usage
# string on a wrong argument count.
sub get_error_header {
    croak 'String get_error_header(RecognitionException e)' if @_ != 2;
    my ($self, $e) = @_;

    return 'line ' . $e->get_line() . ':' . $e->get_char_position_in_line();
}
|
|
|
|
# Render a token for use inside an error message.
#
# Uses the token's text; when the token has no text, falls back to '<EOF>'
# for the EOF type or '<TYPE>' for any other type.  Newline, carriage return
# and tab are shown as the two-character escapes \n, \r and \t, and the
# result is wrapped in single quotes.  Croaks with the usage string on a
# wrong argument count.
sub get_token_error_display {
    croak 'String get_token_error_display(Token t)' if @_ != 2;
    my ($self, $t) = @_;

    my $s = $t->get_text();
    if (!defined $s) {
        my $ttype = $t->get_type();
        $s = $ttype == ANTLR::Runtime::Token->EOF ? '<EOF>' : "<$ttype>";
    }

    # The replacement side of s/// is interpolated like a double-quoted
    # string, so a literal backslash must be written exactly once escaped;
    # a single-quoted lookup table sidesteps the double-escaping trap.
    my %escape_for = (
        "\n" => '\n',
        "\r" => '\r',
        "\t" => '\t',
    );
    $s =~ s/([\n\r\t])/$escape_for{$1}/g;

    return "'$s'";
}
|
|
|
|
# Write one error message, followed by a newline, to STDERR.  Kept as a
# separate method so the output destination lives in one place.  Croaks with
# the usage string on a wrong argument count.
sub emit_error_message {
    croak 'void emit_error_message(String msg)' if @_ != 2;
    my ($self, $msg) = @_;

    return print {*STDERR} $msg, "\n";
}
|
|
|
|
# Resynchronize after a recognition error by discarding input until a symbol
# in the error-recovery set is reached.  Croaks with the usage string on a
# wrong argument count.
sub recover {
    croak 'void recover(IntStream input, RecognitionException re)' if @_ != 3;
    my ($self, $input, $re) = @_;

    # A second error at the same input index means the previous recovery
    # consumed nothing (the current symbol is already in the recovery set).
    # Drop one symbol as a failsafe so recovery cannot loop forever.
    my $no_progress = $self->state->last_error_index == $input->index();
    $input->consume() if $no_progress;

    $self->state->last_error_index($input->index());

    my $recovery_set = $self->compute_error_recovery_set();
    $self->begin_resync();
    $self->consume_until($input, $recovery_set);
    return $self->end_resync();
}
|
|
|
|
# Hook called just before input is discarded during error recovery.
# Intentionally does nothing here.
sub begin_resync {
    return;
}
|
|
|
|
# Hook called just after input has been discarded during error recovery.
# Intentionally does nothing here.
sub end_resync {
    return;
}
|
|
|
|
# Set of token types used by recover() to resynchronize: the follow sets
# combined in non-exact mode.  Returns whatever combine_follows(0) returns.
# Croaks with the usage string on a wrong argument count.
sub compute_error_recovery_set {
    croak 'void compute_error_recovery_set()' if @_ != 1;
    my $self = shift;

    return $self->combine_follows(0);
}
|
|
|
|
# Follow set for the current rule invocation context: the follow sets
# combined in exact mode.  Returns whatever combine_follows(1) returns.
# Croaks with the usage string on a wrong argument count.
sub compute_context_sensitive_rule_FOLLOW {
    croak 'void compute_context_sensitive_rule_FOLLOW()' if @_ != 1;
    my $self = shift;

    return $self->combine_follows(1);
}
|
|
|
|
# Union the follow sets recorded in the shared state, newest first, into a
# new BitSet and return it.
#
#   $exact - when true, stop after the first (newest) local follow set that
#            contains the end-of-rule marker
#
# Croaks with the usage string on a wrong argument count.
#
# NOTE(review): this walks the whole `following` array and never consults the
# follow-stack pointer (_fsp), although reset() rewinds _fsp without emptying
# the array -- confirm the two cannot get out of step.
# NOTE(review): stopping when the marker IS present looks inverted relative
# to the usual "keep going while the end of the rule is visible" formulation
# -- confirm against the reference runtime before changing it.
sub combine_follows {
    croak 'BitSet combine_follows(boolean exact)' if @_ != 2;
    my ($self, $exact) = @_;

    my $follow_set = ANTLR::Runtime::BitSet->new();

    foreach my $local_follow_set (reverse @{ $self->state->following }) {
        # relies on BitSet overloading |= as set union
        $follow_set |= $local_follow_set;

        last if $exact
            && $local_follow_set->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);
    }
    #$follow_set->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);

    return $follow_set;
}
|
|
|
|
# Try to recover in place from a mismatched token.
#
#   * Extra token: when the symbol after the current one has type $ttype, the
#     current symbol is dropped, the error is reported, and the expected
#     symbol is consumed and returned.
#   * Missing token: when the mismatch is explained by a missing token, a
#     replacement symbol is obtained from get_missing_symbol(), the error is
#     reported, and the replacement is returned without consuming input.
#   * Otherwise a MismatchedTokenException is thrown.
#
# Croaks with the usage string on a wrong argument count.
sub recover_from_mismatched_token {
    croak 'void recover_from_mismatched_token(IntStream input, int ttype, BitSet follow)'
        if @_ != 4;
    my ($self, $input, $ttype, $follow) = @_;

    if ($self->mismatch_is_unwanted_token($input, $ttype)) {
        my $ex = ANTLR::Runtime::UnwantedTokenException->new({
            expecting => $ttype,
            input     => $input,
        });

        $self->begin_resync();
        $input->consume();          # drop the extra token
        $self->end_resync();
        $self->report_error($ex);   # reported only after the token is dropped

        my $matched_symbol = $self->get_current_input_symbol($input);
        $input->consume();          # move past the expected token
        return $matched_symbol;
    }

    if ($self->mismatch_is_missing_token($input, $follow)) {
        my $inserted = $self->get_missing_symbol({
            input               => $input,
            expected_token_type => $ttype,
            follow              => $follow,
        });

        # Constructed through ->new like every other exception in this file;
        # calling the class name as a function is an undefined-sub error.
        my $ex = ANTLR::Runtime::MissingTokenException->new({
            expecting => $ttype,
            input     => $input,
            inserted  => $inserted,
        });
        $self->report_error($ex);
        return $inserted;
    }

    ANTLR::Runtime::MismatchedTokenException->new({
        expecting => $ttype,
        input     => $input,
    })->throw();
}
|
|
|
|
# Recovery for a mismatch against a set of token types.  When the mismatch is
# explained by a missing token, the error is reported and the symbol from
# get_missing_symbol() is returned; otherwise the exception $e is thrown.
# Croaks with the usage string on a wrong argument count.
sub recover_from_mismatched_set {
    croak 'void recover_from_mismatched_set(IntStream input, RecognitionException e, BitSet follow)'
        if @_ != 4;
    my ($self, $input, $e, $follow) = @_;

    if (!$self->mismatch_is_missing_token($input, $follow)) {
        return $e->throw();
    }

    $self->report_error($e);

    my %request = (
        input               => $input,
        exception           => $e,
        expected_token_type => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE,
        follow              => $follow,
    );
    return $self->get_missing_symbol(\%request);
}
|
|
|
|
# Decide whether parsing can continue after a mismatched element.
#
# Returns 0 when $follow is undef.  When $follow contains the end-of-rule
# marker it is first widened with the context-sensitive FOLLOW set and the
# marker is removed.  If the current input symbol is in the resulting set the
# error is reported and 1 is returned; otherwise 0.  Croaks with the usage
# string on a wrong argument count.
sub recover_from_mismatched_element {
    croak 'boolean recover_from_mismatched_element(IntStream input, RecognitionException e, BitSet follow)'
        if @_ != 4;
    my ($self, $input, $e, $follow) = @_;

    if (!defined $follow) {
        return 0;
    }

    my $eor = ANTLR::Runtime::Token->EOR_TOKEN_TYPE;
    if ($follow->member($eor)) {
        my $context_follow = $self->compute_context_sensitive_rule_FOLLOW();
        # relies on BitSet overloading |= as set union
        $follow |= $context_follow;
        $follow->remove($eor);
    }

    if (!$follow->member($input->LA(1))) {
        return 0;
    }

    $self->report_error($e);
    return 1;
}
|
|
|
|
# Current input symbol for $input.  This base implementation has no notion of
# a symbol and always yields undef; match() returns whatever this produces.
# NOTE(review): presumably overridden by concrete recognizers -- confirm.
sub get_current_input_symbol {
    my ($self, $input) = @_;

    # explicit undef: always exactly one value, even in list context
    return undef;
}
|
|
|
|
# Produce a stand-in symbol for a token that error recovery decided was
# missing.  Takes one hash ref with the keys input, exception,
# expected_token_type and follow.  This base implementation ignores them and
# always yields undef.
# NOTE(review): presumably overridden by concrete recognizers -- confirm.
sub get_missing_symbol {
    my ($self, $arg_ref) = @_;

    # Named arguments, unpacked to document the accepted keys.
    my $follow              = $arg_ref->{follow};
    my $expected_token_type = $arg_ref->{expected_token_type};
    my $exception           = $arg_ref->{exception};
    my $input               = $arg_ref->{input};

    # explicit undef: always exactly one value, even in list context
    return undef;
}
|
|
|
|
# Discard input symbols until EOF or a stop symbol is the current symbol.
#
# The third argument is either a single token type or an
# ANTLR::Runtime::BitSet of token types; the stop symbol itself is not
# consumed.  Croaks with the usage string on a wrong argument count or an
# undefined third argument.
sub consume_until {
    croak 'void consume_until(IntStream input, (int token_type | BitSet set))'
        if @_ != 3 || !defined $_[2];
    my ($self, $input, $stop) = @_;

    # Only references are asked ->isa: a plain integer token type is not a
    # valid method invocant on every Perl version (perldiag, "Can't call
    # method ... without a package or object reference").
    my $is_stop_type
        = ref $stop && $stop->isa('ANTLR::Runtime::BitSet')
        ? sub { return $stop->member($_[0]) }
        : sub { return $_[0] == $stop };

    my $eof   = ANTLR::Runtime::Token->EOF;
    my $ttype = $input->LA(1);
    while ($ttype != $eof && !$is_stop_type->($ttype)) {
        $input->consume();
        $ttype = $input->LA(1);
    }

    return;
}
|
|
|
|
# Record a follow set for a rule invocation: append $fset to the shared
# state's `following` list and advance the follow-stack pointer by one.
# Croaks with the usage string on a wrong argument count.
sub push_follow {
    croak 'void push_follow(BitSet fset)' if @_ != 2;
    my ($self, $fset) = @_;

    my $shared = $self->state;
    push @{ $shared->following }, $fset;

    return $shared->_fsp($shared->_fsp + 1);
}
|
|
|
|
# Collect the rule subs currently on the Perl call stack.
#
# Walks the caller frames from the innermost outwards and keeps the
# fully-qualified sub name of every frame that was called from the
# recognizer's own package, skipping runtime-internal frames and the
# next-token rule.  Returns an array ref of sub names, innermost first.
# Croaks with the usage string on a wrong argument count.
sub get_rule_invocation_stack {
    croak 'List get_rule_invocation_stack()' if @_ != 1;
    my ($self) = @_;

    my $recognizer_class = ref $self;
    my $next_token_rule  = NEXT_TOKEN_RULE_NAME();

    my @rules;
    my $depth = 0;
    while (my @frame = caller $depth++) {
        my ($package, undef, undef, $subroutine) = @frame;

        # frames called from inside the runtime itself
        next if $package =~ /^ANTLR::Runtime::/;

        # caller() reports package-qualified sub names, so compare only the
        # final component against the bare rule name
        next if $subroutine =~ /(?:\A|::)\Q$next_token_rule\E\z/;

        # frames called from anywhere other than this recognizer's package
        next if $package ne $recognizer_class;

        push @rules, $subroutine;
    }

    return \@rules;
}
|
|
|
|
# Current backtracking depth stored in the shared state.  Croaks with the
# usage string on a wrong argument count.
sub get_backtracking_level {
    croak 'int get_backtracking_level()' if @_ != 1;
    my $self = shift;

    my $shared = $self->state;
    return $shared->backtracking;
}
|
|
|
|
# Store a new backtracking depth in the shared state.
sub set_backtracking_level {
    my ($self, $level) = @_;

    return $self->state->backtracking($level);
}
|
|
|
|
# Value of the `failed` flag in the shared state (match() sets it on a
# mismatch while backtracking).
sub failed {
    my $self = shift;

    my $shared = $self->state;
    return $shared->failed;
}
|
|
|
|
# Token names indexed by token type.  This base implementation has none and
# yields undef.
# NOTE(review): presumably overridden by generated recognizers -- confirm.
sub get_token_names {
    # Must stay an explicit undef: report_error() passes this result inline
    # as a positional argument, and an empty list would shift the arguments.
    return undef;
}
|
|
|
|
# Name of the grammar file this recognizer was generated from.  This base
# implementation does not know it and yields undef.
# NOTE(review): presumably overridden by generated recognizers -- confirm.
sub get_grammar_file_name {
    # explicit undef: always exactly one value, even in list context
    return undef;
}
|
|
|
|
# Map a list of tokens to their text.  $tokens is an array ref of objects
# answering get_text(); returns the list of texts, or undef when $tokens is
# undef.  Croaks with the usage string on a wrong argument count.
sub to_strings {
    croak 'List to_strings(List tokens)' if @_ != 2;
    my ($self, $tokens) = @_;

    return undef if !defined $tokens;

    my @texts;
    for my $token (@{$tokens}) {
        push @texts, $token->get_text();
    }
    return @texts;
}
|
|
|
|
# Look up the memoized result of a rule at a given start index.
#
#   $rule_index        - index of the rule in the memo table
#   $rule_start_index  - input index at which the rule was started
#
# Returns the recorded stop index, or MEMO_RULE_UNKNOWN when nothing has been
# recorded.  As a side effect, creates the per-rule table on first use.
# Croaks with the usage string on a wrong argument count.
sub get_rule_memoization {
    croak 'int get_rule_memoization(int rule_index, int rule_start_index)'
        if @_ != 3;
    my ($self, $rule_index, $rule_start_index) = @_;

    # The memo table belongs to the shared state object, not to the
    # recognizer itself.
    my $rule_memo = $self->state->rule_memo;
    if (!defined $rule_memo->[$rule_index]) {
        $rule_memo->[$rule_index] = {};
    }

    my $stop_index = $rule_memo->[$rule_index]->{$rule_start_index};
    if (!defined $stop_index) {
        return $self->MEMO_RULE_UNKNOWN;
    }
    return $stop_index;
}
|
|
|
|
# Has this rule already been attempted at the current input position?
#
# Returns 0 when nothing is memoized.  Otherwise returns 1 after replaying
# the memoized outcome: a memoized failure sets the `failed` flag, a
# memoized success seeks $input to just past the recorded stop index.
# (The spelling of the sub name is part of the public interface.)
# Croaks with the usage string on a wrong argument count.
sub alredy_parsed_rule {
    croak 'boolean alredy_parsed_rule(IntStream input, int rule_index)'
        if @_ != 3;
    my ($self, $input, $rule_index) = @_;

    my $memoized_stop = $self->get_rule_memoization($rule_index, $input->index());

    return 0 if $memoized_stop == $self->MEMO_RULE_UNKNOWN;

    if ($memoized_stop != $self->MEMO_RULE_FAILED) {
        # skip over the input the rule matched last time
        $input->seek($memoized_stop + 1);
    }
    else {
        $self->state->failed(1);
    }

    return 1;
}
|
|
|
|
# Record the outcome of a rule started at $rule_start_index: MEMO_RULE_FAILED
# when the `failed` flag is set, otherwise the index of the last consumed
# symbol.  Nothing is stored unless the per-rule table already exists.
# Croaks with the usage string on a wrong argument count.
sub memoize {
    croak 'void memoize(IntStream input, int rule_index, int rule_start_index)'
        if @_ != 4;
    my ($self, $input, $rule_index, $rule_start_index) = @_;

    my $outcome;
    if ($self->state->failed) {
        $outcome = $self->MEMO_RULE_FAILED;
    }
    else {
        $outcome = $input->index() - 1;
    }

    my $table_for_rule = $self->state->rule_memo->[$rule_index];
    if (defined $table_for_rule) {
        $table_for_rule->{$rule_start_index} = $outcome;
    }
}
|
|
|
|
# Total number of memoized entries across all rules; slots of the memo table
# that hold no per-rule table are skipped.  Croaks with the usage string on
# a wrong argument count.
sub get_rule_memoization_cache_size {
    croak 'int get_rule_memoization_cache_size()' if @_ != 1;
    my ($self) = @_;

    my $total = 0;
    for my $table_for_rule (grep { defined } @{ $self->state->rule_memo }) {
        $total += keys %{$table_for_rule};
    }

    return $total;
}
|
|
|
|
# Print an "enter" trace line for a rule to the currently selected output
# handle: the rule name and input symbol, then the `failed` flag when set and
# the backtracking depth when positive.  Croaks with the usage string on a
# wrong argument count.
sub trace_in {
    croak 'void trace_in(String rule_name, int rule_index, input_symbol)'
        if @_ != 4;
    my ($self, $rule_name, $rule_index, $input_symbol) = @_;

    print "enter $rule_name $input_symbol";

    print ' failed=', $self->state->failed
        if $self->state->failed;

    print ' backtracking=', $self->state->backtracking
        if $self->state->backtracking > 0;

    print "\n";
}
|
|
|
|
# Print an "exit" trace line for a rule to the currently selected output
# handle: the rule name and input symbol, then the `failed` flag when set and
# the backtracking depth when positive.  Croaks with the usage string on a
# wrong argument count.
sub trace_out {
    croak 'void trace_out(String rule_name, int rule_index, input_symbol)'
        if @_ != 4;
    my ($self, $rule_name, $rule_index, $input_symbol) = @_;

    print "exit $rule_name $input_symbol";

    print ' failed=', $self->state->failed
        if $self->state->failed;

    print ' backtracking=', $self->state->backtracking
        if $self->state->backtracking > 0;

    print "\n";
}
|
|
|
|
# Drop the keywords Moose imported (has, extends, ...) from this package and
# make the class immutable now that its definition is complete.
no Moose;
__PACKAGE__->meta->make_immutable;

# A module must end with a true value.
1;
|
|
__END__
|
|
|
|
=head1 NAME

ANTLR::Runtime::BaseRecognizer - support code shared by generated lexers, parsers, and tree parsers

=head1 DESCRIPTION

A generic recognizer that can handle recognizers generated from
lexer, parser, and tree grammars.  This is essentially all of the
parsing support code; most of it deals with error recovery and
backtracking.
|