v811_spc009/external/antlr/runtime/Cpp/include/antlr3lexer.hpp

/** \file
 * Base interface for any ANTLR3 lexer.
 *
 * An ANTLR3 lexer builds from two sets of components:
 *
 *  - The runtime components that provide common functionality such as
 *    traversing character streams, building tokens for output and so on.
 *  - The generated rules and structure of the actual lexer, which call upon the
 *    runtime components.
 *
 * A lexer class contains a character input stream, a base recognizer interface
 * (which it will normally implement) and a token source interface (which it also
 * implements). The token source interface is called by a token consumer (such as
 * a parser, but in theory it can be anything that wants a set of abstract
 * tokens in place of a raw character stream).
 *
 * So then, we set up a lexer in a sequence akin to:
 *
 *  - Create a character stream (something which implements ANTLR3_INPUT_STREAM)
 *    and initialize it.
 *  - Create a lexer interface and tell it where its input stream is.
 *    This will cause the creation of a base recognizer class, which it will
 *    override with its own implementations of some methods. The lexer creator
 *    can also then in turn override anything it likes.
 *  - The lexer token source interface is then passed to some interface that
 *    knows how to use it, by calling for a next token.
 *  - When a next token is called, let ze lexing begin.
 *
 */
#ifndef	_ANTLR3_LEXER_HPP
#define	_ANTLR3_LEXER_HPP

// [The "BSD licence"]
// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB

//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/* Definitions
 */
#include    "antlr3defs.hpp"

ANTLR_BEGIN_NAMESPACE()

/** Sentinel value 0xFFFFFFFF.
 *  NOTE(review): presumably the terminator for the ANTLR_UCHAR arrays handed to
 *  Lexer::matchs(), whose documentation says strings are terminated with
 *  0xFFFFFFFF (an invalid UTF32 character) -- confirm against antlr3lexer.inl.
 */
static const ANTLR_UINT32	ANTLR_STRING_TERMINATOR	= 0xFFFFFFFF;

/** Lexer class template, parameterised on an ImplTraits policy class.
 *
 *  Derives from ImplTraits::RecognizerType<InputStreamType> and from
 *  ImplTraits::TokenSourceType. Only declarations appear in this class body;
 *  the member definitions are expected to come from the accompanying .inl file.
 */
template<class ImplTraits>
class  Lexer : public ImplTraits::template RecognizerType< typename ImplTraits::InputStreamType >,
			   public ImplTraits::TokenSourceType
{
public:
	// Convenience aliases for the types supplied by ImplTraits.
	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
	typedef typename ImplTraits::InputStreamType InputStreamType;
	typedef InputStreamType StreamType;
	typedef typename InputStreamType::IntStreamType IntStreamType;
	typedef typename ImplTraits::CommonTokenType CommonTokenType;
	typedef typename ImplTraits::StreamDataType TokenType;
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::StringStreamType StringStreamType;
	typedef typename ImplTraits::template RecognizerType< InputStreamType > RecognizerType;
	typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType;
	typedef typename ImplTraits::template ExceptionBaseType<InputStreamType> ExceptionBaseType;
	typedef typename ImplTraits::BitsetListType BitsetListType;
	typedef typename ImplTraits::TokenSourceType TokenSourceType;

	// Aliases for types nested in the recognizer and its shared state.
	typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType;
	typedef typename RecognizerType::DebugEventListenerType DebuggerType;

private:
    /** A pointer to the character stream from which this lexer is receiving
     *  characters.
     *  TODO: I may come back to this and implement charstream outside
     *  the input stream as per the java implementation.
     */
    InputStreamType*		m_input;

public:
	/** Constructors. The second overload additionally takes the input stream
	 *  to lex from.
	 *  NOTE(review): the exact use of sizeHint and state is defined in the
	 *  .inl file -- confirm there before relying on it.
	 */
	Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state);
	Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state);

	// Accessors for the input stream and for the recognizer / token source
	// base-class views of this lexer.
	InputStreamType* get_input() const;
	IntStreamType* get_istream() const;
	RecognizerType* get_rec();
	const RecognizerType* get_rec() const;
	TokenSourceType* get_tokSource();

	// Functions used in the .stg (code generation template) file.
	const RecognizerType* get_recognizer() const;
	RecognizerSharedStateType* get_lexstate() const;
	void set_lexstate( RecognizerSharedStateType* lexstate );
	const TokenSourceType* get_tokSource() const;
	CommonTokenType* get_ltoken() const;
	void set_ltoken( const CommonTokenType* ltoken );
	bool hasFailed() const;
	ANTLR_INT32 get_backtracking() const;
	void inc_backtracking();
	void dec_backtracking();
	bool get_failedflag() const;
	void set_failedflag( bool failed );
	InputStreamType* get_strstream() const;
	ANTLR_MARKER  index() const;
	void	seek(ANTLR_MARKER index);
	const CommonTokenType* EOF_Token() const;
	bool hasException() const;
	ExceptionBaseType* get_exception() const;
	void constructEx();
	void lrecover();
	ANTLR_MARKER mark();
	void rewind(ANTLR_MARKER marker);
	void rewindLast();
	void setText( const StringType& text );
	void skip();
	RuleMemoType* getRuleMemo() const;
	DebuggerType* get_debugger() const;
	void setRuleMemo(RuleMemoType* rulememo);
	ANTLR_UINT32 LA(ANTLR_INT32 i);
	void consume();
	void memoize(ANTLR_MARKER	ruleIndex, ANTLR_MARKER	ruleParseStart);
	bool haveParsedRule(ANTLR_MARKER	ruleIndex);

    /** Sets the charstream source for the lexer and causes it to be reset.
     *
     *  \param input
     *  Input stream to install as the character source.
     */
    void	setCharStream(InputStreamType* input);

    /*!
	 * \brief
	 * Change to a new input stream, remembering the old one.
	 *
	 * \param input
	 * New input stream to install as the current one.
	 *
	 * Switches the current character input stream to
	 * a new one, saving the old one, which we will revert to at the end of this
	 * new one.
	 */
    void	pushCharStream(InputStreamType* input);

	/*!
	 * \brief
	 * Stops using the current input stream and reverts to any prior
	 * input stream on the stack.
	 *
	 * Abandons the current input stream, whether it is empty or not, and
	 * reverts to the previous stacked input stream.
	 *
	 * \remark
	 * The function fails silently if there are no prior input streams.
	 */
    void	popCharStream();

    /** Emits (a copy of) the supplied token as the next token in
     *  the stream.
     */
    void	emit(const CommonTokenType* token);

    /** Constructs a new token from the information stored in the lexer and
     *  returns a pointer to it.
     */
    CommonTokenType*	emit();

    /** Attempts to match and consume the specified string from the input
     *  stream. Note that strings must be passed as terminated arrays of ANTLR_UCHAR. Strings are terminated
     *  with 0xFFFFFFFF, which is an invalid UTF32 character.
     */
    bool	matchs(ANTLR_UCHAR* string);

    /** Matches and consumes the specified character from the input stream.
     *  The input stream is required to provide characters via LA() as UTF32 characters. The default lexer
     *  implementation is source encoding agnostic and so input streams do not generally need to
     *  override the default implementation.
     */
    bool	matchc(ANTLR_UCHAR c);

    /** Matches any character in the supplied range (I suppose it could be a token range too
     *  but this would only be useful if the tokens were in some guaranteed order which is
     *  only going to happen with a hand crafted token set).
     */
    bool	matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high);

    /** Matches the next token/char in the input stream
     *  regardless of what it actually is.
     */
    void		matchAny();

    /** Recovers from an error found in the input stream.
     *  Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also
     *  be from a mismatched token that the match routine could not recover from.
     */
    void		recover();

    /** Returns the current line number in the input stream.
     */
    ANTLR_UINT32	getLine();
    ANTLR_MARKER	getCharIndex();
    ANTLR_UINT32	getCharPositionInLine();

    /** Returns the text so far for the current token being generated.
     */
    StringType 	getText();

	// Other utility functions
	void fillExceptionData( ExceptionBaseType* ex );

	/** Default lexer error handler (works for 8 bit streams only!!!)
	 */
	void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex);
	void exConstruct();
	TokenType*	getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e,
								  ANTLR_UINT32	expectedTokenType, BitsetListType*	follow);

    /** Destructor; responsible for freeing the resources of the lexer.
     */
	~Lexer();
};

ANTLR_END_NAMESPACE()

#include "antlr3lexer.inl"

#endif