You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
368 lines
13 KiB
368 lines
13 KiB
/** \file
 * \brief Defines the interface for a common token.
 *
 * All token streams should provide their tokens using an instance
 * of this common token. A custom pointer is provided, where you may attach
 * a further structure to enhance the common token if you feel the need
 * to do so. The C runtime will assume that a token provides implementations
 * of the interface functions, but all of them may be replaced by your own
 * implementation if you require it.
 */
|
|
#ifndef _ANTLR3_COMMON_TOKEN_H
|
|
#define _ANTLR3_COMMON_TOKEN_H
|
|
|
|
// [The "BSD licence"]
|
|
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
|
|
// http://www.temporal-wave.com
|
|
// http://www.linkedin.com/in/jimidle
|
|
//
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions
|
|
// are met:
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
// 3. The name of the author may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#include <antlr3defs.h>
|
|
|
|
/** How many tokens to allocate at once in the token factory
 */
#define ANTLR3_FACTORY_POOL_SIZE 1024

/* Base token types, which all lexer/parser tokens come after in sequence.
 */

/** Indicator of an invalid token
 */
#define ANTLR3_TOKEN_INVALID 0

/** Reserved base token type.
 *  NOTE(review): presumably the "end of rule" marker - confirm against the runtime.
 */
#define ANTLR3_EOR_TOKEN_TYPE 1

/** Imaginary token type to cause a traversal of child nodes in a tree parser
 */
#define ANTLR3_TOKEN_DOWN 2

/** Imaginary token type to signal the end of a stream of child nodes.
 */
#define ANTLR3_TOKEN_UP 3

/** First token that can be used by users/generated code.
 *
 *  The expansion is parenthesized so that the macro behaves as a single
 *  value inside larger expressions (e.g. ANTLR3_MIN_TOKEN_TYPE * 2).
 */
#define ANTLR3_MIN_TOKEN_TYPE (ANTLR3_TOKEN_UP + 1)

/** End of file token
 */
#define ANTLR3_TOKEN_EOF (ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF)

/** Default channel for a token
 */
#define ANTLR3_TOKEN_DEFAULT_CHANNEL 0

/** Reserved channel number for a HIDDEN token - a token that
 *  is hidden from the parser.
 */
#define HIDDEN 99
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// Indicates whether this token is carrying:
//
// State | Meaning
// ------+--------------------------------------
//     0 | Nothing (neither rewrite text, nor setText)
//     1 | char * to user supplied rewrite text
//     2 | pANTLR3_STRING because of setText or similar action
//
#define ANTLR3_TEXT_NONE   0
#define ANTLR3_TEXT_CHARP  1
#define ANTLR3_TEXT_STRING 2
|
|
|
|
/** The definition of an ANTLR3 common token structure, which all implementations
|
|
* of a token stream should provide, installing any further structures in the
|
|
* custom pointer element of this structure.
|
|
*
|
|
* \remark
|
|
* Token streams are in essence provided by lexers or other programs that serve
|
|
* as lexers.
|
|
*/
|
|
typedef struct ANTLR3_COMMON_TOKEN_struct
|
|
{
|
|
/** The actual type of this token
|
|
*/
|
|
ANTLR3_UINT32 type;
|
|
|
|
/** Indicates that a token was produced from the token factory and therefore
|
|
* the the freeToken() method should not do anything itself because
|
|
* token factory is responsible for deleting it.
|
|
*/
|
|
ANTLR3_BOOLEAN factoryMade;
|
|
|
|
/// A string factory that we can use if we ever need the text of a token
|
|
/// and need to manufacture a pANTLR3_STRING
|
|
///
|
|
pANTLR3_STRING_FACTORY strFactory;
|
|
|
|
/** The line number in the input stream where this token was derived from
|
|
*/
|
|
ANTLR3_UINT32 line;
|
|
|
|
/** The offset into the input stream that the line in which this
|
|
* token resides starts.
|
|
*/
|
|
void * lineStart;
|
|
|
|
/** The character position in the line that this token was derived from
|
|
*/
|
|
ANTLR3_INT32 charPosition;
|
|
|
|
/** The virtual channel that this token exists in.
|
|
*/
|
|
ANTLR3_UINT32 channel;
|
|
|
|
/** Pointer to the input stream that this token originated in.
|
|
*/
|
|
pANTLR3_INPUT_STREAM input;
|
|
|
|
/** What the index of this token is, 0, 1, .., n-2, n-1 tokens
|
|
*/
|
|
ANTLR3_MARKER index;
|
|
|
|
/** The character offset in the input stream where the text for this token
|
|
* starts.
|
|
*/
|
|
ANTLR3_MARKER start;
|
|
|
|
/** The character offset in the input stream where the text for this token
|
|
* stops.
|
|
*/
|
|
ANTLR3_MARKER stop;
|
|
|
|
/// Indicates whether this token is carrying:
|
|
///
|
|
/// State | Meaning
|
|
/// ------+--------------------------------------
|
|
/// 0 | Nothing (neither rewrite text, nor setText)
|
|
/// 1 | char * to user supplied rewrite text
|
|
/// 2 | pANTLR3_STRING because of setText or similar action
|
|
///
|
|
/// Affects the union structure tokText below
|
|
/// (uses 32 bit so alignment is always good)
|
|
///
|
|
ANTLR3_UINT32 textState;
|
|
|
|
union
|
|
{
|
|
/// Pointer that is used when the token just has a pointer to
|
|
/// a char *, such as when a rewrite of an imaginary token supplies
|
|
/// a string in the grammar. No sense in constructing a pANTLR3_STRING just
|
|
/// for that, as mostly the text will not be accessed - if it is, then
|
|
/// we will build a pANTLR3_STRING for it a that point.
|
|
///
|
|
pANTLR3_UCHAR chars;
|
|
|
|
/// Some token types actually do carry around their associated text, hence
|
|
/// (*getText)() will return this pointer if it is not NULL
|
|
///
|
|
pANTLR3_STRING text;
|
|
}
|
|
tokText;
|
|
|
|
/** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
|
|
* as the standard structure for a token, a number of user programmable
|
|
* elements are allowed in a token. This is one of them.
|
|
*/
|
|
ANTLR3_UINT32 user1;
|
|
|
|
/** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
|
|
* as the standard structure for a token, a number of user programmable
|
|
* elements are allowed in a token. This is one of them.
|
|
*/
|
|
ANTLR3_UINT32 user2;
|
|
|
|
/** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
|
|
* as the standard structure for a token, a number of user programmable
|
|
* elements are allowed in a token. This is one of them.
|
|
*/
|
|
ANTLR3_UINT32 user3;
|
|
|
|
/** Pointer to a custom element that the ANTLR3 programmer may define and install
|
|
*/
|
|
void * custom;
|
|
|
|
/** Pointer to a function that knows how to free the custom structure when the
|
|
* token is destroyed.
|
|
*/
|
|
void (*freeCustom)(void * custom);
|
|
|
|
/* ==============================
|
|
* API
|
|
*/
|
|
|
|
/** Pointer to function that returns the text pointer of a token, use
|
|
* toString() if you want a pANTLR3_STRING version of the token.
|
|
*/
|
|
pANTLR3_STRING (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that 'might' be able to set the text associated
|
|
* with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
|
|
* do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have
|
|
* strings associated with them but just point into the current input stream. These
|
|
* tokens will implement this function with a function that errors out (probably
|
|
* drastically.
|
|
*/
|
|
void (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text);
|
|
|
|
/** Pointer to a function that 'might' be able to set the text associated
|
|
* with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
|
|
* do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have
|
|
* strings associated with them but just point into the current input stream. These
|
|
* tokens will implement this function with a function that errors out (probably
|
|
* drastically.
|
|
*/
|
|
void (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text);
|
|
|
|
/** Pointer to a function that returns the token type of this token
|
|
*/
|
|
ANTLR3_UINT32 (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that sets the type of this token
|
|
*/
|
|
void (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype);
|
|
|
|
/** Pointer to a function that gets the 'line' number where this token resides
|
|
*/
|
|
ANTLR3_UINT32 (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that sets the 'line' number where this token reside
|
|
*/
|
|
void (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line);
|
|
|
|
/** Pointer to a function that gets the offset in the line where this token exists
|
|
*/
|
|
ANTLR3_INT32 (*getCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that sets the offset in the line where this token exists
|
|
*/
|
|
void (*setCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos);
|
|
|
|
/** Pointer to a function that gets the channel that this token was placed in (parsers
|
|
* can 'tune' to these channels.
|
|
*/
|
|
ANTLR3_UINT32 (*getChannel) (struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that sets the channel that this token should belong to
|
|
*/
|
|
void (*setChannel) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel);
|
|
|
|
/** Pointer to a function that returns an index 0...n-1 of the token in the token
|
|
* input stream.
|
|
*/
|
|
ANTLR3_MARKER (*getTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that can set the token index of this token in the token
|
|
* input stream.
|
|
*/
|
|
void (*setTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER);
|
|
|
|
/** Pointer to a function that gets the start index in the input stream for this token.
|
|
*/
|
|
ANTLR3_MARKER (*getStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that sets the start index in the input stream for this token.
|
|
*/
|
|
void (*setStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
|
|
|
|
/** Pointer to a function that gets the stop index in the input stream for this token.
|
|
*/
|
|
ANTLR3_MARKER (*getStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
|
|
/** Pointer to a function that sets the stop index in the input stream for this token.
|
|
*/
|
|
void (*setStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
|
|
|
|
/** Pointer to a function that returns this token as a text representation that can be
|
|
* printed with embedded control codes such as \n replaced with the printable sequence "\\n"
|
|
* This also yields a string structure that can be used more easily than the pointer to
|
|
* the input stream in certain situations.
|
|
*/
|
|
pANTLR3_STRING (*toString) (struct ANTLR3_COMMON_TOKEN_struct * token);
|
|
}
|
|
ANTLR3_COMMON_TOKEN;
|
|
|
|
/** \brief ANTLR3 Token factory interface to create lots of tokens efficiently
|
|
* rather than creating and freeing lots of little bits of memory.
|
|
*/
|
|
typedef struct ANTLR3_TOKEN_FACTORY_struct
|
|
{
|
|
/** Pointers to the array of tokens that this factory has produced so far
|
|
*/
|
|
pANTLR3_COMMON_TOKEN *pools;
|
|
|
|
/** Current pool tokens we are allocating from
|
|
*/
|
|
ANTLR3_INT32 thisPool;
|
|
|
|
/** Maximum pool count we have available
|
|
*/
|
|
ANTLR3_INT32 maxPool;
|
|
|
|
/** The next token to throw out from the pool, will cause a new pool allocation
|
|
* if this exceeds the available tokenCount
|
|
*/
|
|
ANTLR3_UINT32 nextToken;
|
|
|
|
/** Trick to initialize tokens and their API quickly, we set up this token when the
|
|
* factory is created, then just copy the memory it uses into the new token.
|
|
*/
|
|
ANTLR3_COMMON_TOKEN unTruc;
|
|
|
|
/** Pointer to an input stream that is using this token factory (may be NULL)
|
|
* which will be assigned to the tokens automatically.
|
|
*/
|
|
pANTLR3_INPUT_STREAM input;
|
|
|
|
/** Pointer to a function that returns a new token
|
|
*/
|
|
pANTLR3_COMMON_TOKEN (*newToken) (struct ANTLR3_TOKEN_FACTORY_struct * factory);
|
|
|
|
/** Pointer to a function that resets the factory so you can reuse the pools it
|
|
* has laready allocated
|
|
*/
|
|
void (*reset) (struct ANTLR3_TOKEN_FACTORY_struct * factory);
|
|
|
|
/** Pointer to a function that changes teh curent inptu stream so that
|
|
* new tokens are created with reference to their originating text.
|
|
*/
|
|
void (*setInputStream) (struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input);
|
|
/** Pointer to a function the destroys the factory
|
|
*/
|
|
void (*close) (struct ANTLR3_TOKEN_FACTORY_struct * factory);
|
|
}
|
|
ANTLR3_TOKEN_FACTORY;
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif
|