You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1124 lines
24 KiB
1124 lines
24 KiB
// Copyright 2017 The Bazel Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package syntax
|
|
|
|
// A lexical scanner for Starlark.
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"log"
|
|
"math/big"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// A Token represents a Starlark lexical token.
|
|
type Token int8
|
|
|
|
const (
|
|
ILLEGAL Token = iota
|
|
EOF
|
|
|
|
NEWLINE
|
|
INDENT
|
|
OUTDENT
|
|
|
|
// Tokens with values
|
|
IDENT // x
|
|
INT // 123
|
|
FLOAT // 1.23e45
|
|
STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo"
|
|
BYTES // b"foo", etc
|
|
|
|
// Punctuation
|
|
PLUS // +
|
|
MINUS // -
|
|
STAR // *
|
|
SLASH // /
|
|
SLASHSLASH // //
|
|
PERCENT // %
|
|
AMP // &
|
|
PIPE // |
|
|
CIRCUMFLEX // ^
|
|
LTLT // <<
|
|
GTGT // >>
|
|
TILDE // ~
|
|
DOT // .
|
|
COMMA // ,
|
|
EQ // =
|
|
SEMI // ;
|
|
COLON // :
|
|
LPAREN // (
|
|
RPAREN // )
|
|
LBRACK // [
|
|
RBRACK // ]
|
|
LBRACE // {
|
|
RBRACE // }
|
|
LT // <
|
|
GT // >
|
|
GE // >=
|
|
LE // <=
|
|
EQL // ==
|
|
NEQ // !=
|
|
PLUS_EQ // += (keep order consistent with PLUS..GTGT)
|
|
MINUS_EQ // -=
|
|
STAR_EQ // *=
|
|
SLASH_EQ // /=
|
|
SLASHSLASH_EQ // //=
|
|
PERCENT_EQ // %=
|
|
AMP_EQ // &=
|
|
PIPE_EQ // |=
|
|
CIRCUMFLEX_EQ // ^=
|
|
LTLT_EQ // <<=
|
|
GTGT_EQ // >>=
|
|
STARSTAR // **
|
|
|
|
// Keywords
|
|
AND
|
|
BREAK
|
|
CONTINUE
|
|
DEF
|
|
ELIF
|
|
ELSE
|
|
FOR
|
|
IF
|
|
IN
|
|
LAMBDA
|
|
LOAD
|
|
NOT
|
|
NOT_IN // synthesized by parser from NOT IN
|
|
OR
|
|
PASS
|
|
RETURN
|
|
WHILE
|
|
|
|
maxToken
|
|
)
|
|
|
|
func (tok Token) String() string { return tokenNames[tok] }
|
|
|
|
// GoString is like String but quotes punctuation tokens.
|
|
// Use Sprintf("%#v", tok) when constructing error messages.
|
|
func (tok Token) GoString() string {
|
|
if tok >= PLUS && tok <= STARSTAR {
|
|
return "'" + tokenNames[tok] + "'"
|
|
}
|
|
return tokenNames[tok]
|
|
}
|
|
|
|
var tokenNames = [...]string{
|
|
ILLEGAL: "illegal token",
|
|
EOF: "end of file",
|
|
NEWLINE: "newline",
|
|
INDENT: "indent",
|
|
OUTDENT: "outdent",
|
|
IDENT: "identifier",
|
|
INT: "int literal",
|
|
FLOAT: "float literal",
|
|
STRING: "string literal",
|
|
PLUS: "+",
|
|
MINUS: "-",
|
|
STAR: "*",
|
|
SLASH: "/",
|
|
SLASHSLASH: "//",
|
|
PERCENT: "%",
|
|
AMP: "&",
|
|
PIPE: "|",
|
|
CIRCUMFLEX: "^",
|
|
LTLT: "<<",
|
|
GTGT: ">>",
|
|
TILDE: "~",
|
|
DOT: ".",
|
|
COMMA: ",",
|
|
EQ: "=",
|
|
SEMI: ";",
|
|
COLON: ":",
|
|
LPAREN: "(",
|
|
RPAREN: ")",
|
|
LBRACK: "[",
|
|
RBRACK: "]",
|
|
LBRACE: "{",
|
|
RBRACE: "}",
|
|
LT: "<",
|
|
GT: ">",
|
|
GE: ">=",
|
|
LE: "<=",
|
|
EQL: "==",
|
|
NEQ: "!=",
|
|
PLUS_EQ: "+=",
|
|
MINUS_EQ: "-=",
|
|
STAR_EQ: "*=",
|
|
SLASH_EQ: "/=",
|
|
SLASHSLASH_EQ: "//=",
|
|
PERCENT_EQ: "%=",
|
|
AMP_EQ: "&=",
|
|
PIPE_EQ: "|=",
|
|
CIRCUMFLEX_EQ: "^=",
|
|
LTLT_EQ: "<<=",
|
|
GTGT_EQ: ">>=",
|
|
STARSTAR: "**",
|
|
AND: "and",
|
|
BREAK: "break",
|
|
CONTINUE: "continue",
|
|
DEF: "def",
|
|
ELIF: "elif",
|
|
ELSE: "else",
|
|
FOR: "for",
|
|
IF: "if",
|
|
IN: "in",
|
|
LAMBDA: "lambda",
|
|
LOAD: "load",
|
|
NOT: "not",
|
|
NOT_IN: "not in",
|
|
OR: "or",
|
|
PASS: "pass",
|
|
RETURN: "return",
|
|
WHILE: "while",
|
|
}
|
|
|
|
// A FilePortion describes the content of a portion of a file.
|
|
// Callers may provide a FilePortion for the src argument of Parse
|
|
// when the desired initial line and column numbers are not (1, 1),
|
|
// such as when an expression is parsed from within larger file.
|
|
type FilePortion struct {
|
|
Content []byte
|
|
FirstLine, FirstCol int32
|
|
}
|
|
|
|
// A Position describes the location of a rune of input.
|
|
type Position struct {
|
|
file *string // filename (indirect for compactness)
|
|
Line int32 // 1-based line number; 0 if line unknown
|
|
Col int32 // 1-based column (rune) number; 0 if column unknown
|
|
}
|
|
|
|
// IsValid reports whether the position is valid.
|
|
func (p Position) IsValid() bool { return p.file != nil }
|
|
|
|
// Filename returns the name of the file containing this position.
|
|
func (p Position) Filename() string {
|
|
if p.file != nil {
|
|
return *p.file
|
|
}
|
|
return "<invalid>"
|
|
}
|
|
|
|
// MakePosition returns position with the specified components.
|
|
func MakePosition(file *string, line, col int32) Position { return Position{file, line, col} }
|
|
|
|
// add returns the position at the end of s, assuming it starts at p.
|
|
func (p Position) add(s string) Position {
|
|
if n := strings.Count(s, "\n"); n > 0 {
|
|
p.Line += int32(n)
|
|
s = s[strings.LastIndex(s, "\n")+1:]
|
|
p.Col = 1
|
|
}
|
|
p.Col += int32(utf8.RuneCountInString(s))
|
|
return p
|
|
}
|
|
|
|
func (p Position) String() string {
|
|
file := p.Filename()
|
|
if p.Line > 0 {
|
|
if p.Col > 0 {
|
|
return fmt.Sprintf("%s:%d:%d", file, p.Line, p.Col)
|
|
}
|
|
return fmt.Sprintf("%s:%d", file, p.Line)
|
|
}
|
|
return file
|
|
}
|
|
|
|
func (p Position) isBefore(q Position) bool {
|
|
if p.Line != q.Line {
|
|
return p.Line < q.Line
|
|
}
|
|
return p.Col < q.Col
|
|
}
|
|
|
|
// An scanner represents a single input file being parsed.
|
|
type scanner struct {
|
|
rest []byte // rest of input (in REPL, a line of input)
|
|
token []byte // token being scanned
|
|
pos Position // current input position
|
|
depth int // nesting of [ ] { } ( )
|
|
indentstk []int // stack of indentation levels
|
|
dents int // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
|
|
lineStart bool // after NEWLINE; convert spaces to indentation tokens
|
|
keepComments bool // accumulate comments in slice
|
|
lineComments []Comment // list of full line comments (if keepComments)
|
|
suffixComments []Comment // list of suffix comments (if keepComments)
|
|
|
|
readline func() ([]byte, error) // read next line of input (REPL only)
|
|
}
|
|
|
|
func newScanner(filename string, src interface{}, keepComments bool) (*scanner, error) {
|
|
var firstLine, firstCol int32 = 1, 1
|
|
if portion, ok := src.(FilePortion); ok {
|
|
firstLine, firstCol = portion.FirstLine, portion.FirstCol
|
|
}
|
|
sc := &scanner{
|
|
pos: MakePosition(&filename, firstLine, firstCol),
|
|
indentstk: make([]int, 1, 10), // []int{0} + spare capacity
|
|
lineStart: true,
|
|
keepComments: keepComments,
|
|
}
|
|
sc.readline, _ = src.(func() ([]byte, error)) // ParseCompoundStmt (REPL) only
|
|
if sc.readline == nil {
|
|
data, err := readSource(filename, src)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
sc.rest = data
|
|
}
|
|
return sc, nil
|
|
}
|
|
|
|
func readSource(filename string, src interface{}) ([]byte, error) {
|
|
switch src := src.(type) {
|
|
case string:
|
|
return []byte(src), nil
|
|
case []byte:
|
|
return src, nil
|
|
case io.Reader:
|
|
data, err := ioutil.ReadAll(src)
|
|
if err != nil {
|
|
err = &os.PathError{Op: "read", Path: filename, Err: err}
|
|
return nil, err
|
|
}
|
|
return data, nil
|
|
case FilePortion:
|
|
return src.Content, nil
|
|
case nil:
|
|
return ioutil.ReadFile(filename)
|
|
default:
|
|
return nil, fmt.Errorf("invalid source: %T", src)
|
|
}
|
|
}
|
|
|
|
// An Error describes the nature and position of a scanner or parser error.
|
|
type Error struct {
|
|
Pos Position
|
|
Msg string
|
|
}
|
|
|
|
func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg }
|
|
|
|
// errorf is called to report an error.
|
|
// errorf does not return: it panics.
|
|
func (sc *scanner) error(pos Position, s string) {
|
|
panic(Error{pos, s})
|
|
}
|
|
|
|
func (sc *scanner) errorf(pos Position, format string, args ...interface{}) {
|
|
sc.error(pos, fmt.Sprintf(format, args...))
|
|
}
|
|
|
|
func (sc *scanner) recover(err *error) {
|
|
// The scanner and parser panic both for routine errors like
|
|
// syntax errors and for programmer bugs like array index
|
|
// errors. Turn both into error returns. Catching bug panics
|
|
// is especially important when processing many files.
|
|
switch e := recover().(type) {
|
|
case nil:
|
|
// no panic
|
|
case Error:
|
|
*err = e
|
|
default:
|
|
*err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)}
|
|
if debug {
|
|
log.Fatal(*err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// eof reports whether the input has reached end of file.
|
|
func (sc *scanner) eof() bool {
|
|
return len(sc.rest) == 0 && !sc.readLine()
|
|
}
|
|
|
|
// readLine attempts to read another line of input.
|
|
// Precondition: len(sc.rest)==0.
|
|
func (sc *scanner) readLine() bool {
|
|
if sc.readline != nil {
|
|
var err error
|
|
sc.rest, err = sc.readline()
|
|
if err != nil {
|
|
sc.errorf(sc.pos, "%v", err) // EOF or ErrInterrupt
|
|
}
|
|
return len(sc.rest) > 0
|
|
}
|
|
return false
|
|
}
|
|
|
|
// peekRune returns the next rune in the input without consuming it.
|
|
// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
|
|
func (sc *scanner) peekRune() rune {
|
|
// TODO(adonovan): opt: measure and perhaps inline eof.
|
|
if sc.eof() {
|
|
return 0
|
|
}
|
|
|
|
// fast path: ASCII
|
|
if b := sc.rest[0]; b < utf8.RuneSelf {
|
|
if b == '\r' {
|
|
return '\n'
|
|
}
|
|
return rune(b)
|
|
}
|
|
|
|
r, _ := utf8.DecodeRune(sc.rest)
|
|
return r
|
|
}
|
|
|
|
// readRune consumes and returns the next rune in the input.
|
|
// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
|
|
func (sc *scanner) readRune() rune {
|
|
// eof() has been inlined here, both to avoid a call
|
|
// and to establish len(rest)>0 to avoid a bounds check.
|
|
if len(sc.rest) == 0 {
|
|
if !sc.readLine() {
|
|
sc.error(sc.pos, "internal scanner error: readRune at EOF")
|
|
}
|
|
// Redundant, but eliminates the bounds-check below.
|
|
if len(sc.rest) == 0 {
|
|
return 0
|
|
}
|
|
}
|
|
|
|
// fast path: ASCII
|
|
if b := sc.rest[0]; b < utf8.RuneSelf {
|
|
r := rune(b)
|
|
sc.rest = sc.rest[1:]
|
|
if r == '\r' {
|
|
if len(sc.rest) > 0 && sc.rest[0] == '\n' {
|
|
sc.rest = sc.rest[1:]
|
|
}
|
|
r = '\n'
|
|
}
|
|
if r == '\n' {
|
|
sc.pos.Line++
|
|
sc.pos.Col = 1
|
|
} else {
|
|
sc.pos.Col++
|
|
}
|
|
return r
|
|
}
|
|
|
|
r, size := utf8.DecodeRune(sc.rest)
|
|
sc.rest = sc.rest[size:]
|
|
sc.pos.Col++
|
|
return r
|
|
}
|
|
|
|
// tokenValue records the position and value associated with each token.
|
|
type tokenValue struct {
|
|
raw string // raw text of token
|
|
int int64 // decoded int
|
|
bigInt *big.Int // decoded integers > int64
|
|
float float64 // decoded float
|
|
string string // decoded string or bytes
|
|
pos Position // start position of token
|
|
}
|
|
|
|
// startToken marks the beginning of the next input token.
|
|
// It must be followed by a call to endToken once the token has
|
|
// been consumed using readRune.
|
|
func (sc *scanner) startToken(val *tokenValue) {
|
|
sc.token = sc.rest
|
|
val.raw = ""
|
|
val.pos = sc.pos
|
|
}
|
|
|
|
// endToken marks the end of an input token.
|
|
// It records the actual token string in val.raw if the caller
|
|
// has not done that already.
|
|
func (sc *scanner) endToken(val *tokenValue) {
|
|
if val.raw == "" {
|
|
val.raw = string(sc.token[:len(sc.token)-len(sc.rest)])
|
|
}
|
|
}
|
|
|
|
// nextToken is called by the parser to obtain the next input token.
|
|
// It returns the token value and sets val to the data associated with
|
|
// the token.
|
|
//
|
|
// For all our input tokens, the associated data is val.pos (the
|
|
// position where the token begins), val.raw (the input string
|
|
// corresponding to the token). For string and int tokens, the string
|
|
// and int fields additionally contain the token's interpreted value.
|
|
func (sc *scanner) nextToken(val *tokenValue) Token {
|
|
|
|
// The following distribution of tokens guides case ordering:
|
|
//
|
|
// COMMA 27 %
|
|
// STRING 23 %
|
|
// IDENT 15 %
|
|
// EQL 11 %
|
|
// LBRACK 5.5 %
|
|
// RBRACK 5.5 %
|
|
// NEWLINE 3 %
|
|
// LPAREN 2.9 %
|
|
// RPAREN 2.9 %
|
|
// INT 2 %
|
|
// others < 1 %
|
|
//
|
|
// Although NEWLINE tokens are infrequent, and lineStart is
|
|
// usually (~97%) false on entry, skipped newlines account for
|
|
// about 50% of all iterations of the 'start' loop.
|
|
|
|
start:
|
|
var c rune
|
|
|
|
// Deal with leading spaces and indentation.
|
|
blank := false
|
|
savedLineStart := sc.lineStart
|
|
if sc.lineStart {
|
|
sc.lineStart = false
|
|
col := 0
|
|
for {
|
|
c = sc.peekRune()
|
|
if c == ' ' {
|
|
col++
|
|
sc.readRune()
|
|
} else if c == '\t' {
|
|
const tab = 8
|
|
col += int(tab - (sc.pos.Col-1)%tab)
|
|
sc.readRune()
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
// The third clause matches EOF.
|
|
if c == '#' || c == '\n' || c == 0 {
|
|
blank = true
|
|
}
|
|
|
|
// Compute indentation level for non-blank lines not
|
|
// inside an expression. This is not the common case.
|
|
if !blank && sc.depth == 0 {
|
|
cur := sc.indentstk[len(sc.indentstk)-1]
|
|
if col > cur {
|
|
// indent
|
|
sc.dents++
|
|
sc.indentstk = append(sc.indentstk, col)
|
|
} else if col < cur {
|
|
// outdent(s)
|
|
for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] {
|
|
sc.dents--
|
|
sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop
|
|
}
|
|
if col != sc.indentstk[len(sc.indentstk)-1] {
|
|
sc.error(sc.pos, "unindent does not match any outer indentation level")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Return saved indentation tokens.
|
|
if sc.dents != 0 {
|
|
sc.startToken(val)
|
|
sc.endToken(val)
|
|
if sc.dents < 0 {
|
|
sc.dents++
|
|
return OUTDENT
|
|
} else {
|
|
sc.dents--
|
|
return INDENT
|
|
}
|
|
}
|
|
|
|
// start of line proper
|
|
c = sc.peekRune()
|
|
|
|
// Skip spaces.
|
|
for c == ' ' || c == '\t' {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
|
|
// comment
|
|
if c == '#' {
|
|
if sc.keepComments {
|
|
sc.startToken(val)
|
|
}
|
|
// Consume up to newline (included).
|
|
for c != 0 && c != '\n' {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
if sc.keepComments {
|
|
sc.endToken(val)
|
|
if blank {
|
|
sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw})
|
|
} else {
|
|
sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw})
|
|
}
|
|
}
|
|
}
|
|
|
|
// newline
|
|
if c == '\n' {
|
|
sc.lineStart = true
|
|
|
|
// Ignore newlines within expressions (common case).
|
|
if sc.depth > 0 {
|
|
sc.readRune()
|
|
goto start
|
|
}
|
|
|
|
// Ignore blank lines, except in the REPL,
|
|
// where they emit OUTDENTs and NEWLINE.
|
|
if blank {
|
|
if sc.readline == nil {
|
|
sc.readRune()
|
|
goto start
|
|
} else if len(sc.indentstk) > 1 {
|
|
sc.dents = 1 - len(sc.indentstk)
|
|
sc.indentstk = sc.indentstk[:1]
|
|
goto start
|
|
}
|
|
}
|
|
|
|
// At top-level (not in an expression).
|
|
sc.startToken(val)
|
|
sc.readRune()
|
|
val.raw = "\n"
|
|
return NEWLINE
|
|
}
|
|
|
|
// end of file
|
|
if c == 0 {
|
|
// Emit OUTDENTs for unfinished indentation,
|
|
// preceded by a NEWLINE if we haven't just emitted one.
|
|
if len(sc.indentstk) > 1 {
|
|
if savedLineStart {
|
|
sc.dents = 1 - len(sc.indentstk)
|
|
sc.indentstk = sc.indentstk[:1]
|
|
goto start
|
|
} else {
|
|
sc.lineStart = true
|
|
sc.startToken(val)
|
|
val.raw = "\n"
|
|
return NEWLINE
|
|
}
|
|
}
|
|
|
|
sc.startToken(val)
|
|
sc.endToken(val)
|
|
return EOF
|
|
}
|
|
|
|
// line continuation
|
|
if c == '\\' {
|
|
sc.readRune()
|
|
if sc.peekRune() != '\n' {
|
|
sc.errorf(sc.pos, "stray backslash in program")
|
|
}
|
|
sc.readRune()
|
|
goto start
|
|
}
|
|
|
|
// start of the next token
|
|
sc.startToken(val)
|
|
|
|
// comma (common case)
|
|
if c == ',' {
|
|
sc.readRune()
|
|
sc.endToken(val)
|
|
return COMMA
|
|
}
|
|
|
|
// string literal
|
|
if c == '"' || c == '\'' {
|
|
return sc.scanString(val, c)
|
|
}
|
|
|
|
// identifier or keyword
|
|
if isIdentStart(c) {
|
|
if (c == 'r' || c == 'b') && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') {
|
|
// r"..."
|
|
// b"..."
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
return sc.scanString(val, c)
|
|
} else if c == 'r' && len(sc.rest) > 2 && sc.rest[1] == 'b' && (sc.rest[2] == '"' || sc.rest[2] == '\'') {
|
|
// rb"..."
|
|
sc.readRune()
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
return sc.scanString(val, c)
|
|
}
|
|
|
|
for isIdent(c) {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
sc.endToken(val)
|
|
if k, ok := keywordToken[val.raw]; ok {
|
|
return k
|
|
}
|
|
|
|
return IDENT
|
|
}
|
|
|
|
// brackets
|
|
switch c {
|
|
case '[', '(', '{':
|
|
sc.depth++
|
|
sc.readRune()
|
|
sc.endToken(val)
|
|
switch c {
|
|
case '[':
|
|
return LBRACK
|
|
case '(':
|
|
return LPAREN
|
|
case '{':
|
|
return LBRACE
|
|
}
|
|
panic("unreachable")
|
|
|
|
case ']', ')', '}':
|
|
if sc.depth == 0 {
|
|
sc.errorf(sc.pos, "unexpected %q", c)
|
|
} else {
|
|
sc.depth--
|
|
}
|
|
sc.readRune()
|
|
sc.endToken(val)
|
|
switch c {
|
|
case ']':
|
|
return RBRACK
|
|
case ')':
|
|
return RPAREN
|
|
case '}':
|
|
return RBRACE
|
|
}
|
|
panic("unreachable")
|
|
}
|
|
|
|
// int or float literal, or period
|
|
if isdigit(c) || c == '.' {
|
|
return sc.scanNumber(val, c)
|
|
}
|
|
|
|
// other punctuation
|
|
defer sc.endToken(val)
|
|
switch c {
|
|
case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^': // possibly followed by '='
|
|
start := sc.pos
|
|
sc.readRune()
|
|
if sc.peekRune() == '=' {
|
|
sc.readRune()
|
|
switch c {
|
|
case '<':
|
|
return LE
|
|
case '>':
|
|
return GE
|
|
case '=':
|
|
return EQL
|
|
case '!':
|
|
return NEQ
|
|
case '+':
|
|
return PLUS_EQ
|
|
case '-':
|
|
return MINUS_EQ
|
|
case '/':
|
|
return SLASH_EQ
|
|
case '%':
|
|
return PERCENT_EQ
|
|
case '&':
|
|
return AMP_EQ
|
|
case '|':
|
|
return PIPE_EQ
|
|
case '^':
|
|
return CIRCUMFLEX_EQ
|
|
}
|
|
}
|
|
switch c {
|
|
case '=':
|
|
return EQ
|
|
case '<':
|
|
if sc.peekRune() == '<' {
|
|
sc.readRune()
|
|
if sc.peekRune() == '=' {
|
|
sc.readRune()
|
|
return LTLT_EQ
|
|
} else {
|
|
return LTLT
|
|
}
|
|
}
|
|
return LT
|
|
case '>':
|
|
if sc.peekRune() == '>' {
|
|
sc.readRune()
|
|
if sc.peekRune() == '=' {
|
|
sc.readRune()
|
|
return GTGT_EQ
|
|
} else {
|
|
return GTGT
|
|
}
|
|
}
|
|
return GT
|
|
case '!':
|
|
sc.error(start, "unexpected input character '!'")
|
|
case '+':
|
|
return PLUS
|
|
case '-':
|
|
return MINUS
|
|
case '/':
|
|
if sc.peekRune() == '/' {
|
|
sc.readRune()
|
|
if sc.peekRune() == '=' {
|
|
sc.readRune()
|
|
return SLASHSLASH_EQ
|
|
} else {
|
|
return SLASHSLASH
|
|
}
|
|
}
|
|
return SLASH
|
|
case '%':
|
|
return PERCENT
|
|
case '&':
|
|
return AMP
|
|
case '|':
|
|
return PIPE
|
|
case '^':
|
|
return CIRCUMFLEX
|
|
}
|
|
panic("unreachable")
|
|
|
|
case ':', ';', '~': // single-char tokens (except comma)
|
|
sc.readRune()
|
|
switch c {
|
|
case ':':
|
|
return COLON
|
|
case ';':
|
|
return SEMI
|
|
case '~':
|
|
return TILDE
|
|
}
|
|
panic("unreachable")
|
|
|
|
case '*': // possibly followed by '*' or '='
|
|
sc.readRune()
|
|
switch sc.peekRune() {
|
|
case '*':
|
|
sc.readRune()
|
|
return STARSTAR
|
|
case '=':
|
|
sc.readRune()
|
|
return STAR_EQ
|
|
}
|
|
return STAR
|
|
}
|
|
|
|
sc.errorf(sc.pos, "unexpected input character %#q", c)
|
|
panic("unreachable")
|
|
}
|
|
|
|
func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
|
|
start := sc.pos
|
|
triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
|
|
sc.readRune()
|
|
|
|
// String literals may contain escaped or unescaped newlines,
|
|
// causing them to span multiple lines (gulps) of REPL input;
|
|
// they are the only such token. Thus we cannot call endToken,
|
|
// as it assumes sc.rest is unchanged since startToken.
|
|
// Instead, buffer the token here.
|
|
// TODO(adonovan): opt: buffer only if we encounter a newline.
|
|
raw := new(strings.Builder)
|
|
|
|
// Copy the prefix, e.g. r' or " (see startToken).
|
|
raw.Write(sc.token[:len(sc.token)-len(sc.rest)])
|
|
|
|
if !triple {
|
|
// single-quoted string literal
|
|
for {
|
|
if sc.eof() {
|
|
sc.error(val.pos, "unexpected EOF in string")
|
|
}
|
|
c := sc.readRune()
|
|
raw.WriteRune(c)
|
|
if c == quote {
|
|
break
|
|
}
|
|
if c == '\n' {
|
|
sc.error(val.pos, "unexpected newline in string")
|
|
}
|
|
if c == '\\' {
|
|
if sc.eof() {
|
|
sc.error(val.pos, "unexpected EOF in string")
|
|
}
|
|
c = sc.readRune()
|
|
raw.WriteRune(c)
|
|
}
|
|
}
|
|
} else {
|
|
// triple-quoted string literal
|
|
sc.readRune()
|
|
raw.WriteRune(quote)
|
|
sc.readRune()
|
|
raw.WriteRune(quote)
|
|
|
|
quoteCount := 0
|
|
for {
|
|
if sc.eof() {
|
|
sc.error(val.pos, "unexpected EOF in string")
|
|
}
|
|
c := sc.readRune()
|
|
raw.WriteRune(c)
|
|
if c == quote {
|
|
quoteCount++
|
|
if quoteCount == 3 {
|
|
break
|
|
}
|
|
} else {
|
|
quoteCount = 0
|
|
}
|
|
if c == '\\' {
|
|
if sc.eof() {
|
|
sc.error(val.pos, "unexpected EOF in string")
|
|
}
|
|
c = sc.readRune()
|
|
raw.WriteRune(c)
|
|
}
|
|
}
|
|
}
|
|
val.raw = raw.String()
|
|
|
|
s, _, isByte, err := unquote(val.raw)
|
|
if err != nil {
|
|
sc.error(start, err.Error())
|
|
}
|
|
val.string = s
|
|
if isByte {
|
|
return BYTES
|
|
} else {
|
|
return STRING
|
|
}
|
|
}
|
|
|
|
func (sc *scanner) scanNumber(val *tokenValue, c rune) Token {
|
|
// https://github.com/google/starlark-go/blob/master/doc/spec.md#lexical-elements
|
|
//
|
|
// Python features not supported:
|
|
// - integer literals of >64 bits of precision
|
|
// - 123L or 123l long suffix
|
|
// - traditional octal: 0755
|
|
// https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals
|
|
|
|
start := sc.pos
|
|
fraction, exponent := false, false
|
|
|
|
if c == '.' {
|
|
// dot or start of fraction
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
if !isdigit(c) {
|
|
sc.endToken(val)
|
|
return DOT
|
|
}
|
|
fraction = true
|
|
} else if c == '0' {
|
|
// hex, octal, binary or float
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
|
|
if c == '.' {
|
|
fraction = true
|
|
} else if c == 'x' || c == 'X' {
|
|
// hex
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
if !isxdigit(c) {
|
|
sc.error(start, "invalid hex literal")
|
|
}
|
|
for isxdigit(c) {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
} else if c == 'o' || c == 'O' {
|
|
// octal
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
if !isodigit(c) {
|
|
sc.error(sc.pos, "invalid octal literal")
|
|
}
|
|
for isodigit(c) {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
} else if c == 'b' || c == 'B' {
|
|
// binary
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
if !isbdigit(c) {
|
|
sc.error(sc.pos, "invalid binary literal")
|
|
}
|
|
for isbdigit(c) {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
} else {
|
|
// float (or obsolete octal "0755")
|
|
allzeros, octal := true, true
|
|
for isdigit(c) {
|
|
if c != '0' {
|
|
allzeros = false
|
|
}
|
|
if c > '7' {
|
|
octal = false
|
|
}
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
if c == '.' {
|
|
fraction = true
|
|
} else if c == 'e' || c == 'E' {
|
|
exponent = true
|
|
} else if octal && !allzeros {
|
|
sc.endToken(val)
|
|
sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:])
|
|
}
|
|
}
|
|
} else {
|
|
// decimal
|
|
for isdigit(c) {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
|
|
if c == '.' {
|
|
fraction = true
|
|
} else if c == 'e' || c == 'E' {
|
|
exponent = true
|
|
}
|
|
}
|
|
|
|
if fraction {
|
|
sc.readRune() // consume '.'
|
|
c = sc.peekRune()
|
|
for isdigit(c) {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
|
|
if c == 'e' || c == 'E' {
|
|
exponent = true
|
|
}
|
|
}
|
|
|
|
if exponent {
|
|
sc.readRune() // consume [eE]
|
|
c = sc.peekRune()
|
|
if c == '+' || c == '-' {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
if !isdigit(c) {
|
|
sc.error(sc.pos, "invalid float literal")
|
|
}
|
|
}
|
|
for isdigit(c) {
|
|
sc.readRune()
|
|
c = sc.peekRune()
|
|
}
|
|
}
|
|
|
|
sc.endToken(val)
|
|
if fraction || exponent {
|
|
var err error
|
|
val.float, err = strconv.ParseFloat(val.raw, 64)
|
|
if err != nil {
|
|
sc.error(sc.pos, "invalid float literal")
|
|
}
|
|
return FLOAT
|
|
} else {
|
|
var err error
|
|
s := val.raw
|
|
val.bigInt = nil
|
|
if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') {
|
|
val.int, err = strconv.ParseInt(s[2:], 8, 64)
|
|
} else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') {
|
|
val.int, err = strconv.ParseInt(s[2:], 2, 64)
|
|
} else {
|
|
val.int, err = strconv.ParseInt(s, 0, 64)
|
|
if err != nil {
|
|
num := new(big.Int)
|
|
var ok bool
|
|
val.bigInt, ok = num.SetString(s, 0)
|
|
if ok {
|
|
err = nil
|
|
}
|
|
}
|
|
}
|
|
if err != nil {
|
|
sc.error(start, "invalid int literal")
|
|
}
|
|
return INT
|
|
}
|
|
}
|
|
|
|
// isIdent reports whether c is an identifier rune.
|
|
func isIdent(c rune) bool {
|
|
return isdigit(c) || isIdentStart(c)
|
|
}
|
|
|
|
func isIdentStart(c rune) bool {
|
|
return 'a' <= c && c <= 'z' ||
|
|
'A' <= c && c <= 'Z' ||
|
|
c == '_' ||
|
|
unicode.IsLetter(c)
|
|
}
|
|
|
|
func isdigit(c rune) bool { return '0' <= c && c <= '9' }
|
|
func isodigit(c rune) bool { return '0' <= c && c <= '7' }
|
|
func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' }
|
|
func isbdigit(c rune) bool { return '0' == c || c == '1' }
|
|
|
|
// keywordToken records the special tokens for
|
|
// strings that should not be treated as ordinary identifiers.
|
|
var keywordToken = map[string]Token{
|
|
"and": AND,
|
|
"break": BREAK,
|
|
"continue": CONTINUE,
|
|
"def": DEF,
|
|
"elif": ELIF,
|
|
"else": ELSE,
|
|
"for": FOR,
|
|
"if": IF,
|
|
"in": IN,
|
|
"lambda": LAMBDA,
|
|
"load": LOAD,
|
|
"not": NOT,
|
|
"or": OR,
|
|
"pass": PASS,
|
|
"return": RETURN,
|
|
"while": WHILE,
|
|
|
|
// reserved words:
|
|
"as": ILLEGAL,
|
|
// "assert": ILLEGAL, // heavily used by our tests
|
|
"class": ILLEGAL,
|
|
"del": ILLEGAL,
|
|
"except": ILLEGAL,
|
|
"finally": ILLEGAL,
|
|
"from": ILLEGAL,
|
|
"global": ILLEGAL,
|
|
"import": ILLEGAL,
|
|
"is": ILLEGAL,
|
|
"nonlocal": ILLEGAL,
|
|
"raise": ILLEGAL,
|
|
"try": ILLEGAL,
|
|
"with": ILLEGAL,
|
|
"yield": ILLEGAL,
|
|
}
|