mirror of
https://github.com/robertkrimen/otto
synced 2025-09-28 18:45:22 +08:00
550 lines
9.6 KiB
Go
550 lines
9.6 KiB
Go
package otto
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"bytes"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
"unicode/utf16"
|
|
"strconv"
|
|
)
|
|
|
|
var keywordTable map[string]bool = boolFields(`
|
|
break
|
|
case
|
|
catch
|
|
continue
|
|
default
|
|
delete
|
|
do
|
|
else
|
|
finally
|
|
for
|
|
function
|
|
if
|
|
in
|
|
instanceof
|
|
new
|
|
null
|
|
return
|
|
switch
|
|
this
|
|
throw
|
|
try
|
|
typeof
|
|
var
|
|
while
|
|
with
|
|
void
|
|
`)
|
|
|
|
var punctuatorTable map[string]bool
|
|
func init() {
|
|
|
|
punctuatorTable = boolFields(`
|
|
>>>= === !== >>> <<= >>=
|
|
`)
|
|
|
|
// 2-character
|
|
// <= >= == != ++ -- << >> && ||
|
|
// += -= *= %= &= |= ^= /=
|
|
for _, value := range "<>=!+-*%&|^/" {
|
|
punctuatorTable[string(value) + "="] = true
|
|
}
|
|
|
|
for _, value := range "+-<>&|" {
|
|
punctuatorTable[string(value) + string(value)] = true
|
|
}
|
|
|
|
// 1-character
|
|
for _, value := range "[]<>+-*%&|^!~?:=/;{},()" {
|
|
punctuatorTable[string(value)] = true
|
|
}
|
|
}
|
|
|
|
type _token struct {
|
|
Line, Column, Character int
|
|
Kind, File, Text string
|
|
Error bool
|
|
}
|
|
|
|
func (self _token) IsValid() bool {
|
|
return self.Kind != ""
|
|
}
|
|
|
|
type _lexer struct {
|
|
Source string
|
|
Tail int
|
|
Head int
|
|
Width int
|
|
|
|
Line int
|
|
LineHead int
|
|
}
|
|
|
|
func (self _lexer) Copy() *_lexer {
|
|
newSelf := self
|
|
return &newSelf
|
|
}
|
|
|
|
func (self *_lexer) scanEndOfLine(chr rune, consume bool) bool {
|
|
if !isLineTerminator(chr) {
|
|
return false
|
|
}
|
|
if consume {
|
|
self.Next()
|
|
}
|
|
if chr == '\r' && self.Next() != '\n' {
|
|
self.Back() // Back because the next character was NOT \n
|
|
}
|
|
self.Line += 1
|
|
return true
|
|
}
|
|
|
|
func (self *_lexer) ScanLineComment() {
|
|
for {
|
|
chr := self.Next()
|
|
if chr == endOfFile || self.scanEndOfLine(chr, false) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (self *_lexer) ScanBlockComment() int {
|
|
lineCount := 0
|
|
for {
|
|
chr := self.Next()
|
|
switch {
|
|
case chr == '*' && self.Peek() == '/':
|
|
self.Next() // /
|
|
return lineCount
|
|
case chr == endOfFile:
|
|
panic(&_syntaxError{
|
|
Message: "Unexpected token ILLEGAL",
|
|
})
|
|
case self.scanEndOfLine(chr, false):
|
|
lineCount += 1
|
|
}
|
|
}
|
|
panic(hereBeDragons())
|
|
}
|
|
|
|
func (self *_lexer) ScanSkip() int {
|
|
|
|
lineCount := 0
|
|
|
|
for {
|
|
chr := self.Peek()
|
|
switch {
|
|
case chr == '/':
|
|
read, _, _ := self.Read(3)
|
|
switch read[1] {
|
|
case '/':
|
|
self.ScanLineComment()
|
|
lineCount += 1
|
|
case '*':
|
|
lineCount += self.ScanBlockComment()
|
|
default:
|
|
goto RETURN
|
|
}
|
|
self.Ignore()
|
|
self.LineHead = self.Tail
|
|
case isWhiteSpace(chr):
|
|
self.Next()
|
|
self.Ignore()
|
|
case self.scanEndOfLine(chr, true):
|
|
lineCount += 1
|
|
self.Ignore()
|
|
self.LineHead = self.Tail
|
|
default:
|
|
goto RETURN
|
|
}
|
|
}
|
|
|
|
RETURN:
|
|
return lineCount
|
|
}
|
|
|
|
func (self *_lexer) ScanLineSkip() bool {
|
|
return self.ScanSkip() > 0
|
|
}
|
|
|
|
func (self *_lexer) ScanRegularExpression() _token {
|
|
|
|
self.ScanSkip()
|
|
|
|
token := self.scanQuoteLiteral()
|
|
if token.Kind != "//" {
|
|
panic(token.newSyntaxError("Invalid regular expression"))
|
|
}
|
|
return token
|
|
}
|
|
|
|
func (self *_lexer) Scan() (token _token) {
|
|
|
|
self.ScanSkip()
|
|
|
|
if self.Peek() == endOfFile {
|
|
return self.Emit("EOF")
|
|
}
|
|
|
|
if token = self.scanPunctuator(); token.IsValid() {
|
|
return
|
|
}
|
|
|
|
rune := self.Peek()
|
|
|
|
if rune == '\'' || rune == '"' {
|
|
if token = self.scanQuoteLiteral(); token.IsValid() {
|
|
return
|
|
}
|
|
}
|
|
|
|
if rune == '.' || isDecimalDigit(rune) {
|
|
if token = self.scanNumericLiteral(); token.IsValid() {
|
|
return
|
|
}
|
|
}
|
|
|
|
if token = self.scanIdentifierKeyword(); token.IsValid() {
|
|
return
|
|
}
|
|
|
|
return self.scanIllegal()
|
|
}
|
|
|
|
func (self *_lexer) scanQuoteLiteral() _token {
|
|
|
|
value := self.Next()
|
|
quote := value
|
|
kind := "string"
|
|
if value == '/' {
|
|
kind = "//"
|
|
}
|
|
|
|
error := func() _token {
|
|
if self.Width != 0 {
|
|
self.Back()
|
|
}
|
|
return self.Emit("illegal")
|
|
}
|
|
|
|
var text bytes.Buffer
|
|
|
|
for {
|
|
value = self.Next()
|
|
switch value {
|
|
case endOfFile:
|
|
return error()
|
|
case quote:
|
|
return self.EmitWith(kind, text.String())
|
|
case '\\':
|
|
value = self.Next()
|
|
if isLineTerminator(value) {
|
|
if quote == '/' {
|
|
return error()
|
|
}
|
|
self.scanEndOfLine(value, false)
|
|
continue
|
|
}
|
|
if quote == '/' { // RegularExpression
|
|
// TODO Handle the case of [\]?
|
|
text.WriteRune('\\')
|
|
text.WriteRune(value)
|
|
continue
|
|
}
|
|
switch value {
|
|
case 'n':
|
|
text.WriteRune('\n')
|
|
case 'r':
|
|
text.WriteRune('\t')
|
|
case 't':
|
|
text.WriteRune('\t')
|
|
case 'b':
|
|
text.WriteRune('\t')
|
|
case 'f':
|
|
text.WriteRune('\t')
|
|
case 'v':
|
|
text.WriteRune('\t')
|
|
default:
|
|
text.WriteRune(value)
|
|
case 'u':
|
|
result := self.scanHexadecimalRune(4)
|
|
if result != utf8.RuneError {
|
|
text.WriteRune(result)
|
|
} else {
|
|
text.WriteRune(value)
|
|
}
|
|
|
|
case 'x':
|
|
result := self.scanHexadecimalRune(2)
|
|
if result != utf8.RuneError {
|
|
text.WriteRune(result)
|
|
} else {
|
|
text.WriteRune(value)
|
|
}
|
|
}
|
|
// TODO Octal escaping
|
|
default:
|
|
if isLineTerminator(value) {
|
|
return error()
|
|
}
|
|
text.WriteRune(value)
|
|
}
|
|
}
|
|
|
|
return error()
|
|
}
|
|
|
|
func (self *_lexer) scanHexadecimalRune(size int) rune {
|
|
_, read, width := self.Read(size)
|
|
value, err := strconv.ParseUint(read, 16, size * 4)
|
|
if err != nil {
|
|
// Not a valid hexadecimal sequence
|
|
return utf8.RuneError
|
|
}
|
|
self.Tail += width
|
|
return utf16.Decode([]uint16{uint16(value)})[0]
|
|
}
|
|
|
|
func (self *_lexer) scanPunctuator() (token _token) {
|
|
|
|
if self.Accept(";{},()") {
|
|
return self.Emit("punctuator")
|
|
}
|
|
|
|
accept := func(count int){
|
|
for count > 0 {
|
|
count--
|
|
self.Next()
|
|
}
|
|
}
|
|
|
|
read, word, _ := self.Read(4)
|
|
|
|
if read[0] == '.' && !isDecimalDigit(read[1]) {
|
|
accept(1)
|
|
return self.Emit("punctuator")
|
|
}
|
|
|
|
for len(word) > 0 {
|
|
if punctuatorTable[word] {
|
|
accept(len(word))
|
|
return self.Emit("punctuator")
|
|
}
|
|
word = word[:len(word) - 1]
|
|
}
|
|
|
|
return self.Emit("punctuator")
|
|
}
|
|
|
|
func (self *_lexer) scanNumericLiteral() _token {
|
|
// FIXME Make sure this is according to the specification
|
|
|
|
isHex, isOctal := false, false
|
|
{
|
|
self.Accept(".")
|
|
|
|
acceptable := "0123456789"
|
|
if self.Accept("0") {
|
|
if self.Accept("xX") {
|
|
acceptable = "0123456789abcdefABCDEF"
|
|
isHex = true
|
|
} else if self.Accept("01234567") {
|
|
acceptable = "01234567"
|
|
isOctal = true
|
|
} else if self.Accept("89") {
|
|
return self.Emit("illegal")
|
|
}
|
|
}
|
|
|
|
self.AcceptRun(acceptable)
|
|
if !isHex && !isOctal && self.Accept(".") {
|
|
self.AcceptRun(acceptable)
|
|
}
|
|
|
|
if self.Length() == 2 && isHex { // 0x$ or 0X$
|
|
return self.Emit("illegal")
|
|
}
|
|
}
|
|
|
|
if !isHex && !isOctal && self.Accept("eE") {
|
|
self.Accept("+-")
|
|
length := self.Length()
|
|
self.AcceptRun("0123456789")
|
|
if length == self.Length() { // <number>e$
|
|
return self.Emit("illegal")
|
|
}
|
|
}
|
|
|
|
if isAlphaNumeric(self.Peek()) {
|
|
self.Next()
|
|
// Bad number
|
|
return self.Emit("illegal")
|
|
}
|
|
|
|
return self.Emit("number")
|
|
}
|
|
|
|
func (self *_lexer) scanIdentifierKeyword() (token _token) {
|
|
if !isIdentifierStart(self.Peek()) {
|
|
return
|
|
}
|
|
for {
|
|
switch chr := self.Peek(); {
|
|
case isAlphaNumericDollar(chr):
|
|
self.Next()
|
|
default:
|
|
word := self.Word()
|
|
switch {
|
|
case keywordTable[word] == true:
|
|
return self.Emit(word)
|
|
case word == "true", word == "false":
|
|
return self.Emit("boolean")
|
|
default:
|
|
return self.Emit("identifier")
|
|
}
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func (self *_lexer) scanIllegal() _token {
|
|
return self.Emit("illegal")
|
|
}
|
|
|
|
func (self *_lexer) EmitWith(kind string, text string) _token {
|
|
token := _token{
|
|
Character: 1 + self.Head,
|
|
Line: 1 + self.Line,
|
|
Column: 1 + self.Head - self.LineHead,
|
|
|
|
Kind: kind,
|
|
Text: text,
|
|
Error: false,
|
|
}
|
|
if kind == "punctuator" {
|
|
token.Kind = token.Text
|
|
}
|
|
self.Head = self.Tail
|
|
if ottoDebug {
|
|
fmt.Printf("emit: %s %s\n", token.Kind, token.Text)
|
|
}
|
|
if kind == "illegal" {
|
|
token.Error = true
|
|
}
|
|
return token
|
|
}
|
|
|
|
func (self *_lexer) Emit(kind string) _token {
|
|
return self.EmitWith(kind, self.Word())
|
|
}
|
|
|
|
func (self *_lexer) Read(count int) ([]rune, string, int) {
|
|
read := make([]rune, count)
|
|
tail := self.Tail
|
|
found := 0
|
|
for i := 0; i < count; i++ {
|
|
if tail >= len(self.Source) {
|
|
read[i] = endOfFile
|
|
continue
|
|
}
|
|
width := 0
|
|
read[i], width = utf8.DecodeRuneInString(self.Source[tail:])
|
|
tail += width
|
|
found = i
|
|
}
|
|
distance := tail - self.Tail
|
|
word := string(read[:found + 1])
|
|
return read, word, distance
|
|
}
|
|
|
|
func (self *_lexer) Next() (chr rune) {
|
|
chr, self.Width = self._Peek()
|
|
self.Tail += self.Width
|
|
return chr
|
|
}
|
|
|
|
func (self *_lexer) _Peek() (rune, int) {
|
|
if self.Tail >= len(self.Source) {
|
|
return endOfFile, 0
|
|
}
|
|
chr, width := utf8.DecodeRuneInString(self.Source[self.Tail:])
|
|
return chr, width
|
|
}
|
|
|
|
func (self *_lexer) Peek() rune {
|
|
chr, _ := self._Peek()
|
|
return chr
|
|
}
|
|
|
|
func (self *_lexer) Back() {
|
|
if self.Width == 0 {
|
|
panic(hereBeDragons("Can't backup when self.Width == 0"))
|
|
}
|
|
self.Tail -= self.Width
|
|
}
|
|
|
|
func (self *_lexer) Ignore() {
|
|
self.Head = self.Tail
|
|
}
|
|
|
|
func (self *_lexer) Accept(valid string) bool {
|
|
if strings.IndexRune(valid, self.Peek()) >= 0 {
|
|
self.Next()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (self *_lexer) AcceptRun(valid string) bool {
|
|
found := false
|
|
for strings.IndexRune(valid, self.Peek()) >= 0 {
|
|
self.Next()
|
|
found = true
|
|
}
|
|
return found
|
|
}
|
|
|
|
func (self *_lexer) Word() string {
|
|
return self.Source[self.Head:self.Tail]
|
|
}
|
|
|
|
func (self *_lexer) Length() int {
|
|
return self.Tail - self.Head
|
|
}
|
|
|
|
func isDecimalDigit(rune rune) bool {
|
|
return unicode.IsDigit(rune)
|
|
}
|
|
|
|
func isAlphaNumeric(rune rune) bool {
|
|
return rune == '_' || unicode.IsLetter(rune) || unicode.IsDigit(rune)
|
|
}
|
|
|
|
func isAlphaNumericDollar(rune rune) bool {
|
|
return rune == '$' || rune == '_' || unicode.IsLetter(rune) || unicode.IsDigit(rune)
|
|
}
|
|
|
|
func isIdentifierStart(rune rune) bool {
|
|
return rune == '$' || rune == '_' || unicode.IsLetter(rune)
|
|
}
|
|
|
|
func isWhiteSpace(chr rune) bool {
|
|
switch chr {
|
|
case ' ', '\t':
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func isLineTerminator(chr rune) bool {
|
|
switch chr {
|
|
case '\n', '\r', '\u2028', '\u2029':
|
|
return true
|
|
}
|
|
return false
|
|
}
|