mirror of
				https://github.com/robertkrimen/otto
				synced 2025-10-19 19:55:30 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			666 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			666 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package otto
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 	"fmt"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"unicode"
 | |
| 	"unicode/utf16"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
| var keywordTable map[string]bool = boolFields(`
 | |
| 	break
 | |
| 	case
 | |
| 	catch
 | |
| 	continue
 | |
| 	default
 | |
| 	delete
 | |
| 	do
 | |
| 	else
 | |
| 	finally
 | |
| 	for
 | |
| 	function
 | |
| 	if
 | |
| 	in
 | |
| 	instanceof
 | |
| 	new
 | |
| 	null
 | |
| 	return
 | |
| 	switch
 | |
| 	this
 | |
| 	throw
 | |
| 	try
 | |
| 	typeof
 | |
| 	var
 | |
| 	while
 | |
| 	with
 | |
| 	void
 | |
| 
 | |
| 	debugger
 | |
| 	const
 | |
| `)
 | |
| 
 | |
| var punctuatorTable map[string]bool
 | |
| 
 | |
| func init() {
 | |
| 
 | |
| 	punctuatorTable = boolFields(`
 | |
| 		>>>= === !== >>> <<= >>=
 | |
| 	`)
 | |
| 
 | |
| 	// 2-character
 | |
| 	// <= >= == != ++ -- << >> && ||
 | |
| 	// += -= *= %= &= |= ^= /=
 | |
| 	for _, value := range "<>=!+-*%&|^/" {
 | |
| 		punctuatorTable[string(value)+"="] = true
 | |
| 	}
 | |
| 
 | |
| 	for _, value := range "+-<>&|" {
 | |
| 		punctuatorTable[string(value)+string(value)] = true
 | |
| 	}
 | |
| 
 | |
| 	// 1-character
 | |
| 	for _, value := range "[]<>+-*%&|^!~?:=/;{},()" {
 | |
| 		punctuatorTable[string(value)] = true
 | |
| 	}
 | |
| }
 | |
| 
 | |
// _token is a single lexical token produced by the lexer.
//
// The field order is part of the type's contract (positional literals), so it
// must not be rearranged.
type _token struct {
	// Line is the 1-based line number recorded when the token was emitted.
	Line int
	// Column is the 1-based byte column recorded when the token was emitted.
	Column int
	// Character is the 1-based byte offset recorded when the token was emitted.
	Character int

	// Kind classifies the token, e.g. "EOF", "string", "number", "boolean",
	// "identifier", "illegal", or the text of a keyword/punctuator.
	// An empty Kind marks "no token" (see IsValid).
	Kind string
	// File is not populated by the lexer code in this file.
	File string
	// Text is the token's text (for strings: the unescaped content).
	Text string

	// Error is set for tokens of kind "illegal".
	Error bool
}
 | |
| 
 | |
| func (self _token) IsValid() bool {
 | |
| 	return self.Kind != ""
 | |
| }
 | |
| 
 | |
// _lexer turns Source into a stream of _token values.
//
// Runes are decoded lazily from Source into readIn. Positions are tracked
// twice: as rune indices into readIn (head/tail) and as byte offsets into
// Source (headOffset/tailOffset).
type _lexer struct {
	// Source is the complete text being scanned.
	Source string
	//Tail		int
	//Head		int
	//Width		int

	// lineCount is the number of line terminators consumed so far.
	lineCount int
	// zeroColumnOffset is the byte offset that token columns are measured from.
	zeroColumnOffset int

	// readIn buffers every rune decoded from Source so far.
	readIn []rune
	// readInOffset is the byte offset in Source of the next rune to decode.
	readInOffset int
	// atEndOfFile is set once decoding has run past the end of Source.
	atEndOfFile bool
	// head is the rune index in readIn where the pending token starts.
	head int
	// tail is the rune index in readIn of the next rune to consume.
	tail int

	// headOffset is the byte offset in Source matching head.
	headOffset int
	// tailOffset is the byte offset in Source matching tail.
	tailOffset int
}
 | |
| 
 | |
| // Only called for testing (for now)
 | |
| func newLexer(source string) _lexer {
 | |
| 	self := _lexer{
 | |
| 		Source: source,
 | |
| 		readIn: make([]rune, 0, len(source)), // Guestimate
 | |
| 	}
 | |
| 	return self
 | |
| }
 | |
| 
 | |
| func (self _lexer) Copy() *_lexer {
 | |
| 	newSelf := self
 | |
| 	return &newSelf
 | |
| }
 | |
| 
 | |
| func (self *_lexer) scanEndOfLine(chr rune, consume bool) bool {
 | |
| 	if !isLineTerminator(chr) {
 | |
| 		return false
 | |
| 	}
 | |
| 	if consume {
 | |
| 		self.next()
 | |
| 	}
 | |
| 	if chr == '\r' && self.peek() == '\n' {
 | |
| 		self.next() // Consume \n
 | |
| 	}
 | |
| 	self.lineCount += 1
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| func (self *_lexer) ScanLineComment() {
 | |
| 	for {
 | |
| 		chr := self.next()
 | |
| 		if chr == endOfFile || self.scanEndOfLine(chr, false) {
 | |
| 			return
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (self *_lexer) ScanBlockComment() int {
 | |
| 	lineCount := 0
 | |
| 	for {
 | |
| 		chr := self.next()
 | |
| 		switch {
 | |
| 		case chr == '*' && self.peek() == '/':
 | |
| 			self.next() // /
 | |
| 			return lineCount
 | |
| 		case chr == endOfFile:
 | |
| 			panic(&_syntaxError{
 | |
| 				Message: "Unexpected token ILLEGAL",
 | |
| 			})
 | |
| 		case self.scanEndOfLine(chr, false):
 | |
| 			lineCount += 1
 | |
| 		}
 | |
| 	}
 | |
| 	panic(hereBeDragons())
 | |
| }
 | |
| 
 | |
| func (self *_lexer) ScanSkip() int {
 | |
| 
 | |
| 	lineCount := 0
 | |
| 
 | |
| 	for {
 | |
| 		chr := self.peek()
 | |
| 		switch {
 | |
| 		case chr == '/':
 | |
| 			read, _, found, width := self.read(2)
 | |
| 			switch read[1] {
 | |
| 			case '/':
 | |
| 				self.tail += found
 | |
| 				self.tailOffset += width
 | |
| 				self.ScanLineComment()
 | |
| 				lineCount += 1
 | |
| 			case '*':
 | |
| 				self.tail += found
 | |
| 				self.tailOffset += width
 | |
| 				lineCount += self.ScanBlockComment()
 | |
| 			default:
 | |
| 				goto RETURN
 | |
| 			}
 | |
| 			self.ignore()
 | |
| 			self.zeroColumnOffset = self.tailOffset
 | |
| 		case isWhiteSpace(chr):
 | |
| 			self.next()
 | |
| 			self.ignore()
 | |
| 		case self.scanEndOfLine(chr, true):
 | |
| 			lineCount += 1
 | |
| 			self.ignore()
 | |
| 			self.zeroColumnOffset = self.tailOffset
 | |
| 		default:
 | |
| 			goto RETURN
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| RETURN:
 | |
| 	return lineCount
 | |
| }
 | |
| 
 | |
| func (self *_lexer) ScanLineSkip() bool {
 | |
| 	return self.ScanSkip() > 0
 | |
| }
 | |
| 
 | |
| func (self *_lexer) ScanRegularExpression() _token {
 | |
| 
 | |
| 	self.ScanSkip()
 | |
| 
 | |
| 	token := self.scanQuoteLiteral()
 | |
| 	if token.Kind != "//" {
 | |
| 		panic(token.newSyntaxError("Invalid regular expression"))
 | |
| 	}
 | |
| 	return token
 | |
| }
 | |
| 
 | |
| func (self *_lexer) Scan() (token _token) {
 | |
| 
 | |
| 	self.ScanSkip()
 | |
| 
 | |
| 	if self.peek() == endOfFile {
 | |
| 		return self.emit("EOF")
 | |
| 	}
 | |
| 
 | |
| 	if token = self.scanPunctuator(); token.IsValid() {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	chr := self.peek()
 | |
| 
 | |
| 	if chr == '\'' || chr == '"' {
 | |
| 		if token = self.scanQuoteLiteral(); token.IsValid() {
 | |
| 			return
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if chr == '.' || isDecimalDigit(chr) {
 | |
| 		if token = self.scanNumericLiteral(); token.IsValid() {
 | |
| 			return
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if token = self.scanIdentifierKeyword(); token.IsValid() {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	return self.scanIllegal()
 | |
| }
 | |
| 
 | |
| func (self *_lexer) scanQuoteLiteral() _token {
 | |
| 
 | |
| 	value := self.next()
 | |
| 	quote := value
 | |
| 	kind := "string"
 | |
| 	if value == '/' {
 | |
| 		kind = "//"
 | |
| 	}
 | |
| 
 | |
| 	errorIllegal := func() _token {
 | |
| 		self.back()
 | |
| 		return self.emit("illegal")
 | |
| 	}
 | |
| 
 | |
| 	var text bytes.Buffer
 | |
| 
 | |
| 	for {
 | |
| 		value = self.next()
 | |
| 		switch value {
 | |
| 		case endOfFile:
 | |
| 			return errorIllegal()
 | |
| 		case quote:
 | |
| 			return self.emitWith(kind, text.String())
 | |
| 		case '\\':
 | |
| 			value = self.next()
 | |
| 			if isLineTerminator(value) {
 | |
| 				if quote == '/' {
 | |
| 					return errorIllegal()
 | |
| 				}
 | |
| 				self.scanEndOfLine(value, false)
 | |
| 				continue
 | |
| 			}
 | |
| 			if quote == '/' { // RegularExpression
 | |
| 				// TODO Handle the case of [\]?
 | |
| 				text.WriteRune('\\')
 | |
| 				text.WriteRune(value)
 | |
| 				continue
 | |
| 			}
 | |
| 			switch value {
 | |
| 			case 'n':
 | |
| 				text.WriteRune('\n')
 | |
| 			case 'r':
 | |
| 				text.WriteRune('\r')
 | |
| 			case 't':
 | |
| 				text.WriteRune('\t')
 | |
| 			case 'b':
 | |
| 				text.WriteRune('\b')
 | |
| 			case 'f':
 | |
| 				text.WriteRune('\f')
 | |
| 			case 'v':
 | |
| 				text.WriteRune('\v')
 | |
| 			case '0':
 | |
| 				text.WriteRune(0)
 | |
| 			case 'u':
 | |
| 				result := self.scanHexadecimalRune(4)
 | |
| 				if result != utf8.RuneError {
 | |
| 					text.WriteRune(result)
 | |
| 				} else {
 | |
| 					text.WriteRune(value)
 | |
| 				}
 | |
| 
 | |
| 			case 'x':
 | |
| 				result := self.scanHexadecimalRune(2)
 | |
| 				if result != utf8.RuneError {
 | |
| 					text.WriteRune(result)
 | |
| 				} else {
 | |
| 					text.WriteRune(value)
 | |
| 				}
 | |
| 			default:
 | |
| 				text.WriteRune(value)
 | |
| 			}
 | |
| 			// TODO Octal escaping
 | |
| 		default:
 | |
| 			if isLineTerminator(value) {
 | |
| 				return errorIllegal()
 | |
| 			}
 | |
| 			text.WriteRune(value)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return self.emit("illegal")
 | |
| }
 | |
| 
 | |
// convertHexadecimalRune interprets word as a hexadecimal number (4 bits per
// character) and returns it as a rune, decoded as a single UTF-16 code unit.
//
// utf8.RuneError is returned when word is not a valid hexadecimal sequence;
// a lone surrogate value also decodes to utf8.RuneError.
func convertHexadecimalRune(word string) rune {
	parsed, err := strconv.ParseUint(word, 16, 4*len(word))
	if err == nil {
		codeUnits := []uint16{uint16(parsed)}
		return utf16.Decode(codeUnits)[0]
	}
	// Not a valid hexadecimal sequence
	return utf8.RuneError
}
 | |
| 
 | |
| func (self *_lexer) scanHexadecimalRune(size int) rune {
 | |
| 	_, word, found, width := self.read(size)
 | |
| 	chr := convertHexadecimalRune(word)
 | |
| 	if chr == utf8.RuneError {
 | |
| 		return chr
 | |
| 	}
 | |
| 	self.tail += found
 | |
| 	self.tailOffset += width
 | |
| 	return chr
 | |
| }
 | |
| 
 | |
| func (self *_lexer) scanPunctuator() (token _token) {
 | |
| 
 | |
| 	if self.accept(";{},()") {
 | |
| 		return self.emit("punctuator")
 | |
| 	}
 | |
| 
 | |
| 	accept := func(count int) {
 | |
| 		for count > 0 {
 | |
| 			count--
 | |
| 			self.next()
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	read, word, _, _ := self.read(4)
 | |
| 
 | |
| 	if read[0] == '.' && !isDecimalDigit(read[1]) {
 | |
| 		accept(1)
 | |
| 		return self.emit("punctuator")
 | |
| 	}
 | |
| 
 | |
| 	for len(word) > 0 {
 | |
| 		if punctuatorTable[word] {
 | |
| 			accept(len(word))
 | |
| 			return self.emit("punctuator")
 | |
| 		}
 | |
| 		word = word[:len(word)-1]
 | |
| 	}
 | |
| 
 | |
| 	return
 | |
| 	// I think this doesn't make any sense
 | |
| 	//return self.emit("punctuator")
 | |
| }
 | |
| 
 | |
| func (self *_lexer) scanNumericLiteral() _token {
 | |
| 	// FIXME Make sure this is according to the specification
 | |
| 
 | |
| 	isHex, isOctal := false, false
 | |
| 	{
 | |
| 		self.accept(".")
 | |
| 
 | |
| 		acceptable := "0123456789"
 | |
| 		if self.accept("0") {
 | |
| 			if self.accept("xX") {
 | |
| 				acceptable = "0123456789abcdefABCDEF"
 | |
| 				isHex = true
 | |
| 			} else if self.accept("01234567") {
 | |
| 				acceptable = "01234567"
 | |
| 				isOctal = true
 | |
| 			} else if self.accept("89") {
 | |
| 				return self.emit("illegal")
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		self.acceptRun(acceptable)
 | |
| 		if !isHex && !isOctal && self.accept(".") {
 | |
| 			self.acceptRun(acceptable)
 | |
| 		}
 | |
| 
 | |
| 		if self.length() == 2 && isHex { // 0x$ or 0X$
 | |
| 			return self.emit("illegal")
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if !isHex && !isOctal && self.accept("eE") {
 | |
| 		self.accept("+-")
 | |
| 		length := self.length()
 | |
| 		self.acceptRun("0123456789")
 | |
| 		if length == self.length() { // <number>e$
 | |
| 			return self.emit("illegal")
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if isAlphaNumeric(self.peek()) {
 | |
| 		self.next()
 | |
| 		// Bad number
 | |
| 		return self.emit("illegal")
 | |
| 	}
 | |
| 
 | |
| 	return self.emit("number")
 | |
| }
 | |
| 
 | |
| func (self *_lexer) scanIdentifierKeyword() (token _token) {
 | |
| 	word := []rune{}
 | |
| 
 | |
| 	// The first character should be of the class isIdentifierStart
 | |
| 	identifierCheck := isIdentifierStart
 | |
| 
 | |
| 	for {
 | |
| 		switch chr := self.peek(); {
 | |
| 		case identifierCheck(chr):
 | |
| 			if chr == '\\' {
 | |
| 				read, _, _, _ := self.read(6)
 | |
| 				if read[1] == 'u' {
 | |
| 					chr := convertHexadecimalRune(string(read[2:]))
 | |
| 					if chr == utf8.RuneError {
 | |
| 						word = append(word, 'u')
 | |
| 						self.skip(2) // Skip \u
 | |
| 					} else {
 | |
| 						if chr == '\\' || !identifierCheck(chr) {
 | |
| 							return
 | |
| 						}
 | |
| 						word = append(word, chr)
 | |
| 						self.skip(6) // Skip \u????
 | |
| 					}
 | |
| 				} else {
 | |
| 					return
 | |
| 				}
 | |
| 			} else {
 | |
| 				// Basically a skip of 1
 | |
| 				word = append(word, self.next())
 | |
| 			}
 | |
| 		default:
 | |
| 			if len(word) == 0 {
 | |
| 				// Did not scan at least one identifier character, so return with failure
 | |
| 				return
 | |
| 			}
 | |
| 			word := string(word)
 | |
| 			switch {
 | |
| 			case keywordTable[word] == true:
 | |
| 				return self.emitWith(word, word)
 | |
| 			case word == "true", word == "false":
 | |
| 				return self.emitWith("boolean", word)
 | |
| 			default:
 | |
| 				return self.emitWith("identifier", word)
 | |
| 			}
 | |
| 			return
 | |
| 		}
 | |
| 
 | |
| 		// Now we're looking at the body of the identiifer
 | |
| 		identifierCheck = isIdentifierPart
 | |
| 	}
 | |
| 
 | |
| 	return
 | |
| }
 | |
| 
 | |
| func (self *_lexer) scanIllegal() _token {
 | |
| 	return self.emit("illegal")
 | |
| }
 | |
| 
 | |
| func (self *_lexer) emitWith(kind string, text string) _token {
 | |
| 	token := _token{
 | |
| 		Character: 1 + self.tailOffset,
 | |
| 		Line:      1 + self.lineCount,
 | |
| 		Column:    1 + self.tailOffset - self.zeroColumnOffset,
 | |
| 
 | |
| 		Kind:  kind,
 | |
| 		Text:  text,
 | |
| 		Error: false,
 | |
| 	}
 | |
| 	if kind == "punctuator" {
 | |
| 		token.Kind = token.Text
 | |
| 	}
 | |
| 
 | |
| 	self.headOffset = self.tailOffset
 | |
| 	self.head = self.tail
 | |
| 
 | |
| 	if ottoDebug {
 | |
| 		fmt.Printf("emit: %s %s\n", token.Kind, token.Text)
 | |
| 	}
 | |
| 	if kind == "illegal" {
 | |
| 		token.Error = true
 | |
| 	}
 | |
| 	return token
 | |
| }
 | |
| 
 | |
| func (self *_lexer) emit(kind string) _token {
 | |
| 	return self.emitWith(kind, self.word())
 | |
| }
 | |
| 
 | |
| func (self *_lexer) read(count int) ([]rune, string, int, int) {
 | |
| 	head := self.tail
 | |
| 	tail := head + count
 | |
| 	unread := tail - len(self.readIn)
 | |
| 	for unread > 0 {
 | |
| 		unread--
 | |
| 		self.read1()
 | |
| 	}
 | |
| 
 | |
| 	var read []rune
 | |
| 	found := 0
 | |
| 	length := len(self.readIn)
 | |
| 	if tail >= length {
 | |
| 		read = make([]rune, count)
 | |
| 		index, head := 0, head
 | |
| 		for index < count {
 | |
| 			if head >= length {
 | |
| 				read[index] = endOfFile
 | |
| 			} else {
 | |
| 				found++
 | |
| 				read[index] = self.readIn[head]
 | |
| 			}
 | |
| 			index++
 | |
| 			head++
 | |
| 		}
 | |
| 	} else {
 | |
| 		found = count
 | |
| 		read = self.readIn[head:tail]
 | |
| 	}
 | |
| 
 | |
| 	width := 0
 | |
| 	word := ""
 | |
| 	if found > 0 {
 | |
| 		width = len(string(read[:found]))
 | |
| 		word = string(read[:found])
 | |
| 	}
 | |
| 
 | |
| 	return read, word, found, width
 | |
| }
 | |
| 
 | |
| func (self *_lexer) next() rune {
 | |
| 	chr, width := self.peek1()
 | |
| 	if width != 0 {
 | |
| 		self.tail += 1
 | |
| 		self.tailOffset += width
 | |
| 	}
 | |
| 	return chr
 | |
| }
 | |
| 
 | |
| func (self *_lexer) skip(count int) {
 | |
| 	read := self.readIn[self.tail : self.tail+count]
 | |
| 	for _, chr := range read {
 | |
| 		self.tail += 1
 | |
| 		self.tailOffset += utf8.RuneLen(chr)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (self *_lexer) peek1() (chr rune, width int) {
 | |
| 	if self.tail < len(self.readIn) {
 | |
| 		chr = self.readIn[self.tail]
 | |
| 		width = utf8.RuneLen(chr)
 | |
| 	} else {
 | |
| 		chr, width = self.read1()
 | |
| 	}
 | |
| 	return
 | |
| }
 | |
| 
 | |
| func (self *_lexer) read1() (rune, int) {
 | |
| 	if self.readInOffset >= len(self.Source) {
 | |
| 		self.atEndOfFile = true
 | |
| 		return endOfFile, 0
 | |
| 	}
 | |
| 	chr, width := utf8.DecodeRuneInString(self.Source[self.readInOffset:])
 | |
| 	self.readIn = append(self.readIn, chr)
 | |
| 	self.readInOffset += width
 | |
| 	return chr, width
 | |
| }
 | |
| 
 | |
| func (self *_lexer) peek() rune {
 | |
| 	chr, _ := self.peek1()
 | |
| 	return chr
 | |
| }
 | |
| 
 | |
| func (self *_lexer) back() {
 | |
| 	if self.tail > self.head && self.tail > 0 {
 | |
| 		self.tailOffset -= utf8.RuneLen(self.readIn[self.tail-1])
 | |
| 		self.tail -= 1
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (self *_lexer) ignore() {
 | |
| 	self.head = self.tail
 | |
| 	self.headOffset = self.tailOffset
 | |
| }
 | |
| 
 | |
| func (self *_lexer) accept(valid string) bool {
 | |
| 	if strings.IndexRune(valid, self.peek()) >= 0 {
 | |
| 		self.next()
 | |
| 		return true
 | |
| 	}
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| func (self *_lexer) acceptRun(valid string) bool {
 | |
| 	found := false
 | |
| 	for strings.IndexRune(valid, self.peek()) >= 0 {
 | |
| 		self.next()
 | |
| 		found = true
 | |
| 	}
 | |
| 	return found
 | |
| }
 | |
| 
 | |
| func (self *_lexer) word() string {
 | |
| 	return self.Source[self.headOffset:self.tailOffset]
 | |
| }
 | |
| 
 | |
| func (self *_lexer) length() int {
 | |
| 	return self.tailOffset - self.headOffset
 | |
| }
 | |
| 
 | |
// isDecimalDigit reports whether chr is an ECMAScript DecimalDigit, i.e. one
// of the ASCII digits '0' through '9'.
//
// Digits from other scripts (Unicode category Nd, e.g. Arabic-Indic or
// fullwidth digits) are deliberately not accepted: they cannot start or be
// part of a numeric literal.
func isDecimalDigit(chr rune) bool {
	return '0' <= chr && chr <= '9'
}
 | |
| 
 | |
// isAlphaNumeric reports whether chr is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(chr rune) bool {
	switch {
	case chr == '_':
		return true
	case unicode.IsLetter(chr):
		return true
	}
	return unicode.IsDigit(chr)
}
 | |
| 
 | |
// isIdentifierStart reports whether chr may begin an identifier: '$', '_',
// a backslash (the start of a \uXXXX escape, validated by the caller) or an
// ECMAScript UnicodeLetter.
//
// UnicodeLetter covers the letter categories (Lu, Ll, Lt, Lm, Lo) and also
// the letter numbers (Nl), which unicode.IsLetter alone does not include.
func isIdentifierStart(chr rune) bool {
	switch chr {
	case '$', '_', '\\':
		return true
	}
	return unicode.IsLetter(chr) || unicode.Is(unicode.Nl, chr)
}
 | |
| 
 | |
// isIdentifierPart reports whether chr may appear after the first character
// of an identifier.
//
// That is everything allowed at the start ('$', '_', a backslash introducing
// a \uXXXX escape, Unicode letters and letter numbers) plus the additional
// ECMAScript IdentifierPart classes: Unicode digits (Nd), combining marks
// (Mn, Mc), connector punctuation (Pc), and the zero-width non-joiner and
// joiner (U+200C, U+200D).
func isIdentifierPart(chr rune) bool {
	switch chr {
	case '$', '_', '\\', '\u200c', '\u200d':
		return true
	}
	return unicode.IsLetter(chr) ||
		unicode.IsDigit(chr) ||
		unicode.In(chr, unicode.Nl, unicode.Mn, unicode.Mc, unicode.Pc)
}
 | |
| 
 | |
// isWhiteSpace reports whether chr is ECMAScript WhiteSpace: tab, vertical
// tab, form feed, space, no-break space, the byte order mark (U+FEFF) or any
// other Unicode space separator (category Zs).
//
// Line terminators are not white space; see isLineTerminator.
func isWhiteSpace(chr rune) bool {
	switch chr {
	case ' ', '\t', '\u00a0', '\u000b', '\u000c', '\ufeff':
		return true
	}
	return unicode.Is(unicode.Zs, chr)
}
 | |
| 
 | |
// isLineTerminator reports whether chr ends a line: line feed, carriage
// return, line separator (U+2028) or paragraph separator (U+2029).
func isLineTerminator(chr rune) bool {
	return chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029'
}
 | 
