mirror of
https://github.com/robertkrimen/otto
synced 2025-09-28 18:45:22 +08:00
289 lines
6.8 KiB
Go
289 lines
6.8 KiB
Go
package otto
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
type _regExpObject struct {
|
|
RegularExpression *regexp.Regexp
|
|
Global bool
|
|
IgnoreCase bool
|
|
Multiline bool
|
|
Source string
|
|
LastIndex Value
|
|
}
|
|
|
|
func (runtime *_runtime) newRegExpObject(pattern string, flags string) *_object {
|
|
self := runtime.newObject()
|
|
self.Class = "RegExp"
|
|
|
|
global := false
|
|
ignoreCase := false
|
|
multiline := false
|
|
re2flags := ""
|
|
|
|
for _, rune := range flags {
|
|
switch rune {
|
|
case 'g':
|
|
if global {
|
|
panic(newError("SyntaxError: newRegExpObject: %s %s", pattern, flags))
|
|
}
|
|
global = true
|
|
case 'm':
|
|
if multiline {
|
|
panic(newError("SyntaxError: newRegExpObject: %s %s", pattern, flags))
|
|
}
|
|
multiline = true
|
|
case 'i':
|
|
if ignoreCase {
|
|
panic(newError("SyntaxError: newRegExpObject: %s %s", pattern, flags))
|
|
}
|
|
ignoreCase = true
|
|
re2flags += "i"
|
|
}
|
|
}
|
|
|
|
re2pattern := transformRegExp(pattern)
|
|
if len(re2flags) > 0 {
|
|
re2pattern = fmt.Sprintf("(?%s:%s)", re2flags, re2pattern)
|
|
}
|
|
|
|
self.RegExp = &_regExpObject{
|
|
RegularExpression: regexp.MustCompile(re2pattern),
|
|
Global: global,
|
|
IgnoreCase: ignoreCase,
|
|
Multiline: multiline,
|
|
Source: pattern,
|
|
LastIndex: toValue(0),
|
|
}
|
|
self._propertyStash = newRegExpStash(self.RegExp, self._propertyStash)
|
|
return self
|
|
}
|
|
|
|
func execRegExp(this *_object, target string) (match bool, result []int) {
|
|
lastIndex := toInteger(this.Get("lastIndex"))
|
|
index := lastIndex
|
|
global := toBoolean(this.Get("global"))
|
|
if !global {
|
|
index = 0
|
|
}
|
|
if 0 > index || index > int64(len(target)) {
|
|
} else {
|
|
result = this.RegExp.RegularExpression.FindStringSubmatchIndex(target[index:])
|
|
}
|
|
if result == nil {
|
|
this.WriteValue("lastIndex", toValue(0), true)
|
|
return // !match
|
|
}
|
|
match = true
|
|
endIndex := result[len(result)-1]
|
|
if global {
|
|
this.WriteValue("lastIndex", toValue(endIndex), true)
|
|
}
|
|
return // match
|
|
}
|
|
|
|
func execResultToArray(runtime *_runtime, target string, result []int) *_object {
|
|
captureCount := len(result) / 2
|
|
valueArray := make([]Value, captureCount)
|
|
for index := 0; index < captureCount; index++ {
|
|
offset := 2 * index
|
|
if result[offset] != -1 {
|
|
valueArray[index] = toValue(target[result[offset]:result[offset+1]])
|
|
} else {
|
|
valueArray[index] = UndefinedValue()
|
|
}
|
|
}
|
|
return runtime.newArray(valueArray)
|
|
}
|
|
|
|
// 0031,0032,0033,0034,0035,0036,0037,0038,0039 // 1 - 9
|
|
// 0043,0045,0046,0047,0048,0049,004A,004B,004C,004D,004E,004F
|
|
// 0050,0052,0054,0055,0056,0058,0059,005A
|
|
// 0063,0065,0067,0068,0069,006A,006B,006C,006D,006F
|
|
// 0070,0071,0075,0078,0079
|
|
// 0080,0081,0082,0083,0084,0085,0086,0087,0088,0089,008A,008B,008C,008D,008E,008F
|
|
// 0090,0091,0092,0093,0094,0095,0096,0097,0098,0099,009A,009B,009C,009D,009E,009F
|
|
// 00A0,00A1,00A2,00A3,00A4,00A5,00A6,00A7,00A8,00A9,00AA,00AB,00AC,00AD,00AE,00AF
|
|
// 00B0,00B1,00B2,00B3,00B4,00B5,00B6,00B7,00B8,00B9,00BA,00BB,00BC,00BD,00BE,00BF
|
|
// 00C0,00C1,00C2,00C3,00C4,00C5,00C6,00C7,00C8,00C9,00CA,00CB,00CC,00CD,00CE,00CF
|
|
// ...
|
|
// c = 63* c[A-Z]
|
|
// p = 70
|
|
// u = 75* u[:xdigit:]{4}
|
|
// x = 78* x[:xdigit:]{2}
|
|
//\x{0031}-\x{0039}
|
|
|
|
// transformRegExp_matchSlashU matches a backslash followed by "u" and one to
// four hexadecimal digits, capturing the digits in group 1. transformRegExp
// rewrites each match to the form \x{digits}.
var transformRegExp_matchSlashU = regexp.MustCompile(`\\u([[:xdigit:]]{1,4})`)
|
|
// transformRegExp_unescape matches the backslash sequences from which
// transformRegExp removes the backslash (presumably the escapes ECMAScript
// reads as the plain character -- confirm against the specification):
//
//   - a backslash before one of the listed letters or any character in the
//     range U+0080 to U+FFFF
//   - \c followed by anything other than A-Z
//   - \u followed by fewer than four hexadecimal digits and then a
//     non-hexadecimal character
//   - \x followed by fewer than two hexadecimal digits and then a
//     non-hexadecimal character
//
// Everything after the backslash is captured by group 1 and group 2 is always
// empty. All alternatives deliberately share these two groups: Go numbers
// capture groups across the entire pattern, so giving each alternative its own
// pair would leave $1 and $2 empty for every alternative but the first, and
// the `$1$2` replacement would delete the match instead of unescaping it.
//
// Newlines, tabs and spaces are stripped from the literal before compiling, so
// the layout below is purely cosmetic.
var transformRegExp_unescape = regexp.MustCompile(strings.NewReplacer("\n", "", "\t", "", " ", "").Replace(`
	\\(
		[
			\x{0043}\x{0045}-\x{004F}
			\x{0050}\x{0052}\x{0054}-\x{0056}\x{0058}-\x{005A}
			\x{0065}\x{0067}-\x{006D}\x{006F}
			\x{0070}\x{0071}\x{0079}
			\x{0080}-\x{FFFF}
		] |
		c [^A-Z] |
		u [[:xdigit:]]{0,3} [^[:xdigit:]] |
		x [[:xdigit:]]? [^[:xdigit:]]
	)()
`))
|
|
// transformRegExp_unescapeDollar matches a backslash followed by "c", "u" or
// "x" at the very end of the pattern, capturing the letter in group 1.
// transformRegExp replaces the match with just that letter.
var transformRegExp_unescapeDollar = regexp.MustCompile(`\\([cux])$`)
|
|
// TODO Go "regexp" bug? Can't do: (?:)|(?:$)
|
|
|
|
func transformRegExp(ecmaRegExp string) (goRegExp string) {
|
|
tmp := []byte(ecmaRegExp)
|
|
tmp = transformRegExp_unescape.ReplaceAll(tmp, []byte(`$1$2`))
|
|
tmp = transformRegExp_unescapeDollar.ReplaceAll(tmp, []byte(`$1`))
|
|
tmp = transformRegExp_matchSlashU.ReplaceAll(tmp, []byte(`\x{$1}`))
|
|
return string(tmp)
|
|
}
|
|
|
|
// isValidRegExp reports whether ecmaRegExp is free of the lookahead markers
// "(?=" and "(?!". It returns false as soon as one is found outside of a
// bracketed set and not interrupted by an escaped character, and true
// otherwise.
func isValidRegExp(ecmaRegExp string) bool {
	// Progress towards recognising "(?" followed by "=" or "!".
	const (
		nothing   = iota // no part of the marker seen
		openParen        // just saw "("
		openQuery        // just saw "(?"
	)

	progress := nothing
	bracketed := false // inside a set such as [0-9]
	escaped := false   // previous character was a backslash

	for _, chr := range ecmaRegExp {
		switch {
		case escaped:
			// An escaped character never takes part in the marker.
			escaped = false
			progress = nothing
		case chr == '\\':
			escaped = true
		case bracketed:
			if chr == ']' {
				bracketed = false
				progress = nothing
			}
		case chr == '[':
			bracketed = true
		case chr == '(':
			progress = openParen
		case chr == '?':
			if progress == openParen {
				progress = openQuery
			}
		case (chr == '=' || chr == '!') && progress == openQuery:
			return false
		default:
			progress = nothing
		}
	}

	return true
}
|
|
|
|
// _regExpStash
|
|
|
|
type _regExpStash struct {
|
|
_regExpObject *_regExpObject
|
|
_stash
|
|
}
|
|
|
|
func newRegExpStash(_regExpObject *_regExpObject, stash _stash) *_regExpStash {
|
|
self := &_regExpStash{
|
|
_regExpObject,
|
|
stash,
|
|
}
|
|
return self
|
|
}
|
|
|
|
func (self *_regExpStash) CanRead(name string) bool {
|
|
switch name {
|
|
case "global", "ignoreCase", "multiline", "lastIndex", "source":
|
|
return true
|
|
}
|
|
return self._stash.CanRead(name)
|
|
}
|
|
|
|
func (self *_regExpStash) Read(name string) Value {
|
|
switch name {
|
|
case "global":
|
|
return toValue(self._regExpObject.Global)
|
|
case "ignoreCase":
|
|
return toValue(self._regExpObject.IgnoreCase)
|
|
case "multiline":
|
|
return toValue(self._regExpObject.Multiline)
|
|
case "lastIndex":
|
|
return self._regExpObject.LastIndex
|
|
case "source":
|
|
return toValue(self._regExpObject.Source)
|
|
}
|
|
return self._stash.Read(name)
|
|
}
|
|
|
|
func (self *_regExpStash) Write(name string, value Value) {
|
|
switch name {
|
|
case "global", "ignoreCase", "multiline", "source":
|
|
// TODO Is this good enough? Check DefineOwnProperty
|
|
panic(newTypeError())
|
|
case "lastIndex":
|
|
self._regExpObject.LastIndex = value
|
|
return
|
|
}
|
|
self._stash.Write(name, value)
|
|
}
|
|
|
|
func (self *_regExpStash) property(name string) *_property {
|
|
switch name {
|
|
case "global":
|
|
return &_property{Value: toValue(self._regExpObject.Global), Mode: 0} // -Write -Enumerate -Configure
|
|
case "ignoreCase":
|
|
return &_property{Value: toValue(self._regExpObject.IgnoreCase), Mode: 0} // -Write -Enumerate -Configure
|
|
case "multiline":
|
|
return &_property{Value: toValue(self._regExpObject.Multiline), Mode: 0} // -Write -Enumerate -Configure
|
|
case "lastIndex":
|
|
return &_property{Value: (self._regExpObject.LastIndex), Mode: propertyModeWrite} // +Write -Enumerate -Configure
|
|
case "source":
|
|
return &_property{Value: toValue(self._regExpObject.Source), Mode: 0} // -Write -Enumerate -Configure
|
|
}
|
|
return self._stash.property(name)
|
|
}
|
|
|
|
func (self *_regExpStash) Enumerate(each func(string)) {
|
|
// Skip global, ignoreCase, multiline, source, & lastIndex
|
|
self._stash.Enumerate(each)
|
|
}
|