mirror of
https://github.com/robertkrimen/otto
synced 2025-09-28 18:45:22 +08:00
More (complex) RegExp => RE2 transforming and tests
The unescaping of that which does not need escaping
This commit is contained in:
parent
bf38778ba2
commit
68943ba1e4
|
@ -45,3 +45,4 @@ try-tmp: tmp
|
||||||
|
|
||||||
look: .fail
|
look: .fail
|
||||||
cat $<
|
cat $<
|
||||||
|
@echo `readlink $<`
|
||||||
|
|
|
@ -67,4 +67,7 @@ rm -f ch07/7.6/7.6.1/7.6.1-8-8.js
|
||||||
rm -f ch07/7.6/7.6.1/7.6.1-8-9.js
|
rm -f ch07/7.6/7.6.1/7.6.1-8-9.js
|
||||||
# Skip, no backreferencing in re2
|
# Skip, no backreferencing in re2
|
||||||
rm -f ch07/7.8/7.8.5/S7.8.5_A1.4_T1.js
|
rm -f ch07/7.8/7.8.5/S7.8.5_A1.4_T1.js
|
||||||
|
# We WANT to squawk on \1, \2, \3, ..., \9
|
||||||
|
# TODO Come up with replacement test?
|
||||||
|
rm -f ch07/7.8/7.8.5/S7.8.5_A1.4_T2.js
|
||||||
popd
|
popd
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -23,10 +23,10 @@ TEST := -v --run String_fromCharCode
|
||||||
TEST := -v --run Lexer\|Parse
|
TEST := -v --run Lexer\|Parse
|
||||||
TEST := -v --run Lexer
|
TEST := -v --run Lexer
|
||||||
TEST := -v --run String_
|
TEST := -v --run String_
|
||||||
TEST := -v --run RegExp
|
|
||||||
TEST := -v --run ParseSuccess
|
TEST := -v --run ParseSuccess
|
||||||
TEST := -v --run Parse
|
TEST := -v --run Parse
|
||||||
TEST := -v --run ParseFailure
|
TEST := -v --run ParseFailure
|
||||||
|
TEST := -v --run RegExp
|
||||||
TEST := .
|
TEST := .
|
||||||
|
|
||||||
test: test-i
|
test: test-i
|
||||||
|
|
15
otto_test.go
15
otto_test.go
|
@ -57,6 +57,7 @@ func TestTransformRegExp(t *testing.T) {
|
||||||
Terst(t)
|
Terst(t)
|
||||||
|
|
||||||
Is(transformRegExp(`\\|'|\r|\n|\t|\u2028|\u2029`), `\\|'|\r|\n|\t|\x{2028}|\x{2029}`)
|
Is(transformRegExp(`\\|'|\r|\n|\t|\u2028|\u2029`), `\\|'|\r|\n|\t|\x{2028}|\x{2029}`)
|
||||||
|
Is(transformRegExp(`\x`), `x`)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestIsValidRegExp(t *testing.T) {
|
func TestIsValidRegExp(t *testing.T) {
|
||||||
|
@ -1567,6 +1568,20 @@ func TestRegExp(t *testing.T) {
|
||||||
test(`/\a/.source`, "\\a")
|
test(`/\a/.source`, "\\a")
|
||||||
test(`/\;/.source`, "\\;")
|
test(`/\;/.source`, "\\;")
|
||||||
test(`/\ /.source`, "\\ ")
|
test(`/\ /.source`, "\\ ")
|
||||||
|
|
||||||
|
// Start sanity check...
|
||||||
|
test("eval(\"/abc/\").source", "abc")
|
||||||
|
test("eval(\"/\u0023/\").source", "#")
|
||||||
|
test("eval(\"/\u0058/\").source", "X")
|
||||||
|
test("eval(\"/\\\u0023/\").source == \"\\\u0023\"", "true")
|
||||||
|
test("'0x' + '0058'", "0x0058")
|
||||||
|
test("'\\\\' + '0x' + '0058'", "\\0x0058")
|
||||||
|
// ...stop sanity check
|
||||||
|
|
||||||
|
test(`abc = '\\' + String.fromCharCode('0x' + '0058'); eval('/' + abc + '/').source`, "\\X")
|
||||||
|
test(`abc = '\\' + String.fromCharCode('0x0058'); eval('/' + abc + '/').source == "\\\u0058"`, "true")
|
||||||
|
test(`abc = '\\' + String.fromCharCode('0x0023'); eval('/' + abc + '/').source == "\\\u0023"`, "true")
|
||||||
|
test(`abc = '\\' + String.fromCharCode('0x0078'); eval('/' + abc + '/').source == "\\\u0078"`, "true")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNewFunction(t *testing.T) {
|
func TestNewFunction(t *testing.T) {
|
||||||
|
|
|
@ -3,6 +3,7 @@ package otto
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type _regExpObject struct {
|
type _regExpObject struct {
|
||||||
|
@ -98,11 +99,75 @@ func execResultToArray(runtime *_runtime, target string, result []int) *_object
|
||||||
return runtime.newArray(valueArray)
|
return runtime.newArray(valueArray)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*var transformRegExp_matchSlashU *regexp.Regexp = regexp.MustCompile(`\\u([:xdigit:]{1-4})`)*/
|
// 0031,0032,0033,0034,0035,0036,0037,0038,0039 // 1 - 9
|
||||||
var transformRegExp_matchSlashU *regexp.Regexp = regexp.MustCompile(`\\u([[:xdigit:]]{1,4})`)
|
// 0043,0045,0046,0047,0048,0049,004A,004B,004C,004D,004E,004F
|
||||||
|
// 0050,0052,0054,0055,0056,0058,0059,005A
|
||||||
|
// 0063,0065,0067,0068,0069,006A,006B,006C,006D,006F
|
||||||
|
// 0070,0071,0075,0078,0079
|
||||||
|
// 0080,0081,0082,0083,0084,0085,0086,0087,0088,0089,008A,008B,008C,008D,008E,008F
|
||||||
|
// 0090,0091,0092,0093,0094,0095,0096,0097,0098,0099,009A,009B,009C,009D,009E,009F
|
||||||
|
// 00A0,00A1,00A2,00A3,00A4,00A5,00A6,00A7,00A8,00A9,00AA,00AB,00AC,00AD,00AE,00AF
|
||||||
|
// 00B0,00B1,00B2,00B3,00B4,00B5,00B6,00B7,00B8,00B9,00BA,00BB,00BC,00BD,00BE,00BF
|
||||||
|
// 00C0,00C1,00C2,00C3,00C4,00C5,00C6,00C7,00C8,00C9,00CA,00CB,00CC,00CD,00CE,00CF
|
||||||
|
// ...
|
||||||
|
// c = 63* c[A-Z]
|
||||||
|
// p = 70
|
||||||
|
// u = 75* u[:xdigit:]{4}
|
||||||
|
// x = 78* x[:xdigit:]{2}
|
||||||
|
//\x{0031}-\x{0039}
|
||||||
|
|
||||||
|
// transformRegExp_matchSlashU matches a \u escape followed by one to four
// hexadecimal digits and captures the digits. transformRegExp only applies it
// to complete four-digit escapes.
var transformRegExp_matchSlashU = regexp.MustCompile(`\\u([[:xdigit:]]{1,4})`)

// transformRegExp_escape matches one backslash escape at a time, so a pattern
// can be rewritten in a single left-to-right pass. The alternatives are tried
// in order: a complete \uHHHH, a complete \xHH, a complete \cA-\cZ, and finally
// a backslash followed by any single character. Because an escaped backslash
// (\\) is consumed as one token by the last alternative, the character after it
// is never mistaken for the start of an escape. A lone backslash at the very
// end of the pattern is not matched and is therefore left alone.
var transformRegExp_escape = regexp.MustCompile(`(?s)\\(?:u[[:xdigit:]]{4}|x[[:xdigit:]]{2}|c[A-Z]|.)`)

// transformRegExp_unescape matches a complete two-character escape token whose
// backslash is to be dropped: the escaped character is one of
//
//	C E-O P R T-V X-Z   e g-m o p q y   U+0080-U+FFFF
//
// The pattern is laid out over several lines for readability; the replacer
// strips all newlines, tabs and spaces before it is compiled.
var transformRegExp_unescape = regexp.MustCompile(strings.NewReplacer("\n", "", "\t", "", " ", "").Replace(`
	^\\[
		\x{0043}\x{0045}-\x{004F}
		\x{0050}\x{0052}\x{0054}-\x{0056}\x{0058}-\x{005A}
		\x{0065}\x{0067}-\x{006D}\x{006F}
		\x{0070}\x{0071}\x{0079}
		\x{0080}-\x{FFFF}
	]$
`))

// transformRegExp rewrites the escapes of an ECMAScript regular expression
// source into a form intended for Go's regexp package and returns the result.
//
// Each backslash escape is handled exactly once, left to right:
//
//   - \uHHHH (four hex digits) becomes \x{HHHH}.
//   - \u, \x and \c that are not followed by a complete operand (four hex
//     digits, two hex digits, or a letter A-Z respectively) lose their
//     backslash, including when they are the last thing in the pattern.
//   - Escapes of the characters listed on transformRegExp_unescape lose their
//     backslash.
//   - Every other escape (for example \\, \d, \n, \1, \x41, \cA) is copied
//     through unchanged.
//
// Text outside of escapes is never modified.
func transformRegExp(ecmaRegExp string) (goRegExp string) {
	return transformRegExp_escape.ReplaceAllStringFunc(ecmaRegExp, func(escape string) string {
		// escape always holds a backslash plus at least one more byte, so
		// indexing [1] is safe.
		switch escape[1] {
		case 'u':
			if len(escape) == 2 {
				// Fewer than four hex digits follow: treat as a literal "u".
				return escape[1:]
			}
			return transformRegExp_matchSlashU.ReplaceAllString(escape, `\x{$1}`)
		case 'c', 'x':
			if len(escape) == 2 {
				// The operand is missing or incomplete: treat as a literal
				// "c" / "x".
				return escape[1:]
			}
			// Complete \cA-\cZ or \xHH: passed through as written.
			return escape
		}
		if transformRegExp_unescape.MatchString(escape) {
			return escape[1:]
		}
		return escape
	})
}
|
||||||
|
|
||||||
func isValidRegExp(ecmaRegExp string) bool {
|
func isValidRegExp(ecmaRegExp string) bool {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user