From 1864a88fa0223dd942c84caaadf261e7dd18ebf2 Mon Sep 17 00:00:00 2001 From: Igor Kharin Date: Sun, 4 May 2014 02:38:33 +0700 Subject: [PATCH] Implement strings without value16 This fixes #68 Some changes over the original patch, removing references to utf8string.String: * (better) This removes a dependency on a non-standard (though solid) package * (better) utf8string.String has mutable parts * (worse) utf8string.String has a smarter consecutive access approach (by remembering where the last access was) * (?) _stringWide allocates a []rune if charAt or charCodeAt access is needed (though it will only do this once for the string object: its methods use pointer receivers, so the slice built in At is kept on the shared object rather than on a copy) * (fix) stringAt returns utf8.RuneError for a nil _stringObject (stringValue returns nil when the object does not hold a string) instead of calling Length on a nil interface --- builtin_string.go | 20 ++++---- global.go | 17 +++---- type_string.go | 114 ++++++++++++++++++++++++++++++++-------------- 3 files changed, 95 insertions(+), 56 deletions(-) diff --git a/builtin_string.go b/builtin_string.go index 1b0f89c..e2c6a62 100644 --- a/builtin_string.go +++ b/builtin_string.go @@ -5,7 +5,7 @@ import ( "regexp" "strconv" "strings" - "unicode/utf16" + "unicode/utf8" ) // String @@ -42,24 +42,22 @@ func builtinString_fromCharCode(call FunctionCall) Value { func builtinString_charAt(call FunctionCall) Value { checkObjectCoercible(call.This) - value := toString(call.This) - value16 := utf16.Encode([]rune(value)) - index := toInteger(call.Argument(0)).value - if 0 > index || index >= int64(len(value16)) { + idx := int(toInteger(call.Argument(0)).value) + chr := stringAt(call.This._object().stringValue(), idx) + if chr == utf8.RuneError { return toValue_string("") } - return toValue_string(string(value16[index])) + return toValue_string(string(chr)) } func builtinString_charCodeAt(call FunctionCall) Value { checkObjectCoercible(call.This) - value := toString(call.This) - value16 := utf16.Encode([]rune(value)) - index := toInteger(call.Argument(0)).value - if 0 > index || index >= int64(len(value16)) { + idx := int(toInteger(call.Argument(0)).value) + chr := stringAt(call.This._object().stringValue(), idx) + if 
chr == utf8.RuneError { return NaNValue() } - return toValue_uint16(value16[index]) + return toValue_uint16(uint16(chr)) } func builtinString_concat(call FunctionCall) Value { diff --git a/global.go b/global.go index d9cd85b..87030a0 100644 --- a/global.go +++ b/global.go @@ -2,7 +2,7 @@ package otto import ( "strconv" - Time "time" + "time" ) var ( @@ -12,13 +12,8 @@ var ( return UndefinedValue() }}, } - prototypeValueString = _stringObject{ - value: Value{ - _valueType: valueString, - value: "", - }, - value16: []uint16(nil), - } + prototypeValueString = _stringASCII("") + // TODO Make this just false? prototypeValueBoolean = Value{ _valueType: valueBoolean, value: false, @@ -30,7 +25,7 @@ var ( prototypeValueDate = _dateObject{ epoch: 0, isNaN: false, - time: Time.Unix(0, 0).UTC(), + time: time.Unix(0, 0).UTC(), value: Value{ _valueType: valueNumber, value: 0, @@ -84,14 +79,14 @@ func (self *_object) primitiveValue() Value { case Value: return value case _stringObject: - return value.value + return toValue_string(value.String()) } return Value{} } func (self *_object) hasPrimitive() bool { switch self.value.(type) { - case Value, *_stringObject: + case Value, _stringObject: return true } return false diff --git a/type_string.go b/type_string.go index 2901743..fd93316 100644 --- a/type_string.go +++ b/type_string.go @@ -2,50 +2,97 @@ package otto import ( "strconv" - "unicode/utf16" + "unicode/utf8" ) -type _stringObject struct { - value Value - value16 []uint16 +type _stringObject interface { + Length() int + At(int) rune + String() string +} + +type _stringASCII string + +func (str _stringASCII) Length() int { + return len(str) +} + +func (str _stringASCII) At(at int) rune { + return rune(str[at]) +} + +func (str _stringASCII) String() string { + return string(str) +} + +type _stringWide struct { + string string + length int + runes []rune +} + +func (str *_stringWide) Length() int { + return str.length +} + +func (str *_stringWide) At(at int) rune { + if 
str.runes == nil { + str.runes = []rune(str.string) + } + return str.runes[at] +} + +func (str *_stringWide) String() string { + return str.string +} + +func _newStringObject(str string) _stringObject { + for i := 0; i < len(str); i++ { + if str[i] >= utf8.RuneSelf { + goto wide + } + } + + return _stringASCII(str) + +wide: + return &_stringWide{ + string: str, + length: utf8.RuneCountInString(str), + } +} + +func stringAt(str _stringObject, index int) rune { + if str != nil && 0 <= index && index < str.Length() { + return str.At(index) + } + return utf8.RuneError } func (runtime *_runtime) newStringObject(value Value) *_object { - value = toValue_string(toString(value)) - value16 := utf16Of(value.value.(string)) + str := _newStringObject(toString(value)) self := runtime.newClassObject("String") - self.defineProperty("length", toValue_int(len(value16)), 0, false) + self.defineProperty("length", toValue_int(str.Length()), 0, false) self.objectClass = _classString - self.value = _stringObject{ - value: value, - value16: value16, - } + self.value = str return self } -func (self *_object) stringValue() (string, _stringObject) { - value, valid := self.value.(_stringObject) - if valid { - return value.value.value.(string), value +func (self *_object) stringValue() _stringObject { + if str, ok := self.value.(_stringObject); ok { + return str } - return "", _stringObject{} -} - -func (self *_object) stringValue16() []uint16 { - _, value := self.stringValue() - return value.value16 -} - -func utf16Of(value string) []uint16 { - return utf16.Encode([]rune(value)) + return nil } func stringEnumerate(self *_object, all bool, each func(string) bool) { - length := len(self.stringValue16()) - for index := 0; index < length; index += 1 { - if !each(strconv.FormatInt(int64(index), 10)) { - return + if str := self.stringValue(); str != nil { + length := str.Length() + for index := 0; index < length; index++ { + if !each(strconv.FormatInt(int64(index), 10)) { + return + } } } objectEnumerate(self, 
all, each) @@ -55,11 +102,10 @@ func stringGetOwnProperty(self *_object, name string) *_property { if property := objectGetOwnProperty(self, name); property != nil { return property } - index := stringToArrayIndex(name) - if index >= 0 { - value16 := self.stringValue16() - if index < int64(len(value16)) { - return &_property{toValue_string(string(value16[index])), 0} + // TODO Test a string of length >= +int32 + 1? + if index := stringToArrayIndex(name); index >= 0 { + if chr := stringAt(self.stringValue(), int(index)); chr != utf8.RuneError { + return &_property{toValue_string(string(chr)), 0} } } return nil