From 1628647e055fcd9e4cb2b8ade80a806f8debbe28 Mon Sep 17 00:00:00 2001
From: Robert Krimen
Date: Thu, 2 May 2013 20:23:06 +0200
Subject: [PATCH] For RegExp.exec, return the rune index in the string, not the byte index

---
 regexp_test.go |  7 ++++++-
 type_regexp.go | 13 ++++++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/regexp_test.go b/regexp_test.go
index 536aa15..26c11b1 100644
--- a/regexp_test.go
+++ b/regexp_test.go
@@ -94,13 +94,18 @@ func TestRegExp_exec(t *testing.T) {
 
 	test(`
 		var abc = /[abc](\d)?/.exec("a0 b c1 d3");
-		[ abc.length, abc.input, abc.index, abc ]
+		[ abc.length, abc.input, abc.index, abc ];
 	`, "2,a0 b c1 d3,0,a0,0")
 
 	test(`raise:
 		var exec = RegExp.prototype.exec;
 		exec("Xyzzy");
 	`, "TypeError: Calling RegExp.exec on a non-RegExp object")
+
+	test(`
+		var abc = /\w{3}\d?/.exec("CE\uFFFFL\uFFDDbox127");
+		[ abc.input.length, abc.length, abc.input, abc.index, abc ];
+	`, "11,1,CE\uFFFFL\uFFDDbox127,5,box1")
 }
 
 func TestRegExp_zaacbbbcac(t *testing.T) {
diff --git a/type_regexp.go b/type_regexp.go
index 8aa2d5f..4b7706a 100644
--- a/type_regexp.go
+++ b/type_regexp.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"regexp"
 	"strings"
+	"unicode/utf8"
 )
 
 type _regExpObject struct {
@@ -106,9 +107,19 @@ func execResultToArray(runtime *_runtime, target string, result []int) *_object
 			valueArray[index] = UndefinedValue()
 		}
 	}
+	matchIndex := result[0]
+	if matchIndex != 0 {
+		matchIndex = 0
+		// Find the rune index in the string, not the byte index
+		for index := 0; index < result[0]; {
+			_, size := utf8.DecodeRuneInString(target[index:])
+			matchIndex += 1
+			index += size
+		}
+	}
 	match := runtime.newArray(valueArray)
 	match.set("input", toValue(target), false)
-	match.set("index", toValue(result[0]), false)
+	match.set("index", toValue(matchIndex), false)
 	return match
 }
 