Skip to content

Commit e74c6cd

Browse files
martisch authored and bradfitz committed
regexp: add ASCII fast path for context methods
The step method implementations check directly if the next rune only needs one byte to be decoded and avoid calling utf8.DecodeRune for such ASCII characters.

Introduce the same fast path optimization for rune decoding for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
1 parent 8a16d7d commit e74c6cd

File tree

1 file changed

+24
-8
lines changed

1 file changed

+24
-8
lines changed

src/regexp/regexp.go

Lines changed: 24 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
313313

314314
func (i *inputString) context(pos int) syntax.EmptyOp {
315315
r1, r2 := endOfText, endOfText
316-
if pos > 0 && pos <= len(i.str) {
317-
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
316+
// 0 < pos && pos <= len(i.str)
317+
if uint(pos-1) < uint(len(i.str)) {
318+
r1 = rune(i.str[pos-1])
319+
if r1 >= utf8.RuneSelf {
320+
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
321+
}
318322
}
319-
if pos < len(i.str) {
320-
r2, _ = utf8.DecodeRuneInString(i.str[pos:])
323+
// 0 <= pos && pos < len(i.str)
324+
if uint(pos) < uint(len(i.str)) {
325+
r2 = rune(i.str[pos])
326+
if r2 >= utf8.RuneSelf {
327+
r2, _ = utf8.DecodeRuneInString(i.str[pos:])
328+
}
321329
}
322330
return syntax.EmptyOpContext(r1, r2)
323331
}
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
352360

353361
func (i *inputBytes) context(pos int) syntax.EmptyOp {
354362
r1, r2 := endOfText, endOfText
355-
if pos > 0 && pos <= len(i.str) {
356-
r1, _ = utf8.DecodeLastRune(i.str[:pos])
363+
// 0 < pos && pos <= len(i.str)
364+
if uint(pos-1) < uint(len(i.str)) {
365+
r1 = rune(i.str[pos-1])
366+
if r1 >= utf8.RuneSelf {
367+
r1, _ = utf8.DecodeLastRune(i.str[:pos])
368+
}
357369
}
358-
if pos < len(i.str) {
359-
r2, _ = utf8.DecodeRune(i.str[pos:])
370+
// 0 <= pos && pos < len(i.str)
371+
if uint(pos) < uint(len(i.str)) {
372+
r2 = rune(i.str[pos])
373+
if r2 >= utf8.RuneSelf {
374+
r2, _ = utf8.DecodeRune(i.str[pos:])
375+
}
360376
}
361377
return syntax.EmptyOpContext(r1, r2)
362378
}

0 commit comments

Comments
 (0)