perf: use Aho-Corasick for string matching

This commit is contained in:
nanmu42
2019-12-10 15:26:23 +08:00
parent edd9fc4ce3
commit 8f4e91f06f
6 changed files with 24 additions and 87 deletions

1
go.mod
View File

@@ -4,5 +4,6 @@ go 1.13
require (
github.com/gin-gonic/gin v1.5.0
github.com/signalsciences/ac v1.1.0
github.com/stretchr/testify v1.4.0
)

2
go.sum
View File

@@ -24,6 +24,8 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLD
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/signalsciences/ac v1.1.0 h1:3PS8uD5khoTgnnF10aCswUAVDgW8o30SGcN6XUv27yU=
github.com/signalsciences/ac v1.1.0/go.mod h1:jnlGjtNM8dyGcnOdZjY35vHmUtOn5M5K4U+BzcVPjN0=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

View File

@@ -4,6 +4,8 @@ import (
"net/http"
"path"
"strings"
"github.com/signalsciences/ac/acascii"
)
// RequestFilter decides whether or not to compress response judging by request
@@ -39,14 +41,14 @@ func (c *CommonRequestFilter) ShouldCompress(req *http.Request) bool {
//
// Omit this filter if you want to compress all extension.
type ExtensionFilter struct {
Exts Set
Exts *acascii.Matcher
AllowEmpty bool
}
// NewExtensionFilter ...
// NewExtensionFilter returns an extension filter or panics
func NewExtensionFilter(extensions []string) *ExtensionFilter {
var (
exts = make(Set)
exts = make([]string, 0, len(extensions))
allowEmpty bool
)
@@ -55,10 +57,13 @@ func NewExtensionFilter(extensions []string) *ExtensionFilter {
allowEmpty = true
continue
}
exts.Add(item)
exts = append(exts, item)
}
return &ExtensionFilter{Exts: exts, AllowEmpty: allowEmpty}
return &ExtensionFilter{
Exts: acascii.MustCompileString(exts),
AllowEmpty: allowEmpty,
}
}
// ShouldCompress implements RequestFilter interface
@@ -67,7 +72,7 @@ func (e *ExtensionFilter) ShouldCompress(req *http.Request) bool {
if ext == "" {
return e.AllowEmpty
}
return e.Exts.Contains(ext)
return e.Exts.MatchString(ext)
}
// defaultExtensions is the list of default extensions for which to enable gzip.

View File

@@ -2,7 +2,8 @@ package gzip
import (
"net/http"
"strings"
"github.com/signalsciences/ac/acascii"
)
// ResponseHeaderFilter decides whether or not to compress response
@@ -39,14 +40,14 @@ func (s *SkipCompressedFilter) ShouldCompress(header http.Header) bool {
//
// Omit this filter if you want to compress all content type.
type ContentTypeFilter struct {
Types Set
Types *acascii.Matcher
AllowEmpty bool
}
// NewContentTypeFilter ...
func NewContentTypeFilter(types []string) *ContentTypeFilter {
var (
set = make(Set)
nonEmpty = make([]string, 0, len(types))
allowEmpty bool
)
@@ -55,14 +56,16 @@ func NewContentTypeFilter(types []string) *ContentTypeFilter {
allowEmpty = true
continue
}
set.Add(item)
nonEmpty = append(nonEmpty, item)
}
return &ContentTypeFilter{Types: set, AllowEmpty: allowEmpty}
return &ContentTypeFilter{
Types: acascii.MustCompileString(nonEmpty),
AllowEmpty: allowEmpty,
}
}
// ShouldCompress implements ResponseHeaderFilter interface
// TODO: optimize with ahocorasick
func (e *ContentTypeFilter) ShouldCompress(header http.Header) bool {
contentType := header.Get("Content-Type")
@@ -70,9 +73,7 @@ func (e *ContentTypeFilter) ShouldCompress(header http.Header) bool {
return e.AllowEmpty
}
return e.Types.ContainsFunc(func(s string) bool {
return strings.Contains(contentType, s)
})
return e.Types.MatchString(contentType)
}
// defaultContentType is the list of default content types for which to enable gzip.

34
set.go
View File

@@ -1,34 +0,0 @@
package gzip
// Set is a collection of unique strings, represented as a map whose
// keys are the members and whose values are empty structs.
// Original source:
// https://github.com/caddyserver/caddy/blob/7fa90f08aee0861187236b2fbea16b4fa69c5a28/caddyhttp/gzip/requestfilter.go#L76-L105
type Set map[string]struct{}

// Add inserts value into the set. Inserting a value that is already
// present leaves the set unchanged.
func (s Set) Add(value string) {
	var marker struct{}
	s[value] = marker
}

// Remove deletes value from the set. It does nothing when value is
// not a member.
func (s Set) Remove(value string) {
	delete(s, value)
}

// Contains reports whether value is a member of the set.
func (s Set) Contains(value string) bool {
	if _, found := s[value]; found {
		return true
	}
	return false
}

// ContainsFunc reports whether f returns true for at least one member
// of the set. Members are passed to f one at a time, in map iteration
// order, and the walk stops at the first member f accepts.
func (s Set) ContainsFunc(f func(string) bool) bool {
	matched := false
	for member := range s {
		if matched = f(member); matched {
			break
		}
	}
	return matched
}

View File

@@ -1,38 +0,0 @@
package gzip
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestSet walks a Set through insertion, direct and predicate-based
// lookup, and removal, including removal of a value that was never added.
func TestSet(t *testing.T) {
	// equalTo builds a predicate that accepts exactly one string.
	equalTo := func(want string) func(string) bool {
		return func(got string) bool {
			return got == want
		}
	}

	members := make(Set)

	// Insertion makes a value visible to Contains.
	assert.False(t, members.Contains("a"))
	members.Add("a")
	assert.True(t, members.Contains("a"))

	assert.False(t, members.Contains("b"))
	members.Add("b")
	assert.True(t, members.Contains("b"))

	// Predicate lookup finds both members and rejects a non-member.
	assert.True(t, members.ContainsFunc(equalTo("a")))
	assert.True(t, members.ContainsFunc(equalTo("b")))
	assert.False(t, members.ContainsFunc(equalTo("c")))

	// Removal hides a value again.
	members.Remove("a")
	assert.False(t, members.Contains("a"))

	members.Remove("b")
	assert.False(t, members.Contains("b"))

	// Removing a value that was never present changes nothing.
	assert.False(t, members.Contains("c"))
	members.Remove("c")
	assert.False(t, members.Contains("c"))
}