diff --git a/go.mod b/go.mod index 9183c3b..37ee3d0 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 github.com/bitly/go-simplejson v0.5.0 github.com/btcsuite/winsvc v1.0.0 - github.com/garyburd/redigo v1.6.3 github.com/go-redis/redis/v8 v8.4.11 github.com/go-sql-driver/mysql v1.5.0 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 @@ -17,7 +16,6 @@ require ( github.com/jander/golog v0.0.0-20150917071935-954a5be801fc github.com/jinzhu/gorm v1.9.12 github.com/jroimartin/gocui v0.4.0 - github.com/juju/ratelimit v1.0.1 github.com/kardianos/service v1.0.0 github.com/muesli/cache2go v0.0.0-20200423001931-a100c5aac93f github.com/nicksnyder/go-i18n/v2 v2.0.3 @@ -28,14 +26,11 @@ require ( github.com/syndtr/goleveldb v1.0.0 github.com/wenzhenxi/gorsa v0.0.0-20210524035706-528c7050d703 github.com/xxjwxc/gowp v0.0.0-20200603130651-4d7368b0e285 - github.com/yudeguang/iox v0.0.0-20180519090448-bffdb29c87c0 // indirect - github.com/yudeguang/ratelimit v0.0.0-20220109125206-af2bdcdaf64a - go.uber.org/ratelimit v0.2.0 + go.uber.org/atomic v1.7.0 // indirect go.uber.org/zap v1.10.0 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb golang.org/x/text v0.3.3 - golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 google.golang.org/grpc v1.29.1 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect gopkg.in/eapache/queue.v1 v1.1.0 diff --git a/go.sum b/go.sum index 84b0907..4e81d21 100644 --- a/go.sum +++ b/go.sum @@ -5,8 +5,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI= -github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg= github.com/ant0ine/go-json-rest v3.3.2+incompatible h1:nBixrkLFiDNAW0hauKDLc8yJI6XfrQumWvytE1Hk14E= github.com/ant0ine/go-json-rest v3.3.2+incompatible/go.mod h1:q6aCt0GfU6LhpBsnZ/2U+mwe+0XB5WStbmwyoPfc+sk= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= @@ -54,8 +52,6 @@ github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHqu github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/garyburd/redigo v1.6.3 h1:HCeeRluvAgMusMomi1+6Y5dmFOdYV/JzoRrrbFlkGIc= -github.com/garyburd/redigo v1.6.3/go.mod h1:rTb6epsqigu3kYKBnaF028A7Tf/Aw5s0cqA47doKKqw= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= @@ -120,8 +116,6 @@ github.com/jinzhu/now v1.1.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/ github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jroimartin/gocui v0.4.0 h1:52jnalstgmc25FmtGcWqa0tcbMEWS6RpFLsOIO+I+E8= github.com/jroimartin/gocui v0.4.0/go.mod h1:7i7bbj99OgFHzo7kB2zPb8pXLqMBSQegY7azfqXMkyY= -github.com/juju/ratelimit v1.0.1 h1:+7AIFJVQ0EQgq/K9+0Krm7m530Du7tIz0METWzN0RgY= -github.com/juju/ratelimit v1.0.1/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kardianos/service v1.0.0 h1:HgQS3mFfOlyntWX8Oke98JcJLqt1DBcHR4kxShpYef0= github.com/kardianos/service v1.0.0/go.mod h1:8CzDhVuCuugtsHyZoTvsOBuvonN/UDBvl0kH+BUxvbo= @@ -219,10 +213,6 @@ github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1: github.com/xxjwxc/gowp v0.0.0-20200603130651-4d7368b0e285 h1:gbdax2ZvHZwe8zxu7by/HMuDUS47iHR2zmEzlgAHBMw= github.com/xxjwxc/gowp v0.0.0-20200603130651-4d7368b0e285/go.mod h1:yJ/fY5BorWARfDDsxBU/MyQTHc5MVyNcqBQQYD6MN0k= github.com/xxjwxc/public v0.0.0-20200603115833-341beff27850/go.mod h1:fp3M+FEQrCgWD1fZ/PLwZkCTglf086OEhC9LcydAUnc= -github.com/yudeguang/iox v0.0.0-20180519090448-bffdb29c87c0 h1:EIjQmYpnyudINP5M6Y3hFT/AA9SEaZ6La0MtHRkb0X0= -github.com/yudeguang/iox v0.0.0-20180519090448-bffdb29c87c0/go.mod h1:/yeZ8yPyE9g4jM7Z8LPKwi1L9lDGmLGQ0ywR4rtdNdY= -github.com/yudeguang/ratelimit v0.0.0-20220109125206-af2bdcdaf64a h1:z/xYclBL+mgRK5R8RI/jkUuLFXFYjxWI4aaRLwxi85c= -github.com/yudeguang/ratelimit v0.0.0-20220109125206-af2bdcdaf64a/go.mod h1:NcFk/p88iJxUWYrlDIat7mJLufpsHExnYvxUkApkhJc= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opentelemetry.io/otel v0.16.0 h1:uIWEbdeb4vpKPGITLsRVUS44L5oDbDUCZxn8lkxhmgw= go.opentelemetry.io/otel v0.16.0/go.mod h1:e4GKElweB8W2gWUqbghw0B8t5MCTccc9212eNHnOHwA= @@ -231,8 +221,6 @@ go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/ratelimit v0.2.0 h1:UQE2Bgi7p2B85uP5dC2bbRtig0C+OeNRnNEafLjsLPA= -go.uber.org/ratelimit v0.2.0/go.mod h1:YYBV4e4naJvhpitQrWJu1vCpgB7CboMe0qhltKt6mUg= go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -287,7 +275,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/wordsfilter/node.go b/wordsfilter/node.go new file mode 100644 index 0000000..e78592c --- /dev/null +++ b/wordsfilter/node.go @@ -0,0 +1,157 @@ +package wordsfilter + +import ( + "bytes" + "strings" +) + +type Node struct { + Child map[string]*Node + Placeholders string +} + +// New creates a node. +func NewNode(child map[string]*Node, placeholders string) *Node { + return &Node{ + Child: child, + Placeholders: placeholders, + } +} + +// Add sensitive words to specified sensitive words Map. +func (node *Node) add(text string, root map[string]*Node, placeholder string) { + if text == "" { + return + } + textr := []rune(text) + end := len(textr) - 1 + for i := 0; i <= end; i++ { + word := string(textr[i]) + if n, ok := root[word]; ok { // contains key + if i == end { // the last + n.Placeholders = strings.Repeat(placeholder, end+1) + } else { + if n.Child != nil { + root = n.Child + } else { + root = make(map[string]*Node) + n.Child = root + } + } + } else { + placeholders, child := "", make(map[string]*Node) + if i == end { + placeholders = strings.Repeat(placeholder, end+1) + } + root[word] = NewNode(child, placeholders) + root = child + } + } +} + +// Remove specified sensitive words from sensitive word map. +func (node *Node) remove(text string, root map[string]*Node) { + textr := []rune(text) + end := len(textr) - 1 + for i := 0; i <= end; i++ { + word := string(textr[i]) + if n, ok := root[word]; ok { + if i == end { + n.Placeholders = "" + } else { + root = n.Child + } + } else { + return + } + } +} + +// Replace sensitive words in strings and return new strings. +// Follow the principle of maximum matching. +func (node *Node) replace(text string, root map[string]*Node) string { + if root == nil || text == "" { + return text + } + textr := []rune(text) + i, s, e, l := 0, 0, 0, len(textr) + bf := bytes.Buffer{} + words := make(map[string]*Node) + var back []*Node +loop: + for e < l { + words = root + i = e + // Maximum Matching Principle, Matching Backwards First + for ; i < l; i ++ { + word := string(textr[i]) + if n, ok := words[word]; ok { + back = append(back, n) + if n.Child != nil { + words = n.Child + } else if n.Placeholders != "" { + bf.WriteString(string(textr[s:e])) + bf.WriteString(n.Placeholders) + i++ + s, e = i, i + continue loop + } else { + break + } + } else if n != nil && n.Placeholders != "" { + bf.WriteString(string(textr[s:e])) + bf.WriteString(n.Placeholders) + s, e = i, i + continue loop + } else { + break + } + } + // Backward match fails, backtracking. + for ; i > e; i-- { + bl := len(back) + if bl == 0 { + break + } + last := back[bl-1] + back = back[:bl-1] + if last.Placeholders != "" { + bf.WriteString(string(textr[s:e])) + bf.WriteString(last.Placeholders) + s, e = i, i + continue loop + } + } + + e++ + back = back[:0] + } + bf.WriteString(string(textr[s:e])) + + return bf.String() +} + +// Whether the string contains sensitive words. +func (node *Node) contains(text string, root map[string]*Node) bool { + if root == nil || text == "" { + return false + } + textr := []rune(text) + end := len(textr) - 1 + for i := 0; i <= end; i++ { + word := string(textr[i]) + if n, ok := root[word]; ok { + if i == end { + return n.Placeholders != "" + } else { + if len(n.Child) == 0 { // last + return true + } + root = n.Child + } + } else { + continue + } + } + return false +} diff --git a/wordsfilter/words_filter.go b/wordsfilter/words_filter.go new file mode 100644 index 0000000..e086416 --- /dev/null +++ b/wordsfilter/words_filter.go @@ -0,0 +1,118 @@ +package wordsfilter + +import ( + "bufio" + "bytes" + "io" + "os" + "strings" + "sync" +) + +var DefaultPlaceholder = "*" +var DefaultStripSpace = true + +type WordsFilter struct { + Placeholder string + StripSpace bool + node *Node + mutex sync.RWMutex +} + +// New creates a words filter. +func New() *WordsFilter { + return &WordsFilter{ + Placeholder: DefaultPlaceholder, + StripSpace: DefaultStripSpace, + node: NewNode(make(map[string]*Node), ""), + } +} + +// Generate Convert sensitive text lists into sensitive word tree nodes +func (wf *WordsFilter) Generate(texts []string) map[string]*Node { + root := make(map[string]*Node) + for _, text := range texts { + wf.Add(text, root) + } + return root +} + +// GenerateWithFile Convert sensitive text from file into sensitive word tree nodes. +// File content format, please wrap every sensitive word. +func (wf *WordsFilter) GenerateWithFile(path string) (map[string]*Node, error) { + fd, err := os.Open(path) + if err != nil { + return nil, err + } + defer fd.Close() + buf := bufio.NewReader(fd) + var texts []string + for { + line, _, err := buf.ReadLine() + if err != nil { + if err == io.EOF { + break + } else { + return nil, err + } + } + text := strings.TrimSpace(string(line)) + if text == "" { + continue + } + texts = append(texts, text) + } + + root := wf.Generate(texts) + return root, nil +} + +// Add sensitive words to specified sensitive words Map. +func (wf *WordsFilter) Add(text string, root map[string]*Node) { + if wf.StripSpace { + text = stripSpace(text) + } + wf.mutex.Lock() + defer wf.mutex.Unlock() + wf.node.add(text, root, wf.Placeholder) +} + +// Replace sensitive words in strings and return new strings. +func (wf *WordsFilter) Replace(text string, root map[string]*Node) string { + if wf.StripSpace { + text = stripSpace(text) + } + wf.mutex.RLock() + defer wf.mutex.RUnlock() + return wf.node.replace(text, root) +} + +// Contains Whether the string contains sensitive words. +func (wf *WordsFilter) Contains(text string, root map[string]*Node) bool { + if wf.StripSpace { + text = stripSpace(text) + } + wf.mutex.RLock() + defer wf.mutex.RUnlock() + return wf.node.contains(text, root) +} + +// Remove specified sensitive words from sensitive word map. +func (wf *WordsFilter) Remove(text string, root map[string]*Node) { + if wf.StripSpace { + text = stripSpace(text) + } + wf.mutex.Lock() + defer wf.mutex.Unlock() + wf.node.remove(text, root) +} + +// stripSpace Strip space +func stripSpace(str string) string { + fields := strings.Fields(str) + var bf bytes.Buffer + for _, field := range fields { + bf.WriteString(field) + } + return bf.String() +} diff --git a/wordsfilter/words_filter_test.go b/wordsfilter/words_filter_test.go new file mode 100644 index 0000000..0291346 --- /dev/null +++ b/wordsfilter/words_filter_test.go @@ -0,0 +1,34 @@ +package wordsfilter + +import ( + "testing" +) + +func TestWordsFilter(t *testing.T) { + texts := []string{ + "Miyamoto Musashi", + "妲己", + "アンジェラ", + "ความรุ่งโรจน์", + } + wf := New() + root := wf.Generate(texts) + wf.Remove("shif", root) + c1 := wf.Contains("アン", root) // 是否有敏感词 + if c1 != false { + t.Errorf("Test Contains expect false, get %T, %v", c1, c1) + } + c2 := wf.Contains("->アンジェラ2333", root) + if c2 != true { + t.Errorf("Test Contains expect true, get %T, %v", c2, c2) + } + r1 := wf.Replace("Game ความรุ่งโรจน์ i like 妲己 heroMiyamotoMusashi", root) + if r1 != "Game*************ilike**hero***************" { + t.Errorf("Test Replace expect Game*************ilike**hero***************,get %T,%v", r1, r1) + } + // Test generated with file. + root, _ = wf.GenerateWithFile("./words_test.txt") + if wf.Contains("アンジェラ", root) != true { + t.Errorf("Test Contains expect true, get %T, %v", c2, c2) + } +}