mirror of
https://github.com/xxjwxc/public.git
synced 2025-09-26 20:01:19 +08:00
words filter support
敏感词
This commit is contained in:
7
go.mod
7
go.mod
@@ -8,7 +8,6 @@ require (
|
||||
github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394
|
||||
github.com/bitly/go-simplejson v0.5.0
|
||||
github.com/btcsuite/winsvc v1.0.0
|
||||
github.com/garyburd/redigo v1.6.3
|
||||
github.com/go-redis/redis/v8 v8.4.11
|
||||
github.com/go-sql-driver/mysql v1.5.0
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0
|
||||
@@ -17,7 +16,6 @@ require (
|
||||
github.com/jander/golog v0.0.0-20150917071935-954a5be801fc
|
||||
github.com/jinzhu/gorm v1.9.12
|
||||
github.com/jroimartin/gocui v0.4.0
|
||||
github.com/juju/ratelimit v1.0.1
|
||||
github.com/kardianos/service v1.0.0
|
||||
github.com/muesli/cache2go v0.0.0-20200423001931-a100c5aac93f
|
||||
github.com/nicksnyder/go-i18n/v2 v2.0.3
|
||||
@@ -28,14 +26,11 @@ require (
|
||||
github.com/syndtr/goleveldb v1.0.0
|
||||
github.com/wenzhenxi/gorsa v0.0.0-20210524035706-528c7050d703
|
||||
github.com/xxjwxc/gowp v0.0.0-20200603130651-4d7368b0e285
|
||||
github.com/yudeguang/iox v0.0.0-20180519090448-bffdb29c87c0 // indirect
|
||||
github.com/yudeguang/ratelimit v0.0.0-20220109125206-af2bdcdaf64a
|
||||
go.uber.org/ratelimit v0.2.0
|
||||
go.uber.org/atomic v1.7.0 // indirect
|
||||
go.uber.org/zap v1.10.0
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
|
||||
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
|
||||
golang.org/x/text v0.3.3
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4
|
||||
google.golang.org/grpc v1.29.1
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
|
||||
gopkg.in/eapache/queue.v1 v1.1.0
|
||||
|
13
go.sum
13
go.sum
@@ -5,8 +5,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
|
||||
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI=
|
||||
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg=
|
||||
github.com/ant0ine/go-json-rest v3.3.2+incompatible h1:nBixrkLFiDNAW0hauKDLc8yJI6XfrQumWvytE1Hk14E=
|
||||
github.com/ant0ine/go-json-rest v3.3.2+incompatible/go.mod h1:q6aCt0GfU6LhpBsnZ/2U+mwe+0XB5WStbmwyoPfc+sk=
|
||||
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
|
||||
@@ -54,8 +52,6 @@ github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHqu
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
|
||||
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
||||
github.com/garyburd/redigo v1.6.3 h1:HCeeRluvAgMusMomi1+6Y5dmFOdYV/JzoRrrbFlkGIc=
|
||||
github.com/garyburd/redigo v1.6.3/go.mod h1:rTb6epsqigu3kYKBnaF028A7Tf/Aw5s0cqA47doKKqw=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
|
||||
@@ -120,8 +116,6 @@ github.com/jinzhu/now v1.1.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/
|
||||
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
|
||||
github.com/jroimartin/gocui v0.4.0 h1:52jnalstgmc25FmtGcWqa0tcbMEWS6RpFLsOIO+I+E8=
|
||||
github.com/jroimartin/gocui v0.4.0/go.mod h1:7i7bbj99OgFHzo7kB2zPb8pXLqMBSQegY7azfqXMkyY=
|
||||
github.com/juju/ratelimit v1.0.1 h1:+7AIFJVQ0EQgq/K9+0Krm7m530Du7tIz0METWzN0RgY=
|
||||
github.com/juju/ratelimit v1.0.1/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk=
|
||||
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
||||
github.com/kardianos/service v1.0.0 h1:HgQS3mFfOlyntWX8Oke98JcJLqt1DBcHR4kxShpYef0=
|
||||
github.com/kardianos/service v1.0.0/go.mod h1:8CzDhVuCuugtsHyZoTvsOBuvonN/UDBvl0kH+BUxvbo=
|
||||
@@ -219,10 +213,6 @@ github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:
|
||||
github.com/xxjwxc/gowp v0.0.0-20200603130651-4d7368b0e285 h1:gbdax2ZvHZwe8zxu7by/HMuDUS47iHR2zmEzlgAHBMw=
|
||||
github.com/xxjwxc/gowp v0.0.0-20200603130651-4d7368b0e285/go.mod h1:yJ/fY5BorWARfDDsxBU/MyQTHc5MVyNcqBQQYD6MN0k=
|
||||
github.com/xxjwxc/public v0.0.0-20200603115833-341beff27850/go.mod h1:fp3M+FEQrCgWD1fZ/PLwZkCTglf086OEhC9LcydAUnc=
|
||||
github.com/yudeguang/iox v0.0.0-20180519090448-bffdb29c87c0 h1:EIjQmYpnyudINP5M6Y3hFT/AA9SEaZ6La0MtHRkb0X0=
|
||||
github.com/yudeguang/iox v0.0.0-20180519090448-bffdb29c87c0/go.mod h1:/yeZ8yPyE9g4jM7Z8LPKwi1L9lDGmLGQ0ywR4rtdNdY=
|
||||
github.com/yudeguang/ratelimit v0.0.0-20220109125206-af2bdcdaf64a h1:z/xYclBL+mgRK5R8RI/jkUuLFXFYjxWI4aaRLwxi85c=
|
||||
github.com/yudeguang/ratelimit v0.0.0-20220109125206-af2bdcdaf64a/go.mod h1:NcFk/p88iJxUWYrlDIat7mJLufpsHExnYvxUkApkhJc=
|
||||
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
|
||||
go.opentelemetry.io/otel v0.16.0 h1:uIWEbdeb4vpKPGITLsRVUS44L5oDbDUCZxn8lkxhmgw=
|
||||
go.opentelemetry.io/otel v0.16.0/go.mod h1:e4GKElweB8W2gWUqbghw0B8t5MCTccc9212eNHnOHwA=
|
||||
@@ -231,8 +221,6 @@ go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
|
||||
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
|
||||
go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=
|
||||
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
|
||||
go.uber.org/ratelimit v0.2.0 h1:UQE2Bgi7p2B85uP5dC2bbRtig0C+OeNRnNEafLjsLPA=
|
||||
go.uber.org/ratelimit v0.2.0/go.mod h1:YYBV4e4naJvhpitQrWJu1vCpgB7CboMe0qhltKt6mUg=
|
||||
go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM=
|
||||
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
@@ -287,7 +275,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
|
157
wordsfilter/node.go
Normal file
157
wordsfilter/node.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package wordsfilter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Node is one entry of the sensitive-word tree.
type Node struct {
	// Child maps the next single-character string to the node that
	// continues the word.
	Child map[string]*Node
	// Placeholders is the replacement text for the word that ends at this
	// node. It is empty when no word ends here.
	Placeholders string
}
|
||||
|
||||
// New creates a node.
|
||||
func NewNode(child map[string]*Node, placeholders string) *Node {
|
||||
return &Node{
|
||||
Child: child,
|
||||
Placeholders: placeholders,
|
||||
}
|
||||
}
|
||||
|
||||
// Add sensitive words to specified sensitive words Map.
|
||||
func (node *Node) add(text string, root map[string]*Node, placeholder string) {
|
||||
if text == "" {
|
||||
return
|
||||
}
|
||||
textr := []rune(text)
|
||||
end := len(textr) - 1
|
||||
for i := 0; i <= end; i++ {
|
||||
word := string(textr[i])
|
||||
if n, ok := root[word]; ok { // contains key
|
||||
if i == end { // the last
|
||||
n.Placeholders = strings.Repeat(placeholder, end+1)
|
||||
} else {
|
||||
if n.Child != nil {
|
||||
root = n.Child
|
||||
} else {
|
||||
root = make(map[string]*Node)
|
||||
n.Child = root
|
||||
}
|
||||
}
|
||||
} else {
|
||||
placeholders, child := "", make(map[string]*Node)
|
||||
if i == end {
|
||||
placeholders = strings.Repeat(placeholder, end+1)
|
||||
}
|
||||
root[word] = NewNode(child, placeholders)
|
||||
root = child
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove specified sensitive words from sensitive word map.
|
||||
func (node *Node) remove(text string, root map[string]*Node) {
|
||||
textr := []rune(text)
|
||||
end := len(textr) - 1
|
||||
for i := 0; i <= end; i++ {
|
||||
word := string(textr[i])
|
||||
if n, ok := root[word]; ok {
|
||||
if i == end {
|
||||
n.Placeholders = ""
|
||||
} else {
|
||||
root = n.Child
|
||||
}
|
||||
} else {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Replace sensitive words in strings and return new strings.
|
||||
// Follow the principle of maximum matching.
|
||||
func (node *Node) replace(text string, root map[string]*Node) string {
|
||||
if root == nil || text == "" {
|
||||
return text
|
||||
}
|
||||
textr := []rune(text)
|
||||
i, s, e, l := 0, 0, 0, len(textr)
|
||||
bf := bytes.Buffer{}
|
||||
words := make(map[string]*Node)
|
||||
var back []*Node
|
||||
loop:
|
||||
for e < l {
|
||||
words = root
|
||||
i = e
|
||||
// Maximum Matching Principle, Matching Backwards First
|
||||
for ; i < l; i ++ {
|
||||
word := string(textr[i])
|
||||
if n, ok := words[word]; ok {
|
||||
back = append(back, n)
|
||||
if n.Child != nil {
|
||||
words = n.Child
|
||||
} else if n.Placeholders != "" {
|
||||
bf.WriteString(string(textr[s:e]))
|
||||
bf.WriteString(n.Placeholders)
|
||||
i++
|
||||
s, e = i, i
|
||||
continue loop
|
||||
} else {
|
||||
break
|
||||
}
|
||||
} else if n != nil && n.Placeholders != "" {
|
||||
bf.WriteString(string(textr[s:e]))
|
||||
bf.WriteString(n.Placeholders)
|
||||
s, e = i, i
|
||||
continue loop
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Backward match fails, backtracking.
|
||||
for ; i > e; i-- {
|
||||
bl := len(back)
|
||||
if bl == 0 {
|
||||
break
|
||||
}
|
||||
last := back[bl-1]
|
||||
back = back[:bl-1]
|
||||
if last.Placeholders != "" {
|
||||
bf.WriteString(string(textr[s:e]))
|
||||
bf.WriteString(last.Placeholders)
|
||||
s, e = i, i
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
|
||||
e++
|
||||
back = back[:0]
|
||||
}
|
||||
bf.WriteString(string(textr[s:e]))
|
||||
|
||||
return bf.String()
|
||||
}
|
||||
|
||||
// Whether the string contains sensitive words.
|
||||
func (node *Node) contains(text string, root map[string]*Node) bool {
|
||||
if root == nil || text == "" {
|
||||
return false
|
||||
}
|
||||
textr := []rune(text)
|
||||
end := len(textr) - 1
|
||||
for i := 0; i <= end; i++ {
|
||||
word := string(textr[i])
|
||||
if n, ok := root[word]; ok {
|
||||
if i == end {
|
||||
return n.Placeholders != ""
|
||||
} else {
|
||||
if len(n.Child) == 0 { // last
|
||||
return true
|
||||
}
|
||||
root = n.Child
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
118
wordsfilter/words_filter.go
Normal file
118
wordsfilter/words_filter.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package wordsfilter
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Defaults copied into every filter created by New.
var (
	// DefaultPlaceholder is the initial WordsFilter.Placeholder value.
	DefaultPlaceholder = "*"
	// DefaultStripSpace is the initial WordsFilter.StripSpace value.
	DefaultStripSpace = true
)
|
||||
|
||||
type WordsFilter struct {
|
||||
Placeholder string
|
||||
StripSpace bool
|
||||
node *Node
|
||||
mutex sync.RWMutex
|
||||
}
|
||||
|
||||
// New creates a words filter.
|
||||
func New() *WordsFilter {
|
||||
return &WordsFilter{
|
||||
Placeholder: DefaultPlaceholder,
|
||||
StripSpace: DefaultStripSpace,
|
||||
node: NewNode(make(map[string]*Node), ""),
|
||||
}
|
||||
}
|
||||
|
||||
// Generate Convert sensitive text lists into sensitive word tree nodes
|
||||
func (wf *WordsFilter) Generate(texts []string) map[string]*Node {
|
||||
root := make(map[string]*Node)
|
||||
for _, text := range texts {
|
||||
wf.Add(text, root)
|
||||
}
|
||||
return root
|
||||
}
|
||||
|
||||
// GenerateWithFile Convert sensitive text from file into sensitive word tree nodes.
|
||||
// File content format, please wrap every sensitive word.
|
||||
func (wf *WordsFilter) GenerateWithFile(path string) (map[string]*Node, error) {
|
||||
fd, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fd.Close()
|
||||
buf := bufio.NewReader(fd)
|
||||
var texts []string
|
||||
for {
|
||||
line, _, err := buf.ReadLine()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
text := strings.TrimSpace(string(line))
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
texts = append(texts, text)
|
||||
}
|
||||
|
||||
root := wf.Generate(texts)
|
||||
return root, nil
|
||||
}
|
||||
|
||||
// Add sensitive words to specified sensitive words Map.
|
||||
func (wf *WordsFilter) Add(text string, root map[string]*Node) {
|
||||
if wf.StripSpace {
|
||||
text = stripSpace(text)
|
||||
}
|
||||
wf.mutex.Lock()
|
||||
defer wf.mutex.Unlock()
|
||||
wf.node.add(text, root, wf.Placeholder)
|
||||
}
|
||||
|
||||
// Replace sensitive words in strings and return new strings.
|
||||
func (wf *WordsFilter) Replace(text string, root map[string]*Node) string {
|
||||
if wf.StripSpace {
|
||||
text = stripSpace(text)
|
||||
}
|
||||
wf.mutex.RLock()
|
||||
defer wf.mutex.RUnlock()
|
||||
return wf.node.replace(text, root)
|
||||
}
|
||||
|
||||
// Contains Whether the string contains sensitive words.
|
||||
func (wf *WordsFilter) Contains(text string, root map[string]*Node) bool {
|
||||
if wf.StripSpace {
|
||||
text = stripSpace(text)
|
||||
}
|
||||
wf.mutex.RLock()
|
||||
defer wf.mutex.RUnlock()
|
||||
return wf.node.contains(text, root)
|
||||
}
|
||||
|
||||
// Remove specified sensitive words from sensitive word map.
|
||||
func (wf *WordsFilter) Remove(text string, root map[string]*Node) {
|
||||
if wf.StripSpace {
|
||||
text = stripSpace(text)
|
||||
}
|
||||
wf.mutex.Lock()
|
||||
defer wf.mutex.Unlock()
|
||||
wf.node.remove(text, root)
|
||||
}
|
||||
|
||||
// stripSpace returns str with all whitespace removed: the string is split
// into whitespace-separated fields, which are then joined back to back.
func stripSpace(str string) string {
	joined := new(bytes.Buffer)
	for _, part := range strings.Fields(str) {
		joined.WriteString(part)
	}
	return joined.String()
}
|
34
wordsfilter/words_filter_test.go
Normal file
34
wordsfilter/words_filter_test.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package wordsfilter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestWordsFilter(t *testing.T) {
|
||||
texts := []string{
|
||||
"Miyamoto Musashi",
|
||||
"妲己",
|
||||
"アンジェラ",
|
||||
"ความรุ่งโรจน์",
|
||||
}
|
||||
wf := New()
|
||||
root := wf.Generate(texts)
|
||||
wf.Remove("shif", root)
|
||||
c1 := wf.Contains("アン", root) // 是否有敏感词
|
||||
if c1 != false {
|
||||
t.Errorf("Test Contains expect false, get %T, %v", c1, c1)
|
||||
}
|
||||
c2 := wf.Contains("->アンジェラ2333", root)
|
||||
if c2 != true {
|
||||
t.Errorf("Test Contains expect true, get %T, %v", c2, c2)
|
||||
}
|
||||
r1 := wf.Replace("Game ความรุ่งโรจน์ i like 妲己 heroMiyamotoMusashi", root)
|
||||
if r1 != "Game*************ilike**hero***************" {
|
||||
t.Errorf("Test Replace expect Game*************ilike**hero***************,get %T,%v", r1, r1)
|
||||
}
|
||||
// Test generated with file.
|
||||
root, _ = wf.GenerateWithFile("./words_test.txt")
|
||||
if wf.Contains("アンジェラ", root) != true {
|
||||
t.Errorf("Test Contains expect true, get %T, %v", c2, c2)
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user