Keywords: Don't split words with a dash

Signed-off-by: Michael Mayer <michael@liquidbytes.net>
This commit is contained in:
Michael Mayer
2020-04-16 15:57:07 +02:00
parent 54fde97aba
commit 204ae30c90
6 changed files with 38 additions and 19 deletions

View File

@@ -87,14 +87,11 @@ func (m *Location) Find(db *gorm.DB, api string) error {
// Keywords computes keyword based on a Location // Keywords computes keyword based on a Location
func (m *Location) Keywords() (result []string) { func (m *Location) Keywords() (result []string) {
result = append(result, txt.Keywords(m.City())...) result = append(result, txt.Keywords(txt.ReplaceSpaces(m.City(), "-"))...)
result = append(result, txt.Keywords(m.State())...) result = append(result, txt.Keywords(txt.ReplaceSpaces(m.State(), "-"))...)
result = append(result, txt.Keywords(m.CountryName())...) result = append(result, txt.Keywords(txt.ReplaceSpaces(m.CountryName(), "-"))...)
result = append(result, txt.Keywords(m.Category())...) result = append(result, txt.Keywords(m.Category())...)
result = append(result, txt.Keywords(m.Name())...) result = append(result, txt.Keywords(m.Name())...)
result = append(result, txt.Keywords(m.Label())...)
result = append(result, txt.Keywords(m.Notes())...)
result = txt.UniqueWords(result) result = txt.UniqueWords(result)

View File

@@ -308,7 +308,7 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
} }
w = append(w, locKeywords...) w = append(w, locKeywords...)
w = append(w, txt.Keywords(file.OriginalName)...) w = append(w, txt.FilenameWords(file.OriginalName)...)
w = append(w, file.FileMainColor) w = append(w, file.FileMainColor)
w = append(w, labels.Keywords()...) w = append(w, labels.Keywords()...)

View File

@@ -14,7 +14,7 @@ func TestDiscover(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
assert.Equal(t, "Webdav", r.AccName) assert.Equal(t, "Webdav-Dummy", r.AccName)
assert.Equal(t, "webdav", r.AccType) assert.Equal(t, "webdav", r.AccType)
assert.Equal(t, "http://webdav-dummy/", r.AccURL) assert.Equal(t, "http://webdav-dummy/", r.AccURL)
assert.Equal(t, "admin", r.AccUser) assert.Equal(t, "admin", r.AccUser)
@@ -28,7 +28,7 @@ func TestDiscover(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
assert.Equal(t, "Webdav", r.AccName) assert.Equal(t, "Webdav-Dummy", r.AccName)
assert.Equal(t, "webdav", r.AccType) assert.Equal(t, "webdav", r.AccType)
assert.Equal(t, "http://webdav-dummy/", r.AccURL) assert.Equal(t, "http://webdav-dummy/", r.AccURL)
assert.Equal(t, "admin", r.AccUser) assert.Equal(t, "admin", r.AccUser)

View File

@@ -61,5 +61,4 @@ func TestBaseAbs(t *testing.T) {
assert.Equal(t, "/testdata/Test (4)", result) assert.Equal(t, "/testdata/Test (4)", result)
}) })
} }

View File

@@ -6,13 +6,25 @@ import (
"strings" "strings"
) )
var KeywordsRegexp = regexp.MustCompile("[\\p{L}]{3,}") var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\-]{3,}")
// Words returns a slice of words with at least 3 characters from a string. // Words returns a slice of words with at least 3 characters from a string; dashes count as characters ("ile-de-france").
func Words(s string) (results []string) { func Words(s string) (results []string) {
return KeywordsRegexp.FindAllString(s, -1) return KeywordsRegexp.FindAllString(s, -1)
} }
// ReplaceSpaces returns a copy of s in which every space (" ") has been
// substituted with char. Despite its name, char may be any string, including
// the empty string, in which case the spaces are simply removed.
func ReplaceSpaces(s string, char string) string {
	return strings.ReplaceAll(s, " ", char)
}
// FilenameKeywordsRegexp matches runs of three or more Unicode letters.
var FilenameKeywordsRegexp = regexp.MustCompile("[\\p{L}]{3,}")

// FilenameWords extracts every run of at least 3 letters from s. Any other
// character, including a dash, acts as a separator, so "ile-de-france" yields
// "ile" and "france". The result is nil when s contains no such run.
func FilenameWords(s string) (results []string) {
	results = FilenameKeywordsRegexp.FindAllString(s, -1)

	return results
}
// Keywords returns a slice of keywords without stopwords. // Keywords returns a slice of keywords without stopwords.
func Keywords(s string) (results []string) { func Keywords(s string) (results []string) {
for _, w := range Words(s) { for _, w := range Words(s) {

View File

@@ -7,9 +7,9 @@ import (
) )
func TestWords(t *testing.T) { func TestWords(t *testing.T) {
t.Run("I'm a lazy brown fox!", func(t *testing.T) { t.Run("I'm a lazy-brown fox!", func(t *testing.T) {
result := Words("I'm a lazy BRoWN fox!") result := Words("I'm a lazy-BRoWN fox!")
assert.Equal(t, []string{"lazy", "BRoWN", "fox"}, result) assert.Equal(t, []string{"lazy-BRoWN", "fox"}, result)
}) })
t.Run("no result", func(t *testing.T) { t.Run("no result", func(t *testing.T) {
result := Words("x") result := Words("x")
@@ -17,6 +17,17 @@ func TestWords(t *testing.T) {
}) })
} }
// TestFilenameWords verifies that FilenameWords splits on a dash and yields a
// nil slice when the input contains no word of at least 3 letters.
func TestFilenameWords(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected []string
	}{
		{
			name:     "I'm a lazy-brown fox!",
			input:    "I'm a lazy-BRoWN fox!",
			expected: []string{"lazy", "BRoWN", "fox"},
		},
		{
			name:     "no result",
			input:    "x",
			expected: []string(nil),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			result := FilenameWords(tc.input)
			assert.Equal(t, tc.expected, result)
		})
	}
}
func TestKeywords(t *testing.T) { func TestKeywords(t *testing.T) {
t.Run("I'm a lazy brown fox!", func(t *testing.T) { t.Run("I'm a lazy brown fox!", func(t *testing.T) {
result := Keywords("I'm a lazy BRoWN img!") result := Keywords("I'm a lazy BRoWN img!")
@@ -30,8 +41,8 @@ func TestKeywords(t *testing.T) {
func TestUniqueWords(t *testing.T) { func TestUniqueWords(t *testing.T) {
t.Run("many", func(t *testing.T) { t.Run("many", func(t *testing.T) {
result := UniqueWords([]string{"lazy", "brown", "apple", "brown"}) result := UniqueWords([]string{"lazy", "brown", "apple", "brown", "new-york"})
assert.Equal(t, []string{"apple", "brown", "lazy"}, result) assert.Equal(t, []string{"apple", "brown", "lazy", "new-york"}, result)
}) })
t.Run("one", func(t *testing.T) { t.Run("one", func(t *testing.T) {
result := UniqueWords([]string{"lazy"}) result := UniqueWords([]string{"lazy"})
@@ -41,8 +52,8 @@ func TestUniqueWords(t *testing.T) {
func TestUniqueKeywords(t *testing.T) { func TestUniqueKeywords(t *testing.T) {
t.Run("many", func(t *testing.T) { t.Run("many", func(t *testing.T) {
result := UniqueKeywords("lazy, brown, apple, brown, ...") result := UniqueKeywords("lazy, brown, apple, new-york, brown, ...")
assert.Equal(t, []string{"apple", "brown", "lazy"}, result) assert.Equal(t, []string{"apple", "brown", "lazy", "new-york"}, result)
}) })
t.Run("one", func(t *testing.T) { t.Run("one", func(t *testing.T) {
result := UniqueKeywords("") result := UniqueKeywords("")