Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add search string parsing #1982

Merged
merged 5 commits into from
Nov 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions pkg/models/search.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package models

import "strings"

// Tokens that carry special meaning inside a search string.
const (
	// phraseChar opens and closes a quoted phrase.
	phraseChar = '"'

	// notPrefix marks a term that must not appear in the results.
	notPrefix = '-'

	// orSymbol is the symbolic spelling of the OR operator.
	orSymbol = "|"

	// or is the keyword spelling of the OR operator.
	or = "OR"
)

// SearchSpecs is the parsed form of a text-based search query.
type SearchSpecs struct {
	// MustHave lists the terms that must all appear in the results.
	MustHave []string

	// AnySets lists groups of alternative terms. At least one term from
	// each group must appear in the results.
	AnySets [][]string

	// MustNot lists the terms that must not appear in the results.
	MustNot []string
}

// combinePhrases detects quote characters at the start and end of
// words and combines the contents into a single word.
func combinePhrases(words []string) []string {
var ret []string
startIndex := -1
for i, w := range words {
if startIndex == -1 {
// looking for start of phrase
// this could either be " or -"
ww := w
if len(w) > 0 && w[0] == notPrefix {
ww = w[1:]
}
if len(ww) > 0 && ww[0] == phraseChar && (len(ww) < 2 || ww[len(ww)-1] != phraseChar) {
startIndex = i
continue
}

ret = append(ret, w)
} else if len(w) > 0 && w[len(w)-1] == phraseChar { // looking for end of phrase
// combine words
phrase := strings.Join(words[startIndex:i+1], " ")

// add to return value
ret = append(ret, phrase)
startIndex = -1
}
}

if startIndex != -1 {
ret = append(ret, words[startIndex:]...)
}

return ret
}

// extractOrConditions pulls OR expressions out of words, appends one
// any-set per expression to searchSpec.AnySets and returns the words that
// were not part of any OR expression, in their original order. The input
// slice is not modified.
//
// A word is an OR operator when it equals the keyword "OR"
// (case-insensitive) or the symbol "|", and it has a word on both sides.
// An operator without a preceding or following word is returned as an
// ordinary word. Consecutive operators sharing operands ("a OR b | c")
// form a single any-set, regardless of which spelling each operator uses.
//
// Operands have surrounding phrase quotes removed. Operands that are empty
// after unquoting are dropped, and an expression left with no operands adds
// no any-set.
func extractOrConditions(words []string, searchSpec *SearchSpecs) []string {
	isOr := func(w string) bool {
		return strings.EqualFold(w, or) || w == orSymbol
	}

	n := len(words)
	rest := make([]string, 0, n)

	for i := 0; i < n; {
		w := words[i]

		// An operator needs an operand on each side: a previously kept word
		// and a following word. Anything else is kept as a plain word.
		if !isOr(w) || len(rest) == 0 || i == n-1 {
			rest = append(rest, w)
			i++
			continue
		}

		// The first operand is the word kept immediately before the
		// operator; it now belongs to the any-set rather than the leftovers.
		operands := []string{rest[len(rest)-1], words[i+1]}
		rest = rest[:len(rest)-1]
		i += 2

		// Extend the chain while another operator follows and still has an
		// operand after it. A dangling operator at the very end is left for
		// the next iteration, which keeps it as a plain word.
		for i+1 < n && isOr(words[i]) {
			operands = append(operands, words[i+1])
			i += 2
		}

		var set []string
		for _, operand := range operands {
			if term := extractPhrase(operand); term != "" {
				set = append(set, term)
			}
		}

		// ignore expressions made up solely of empty quotes
		if len(set) > 0 {
			searchSpec.AnySets = append(searchSpec.AnySets, set)
		}
	}

	return rest
}

// extractNotConditions moves every negated word out of words and into
// searchSpec.MustNot, returning the remaining words in order.
//
// A word is negated when it is longer than one character and starts with
// the not prefix. The prefix and any surrounding phrase quotes are removed
// before the term is recorded; a negated word that is empty after
// unquoting is discarded entirely.
func extractNotConditions(words []string, searchSpec *SearchSpecs) []string {
	var kept []string

	for _, candidate := range words {
		negated := len(candidate) > 1 && candidate[0] == notPrefix
		if !negated {
			kept = append(kept, candidate)
			continue
		}

		if term := extractPhrase(candidate[1:]); term != "" {
			searchSpec.MustNot = append(searchSpec.MustNot, term)
		}
	}

	return kept
}

// extractPhrase returns w without its surrounding phrase quotes when w
// both starts and ends with one. Any other input, including a lone quote
// character, is returned unchanged.
func extractPhrase(w string) string {
	last := len(w) - 1

	// too short to hold both an opening and a closing quote
	if last < 1 {
		return w
	}

	if w[0] != phraseChar || w[last] != phraseChar {
		return w
	}

	return w[1:last]
}

// ParseSearchString parses the Q value and returns a SearchSpecs object.
//
// The input is trimmed and split on single spaces; blank input yields an
// empty SearchSpecs. The words are then processed in order:
//
//  1. Words enclosed in quotes (") are combined and treated as a single term.
//  2. Where the keyword "OR" (case-insensitive) or the symbol "|" appears
//     between two terms (and is not part of a quoted phrase), one of the
//     OR'd terms must appear in the results. An OR that is the first or
//     last term is treated literally.
//  3. Where a remaining term is prefixed with "-", that term must not
//     appear in the results.
//  4. Every term left over must appear in the results.
//
// Because OR conditions are extracted before NOT conditions, an OR operand
// carrying a not prefix is kept as a literal term, prefix included.
// Terms that are empty once unquoted are ignored.
func ParseSearchString(s string) SearchSpecs {
	var specs SearchSpecs

	trimmed := strings.TrimSpace(s)
	if trimmed == "" {
		return specs
	}

	// phrases first, so that operators inside quotes are left alone
	tokens := combinePhrases(strings.Split(trimmed, " "))
	tokens = extractOrConditions(tokens, &specs)
	tokens = extractNotConditions(tokens, &specs)

	// whatever survived the extraction stages is AND'd
	for _, tok := range tokens {
		if term := extractPhrase(tok); term != "" {
			specs.MustHave = append(specs.MustHave, term)
		}
	}

	return specs
}
227 changes: 227 additions & 0 deletions pkg/models/search_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
package models

import (
"reflect"
"testing"
)

// TestParseSearchString checks ParseSearchString against a table of query
// strings covering plain terms, quoted phrases, OR expressions in both
// keyword and symbol form, not prefixes and their edge cases.
func TestParseSearchString(t *testing.T) {
	tests := []struct {
		name string
		q    string
		want SearchSpecs
	}{
		{
			name: "basic",
			q:    "a b c",
			want: SearchSpecs{
				MustHave: []string{"a", "b", "c"},
			},
		},
		{
			name: "empty",
			q:    "",
			want: SearchSpecs{},
		},
		{
			name: "whitespace",
			q:    " ",
			want: SearchSpecs{},
		},
		{
			name: "single",
			q:    "a",
			want: SearchSpecs{
				MustHave: []string{"a"},
			},
		},
		{
			name: "quoted",
			q:    `"a b" c`,
			want: SearchSpecs{
				MustHave: []string{"a b", "c"},
			},
		},
		{
			// whitespace inside a phrase is preserved as typed
			name: "quoted double space",
			q:    `"a  b" c`,
			want: SearchSpecs{
				MustHave: []string{"a  b", "c"},
			},
		},
		{
			name: "quoted end space",
			q:    `"a b " c`,
			want: SearchSpecs{
				MustHave: []string{"a b ", "c"},
			},
		},
		{
			name: "no matching end quote",
			q:    `"a b c`,
			want: SearchSpecs{
				MustHave: []string{`"a`, "b", "c"},
			},
		},
		{
			name: "no matching start quote",
			q:    `a b c"`,
			want: SearchSpecs{
				MustHave: []string{"a", "b", `c"`},
			},
		},
		{
			name: "or",
			q:    "a OR b",
			want: SearchSpecs{
				AnySets: [][]string{
					{"a", "b"},
				},
			},
		},
		{
			name: "multi or",
			q:    "a OR b c OR d",
			want: SearchSpecs{
				AnySets: [][]string{
					{"a", "b"},
					{"c", "d"},
				},
			},
		},
		{
			name: "lowercase or",
			q:    "a or b",
			want: SearchSpecs{
				AnySets: [][]string{
					{"a", "b"},
				},
			},
		},
		{
			name: "or symbol",
			q:    "a | b",
			want: SearchSpecs{
				AnySets: [][]string{
					{"a", "b"},
				},
			},
		},
		{
			name: "quoted or",
			q:    `a "OR" b`,
			want: SearchSpecs{
				MustHave: []string{"a", "OR", "b"},
			},
		},
		{
			name: "quoted or symbol",
			q:    `a "|" b`,
			want: SearchSpecs{
				MustHave: []string{"a", "|", "b"},
			},
		},
		{
			name: "or phrases",
			q:    `"a b" OR "c d"`,
			want: SearchSpecs{
				AnySets: [][]string{
					{"a b", "c d"},
				},
			},
		},
		{
			name: "or at start",
			q:    "OR a",
			want: SearchSpecs{
				MustHave: []string{"OR", "a"},
			},
		},
		{
			name: "or at end",
			q:    "a OR",
			want: SearchSpecs{
				MustHave: []string{"a", "OR"},
			},
		},
		{
			name: "or symbol at start",
			q:    "| a",
			want: SearchSpecs{
				MustHave: []string{"|", "a"},
			},
		},
		{
			name: "or symbol at end",
			q:    "a |",
			want: SearchSpecs{
				MustHave: []string{"a", "|"},
			},
		},
		{
			name: "nots",
			q:    "-a -b",
			want: SearchSpecs{
				MustNot: []string{"a", "b"},
			},
		},
		{
			name: "not or",
			q:    "-a OR b",
			want: SearchSpecs{
				AnySets: [][]string{
					{"-a", "b"},
				},
			},
		},
		{
			name: "not phrase",
			q:    `-"a b"`,
			want: SearchSpecs{
				MustNot: []string{"a b"},
			},
		},
		{
			name: "not in phrase",
			q:    `"-a b"`,
			want: SearchSpecs{
				MustHave: []string{"-a b"},
			},
		},
		{
			name: "double not",
			q:    "--a",
			want: SearchSpecs{
				MustNot: []string{"-a"},
			},
		},
		{
			name: "empty quote",
			q:    `"" a`,
			want: SearchSpecs{
				MustHave: []string{"a"},
			},
		},
		{
			name: "not empty quote",
			q:    `-"" a`,
			want: SearchSpecs{
				MustHave: []string{"a"},
			},
		},
		{
			name: "quote in word",
			q:    `ab"cd"`,
			want: SearchSpecs{
				MustHave: []string{`ab"cd"`},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := ParseSearchString(tt.q)
			if !reflect.DeepEqual(got, tt.want) {
				// %+v prints field names so the three term lists can be told apart
				t.Errorf("ParseSearchString(%q) = %+v, want %+v", tt.q, got, tt.want)
			}
		})
	}
}
Loading