Skip to content

Commit

Permalink
Add support to suggestions for inputs that already have native languag…
Browse files Browse the repository at this point in the history
…e characters
  • Loading branch information
subins2000 committed Nov 20, 2023
1 parent 510e0b0 commit e6dd5e0
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 9 deletions.
2 changes: 1 addition & 1 deletion govarnam/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func getVSTLookupDirs() []string {
}
}

//FindVSTDir Get the VST storing directory
// FindVSTDir Get the VST storing directory
func FindVSTDir() (string, error) {
for _, loc := range getVSTLookupDirs() {
if dirExists(loc) {
Expand Down
4 changes: 3 additions & 1 deletion govarnam/govarnam.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"sort"
"strings"
"time"
"unicode"
"unicode/utf8"

// sqlite3
Expand All @@ -25,6 +26,7 @@ type LangRules struct {
Virama string
IndicDigits bool
PatternLongestLength int // Longest length of pattern in VST
UnicodeBlock unicode.RangeTable
}

// SchemeDetails of VST
Expand Down Expand Up @@ -247,8 +249,8 @@ func (varnam *Varnam) setDefaultConfig() {
varnam.DictionaryMatchExact = false

varnam.LangRules.IndicDigits = false

varnam.LangRules.Virama, _ = varnam.getVirama()
varnam.LangRules.UnicodeBlock = varnam.getUnicodeBlock()

if varnam.SchemeDetails.LangCode == "ml" {
varnam.RegisterPatternWordPartializer(varnam.mlPatternWordPartializer)
Expand Down
14 changes: 14 additions & 0 deletions govarnam/govarnam_ml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package govarnam
import (
"context"
"log"
"os"
"path"
"strings"
"testing"
Expand Down Expand Up @@ -477,6 +478,7 @@ func TestMLRecentlyLearnedWords(t *testing.T) {
}

result, err = varnam.GetRecentlyLearntWords(context.Background(), 4, len(words))
checkError(err)
assertEqual(t, result[0].Word, "ആലപ്പുഴ")
}

Expand All @@ -494,3 +496,15 @@ func TestMLGetSuggestions(t *testing.T) {

assertEqual(t, result[0].Word, "ആലപ്പുഴ")
}

// TestMLNativePartialWordsInInput checks that an input which already starts
// with native Malayalam characters and ends with a Latin remainder still
// produces the learnt word as the top dictionary suggestion.
func TestMLNativePartialWordsInInput(t *testing.T) {
	varnam := getVarnamInstance("ml")

	// Seed the dictionary with the words the mixed inputs should resolve to.
	words := []string{"ആലപ്പുഴ", "പുസ്തകം"}
	for _, word := range words {
		varnam.Learn(word, 0)
	}

	cases := []struct {
		input string
		want  string
	}{
		{"ആലppu", "ആലപ്പുഴ"},
		{"പുസ്തkam", "പുസ്തകം"},
	}

	for _, tc := range cases {
		suggestions := varnam.TransliterateAdvanced(tc.input).DictionarySuggestions

		// Fail this test with a readable message instead of panicking with an
		// index-out-of-range when no suggestion comes back.
		if len(suggestions) == 0 {
			t.Fatalf("no dictionary suggestions for input %q, expected %q", tc.input, tc.want)
		}

		assertEqual(t, suggestions[0].Word, tc.want)
	}
}
3 changes: 1 addition & 2 deletions govarnam/govarnam_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ func TestMain(m *testing.M) {
log.Fatal(err)
}

testTempDir, err = os.TempDir("", "govarnam_test")
checkError(err)
testTempDir = os.TempDir()

for _, schemeDetail := range schemeDetails {
setUp(schemeDetail.Identifier)
Expand Down
14 changes: 14 additions & 0 deletions govarnam/govarnam_ml.go → govarnam/lang_specific_rules.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package govarnam

import "unicode"

/**
* govarnam - An Indian language transliteration library
* Copyright Subin Siby <mail at subinsb (.) com>, 2021
Expand All @@ -23,3 +25,15 @@ func (varnam *Varnam) mlPatternWordPartializer(sug *Suggestion) {
sug.Word = sug.Word[0:len(sug.Word)-size] + "മ"
}
}

// getUnicodeBlock returns the range table for the Unicode block matching the
// scheme's language code: U+0C80-U+0CFF for "kn" and U+0D00-U+0D7F for "ml".
// Any other language code yields an empty table, which matches no rune.
//
// TODO add for all languages
func (varnam *Varnam) getUnicodeBlock() unicode.RangeTable {
	var block unicode.RangeTable

	if code := varnam.SchemeDetails.LangCode; code == "kn" {
		block.R16 = []unicode.Range16{{Lo: 0x0C80, Hi: 0x0CFF, Stride: 1}}
	} else if code == "ml" {
		block.R16 = []unicode.Range16{{Lo: 0x0D00, Hi: 0x0D7F, Stride: 1}}
	}

	return block
}
16 changes: 12 additions & 4 deletions govarnam/symbol.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"fmt"
"log"
"strings"
"unicode"

"github.com/mattn/go-sqlite3"
)
Expand Down Expand Up @@ -274,10 +275,17 @@ func (varnam *Varnam) tokenizeWord(ctx context.Context, word string, matchType i
matches := varnam.findLongestPatternMatchSymbols(ctx, sequence, matchType, acceptCondition)

if len(matches) == 0 {
// No matches, add a character token
// Note that we just add 1 character, and move on
token := Token{VARNAM_TOKEN_CHAR, matches, i, string(sequence[:1])}
results = append(results, token)
if unicode.In(sequence[0], &varnam.LangRules.UnicodeBlock) {
// This helps to get suggestions in inputs like "ആലppu"
character := string(sequence[0])
token := Token{VARNAM_TOKEN_SYMBOL, []Symbol{{Value1: character}}, i, character}
results = append(results, token)
} else {
// No matches, add a character token
// Note that we just add 1 character, and move on
token := Token{VARNAM_TOKEN_CHAR, matches, i, string(sequence[:1])}
results = append(results, token)
}

i++
} else {
Expand Down
2 changes: 1 addition & 1 deletion govarnamgo/govarnamgo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func tearDown() {

func TestMain(m *testing.M) {
var err error
testTempDir, err = os.TempDir("", "govarnam_test")
testTempDir = os.TempDir()
checkError(err)

setUp("ml")
Expand Down

0 comments on commit e6dd5e0

Please sign in to comment.