Skip to content

Commit

Permalink
Merge pull request #29 from dadosjusbr/item-sanitizado
Browse files Browse the repository at this point in the history
sanitizando as rubricas
  • Loading branch information
joellensilva authored Oct 26, 2023
2 parents 7fdff78 + 2b73add commit 84c52c7
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"regexp"
"strings"
"time"
"unicode"

"github.com/dadosjusbr/proto/coleta"
"github.com/dadosjusbr/proto/pipeline"
Expand All @@ -17,6 +18,9 @@ import (
"github.com/dadosjusbr/storage/repo/database"
"github.com/dadosjusbr/storage/repo/file_storage"
"github.com/kelseyhightower/envconfig"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"google.golang.org/protobuf/encoding/prototext"
)

Expand Down Expand Up @@ -209,6 +213,10 @@ func main() {
// rubrica inconsistente
if !m.MatchString(r.Item) {
remunerations[len(remunerations)-1].Inconsistent = true
} else {
// Se a rubrica não for inconsistente, faremos uma cópia sanitizada na coluna item_sanitizado.
itemSanitizado := sanitizarItem(r.Item)
remunerations[len(remunerations)-1].SanitizedItem = &itemSanitizado
}
i++
}
Expand Down Expand Up @@ -305,3 +313,31 @@ func ativoInativo(ativo bool, orgao string) *string {
return nil
}
}

// reItemNaoAlfanumerico matches every character that is not an ASCII letter,
// an ASCII digit or a space. It is compiled once at package scope so that
// repeated calls to sanitizarItem do not recompile the pattern.
var reItemNaoAlfanumerico = regexp.MustCompile("[^a-zA-Z0-9 ]")

// sanitizarItem returns a sanitized copy of a remuneration item name:
// lowercased, without accents, punctuation or other special characters, and
// with whitespace runs collapsed to a single space and trimmed at both ends.
//
// Removed characters are dropped, not replaced by a space, so words joined
// only by punctuation are merged: "Auxílio-Alimentação" becomes
// "auxilioalimentacao".
func sanitizarItem(item string) string {
	item = strings.ToLower(item)

	// Strip accents: NFD splits accented letters into a base letter followed
	// by combining marks (category Mn), the marks are removed, and NFC
	// recomposes whatever is left.
	t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
	// If the transform reports an error, keep the lowercased input instead of
	// a possibly partial result; the filter below still drops any character
	// outside the allowed ASCII set.
	if semAcentos, _, err := transform.String(t, item); err == nil {
		item = semAcentos
	}

	// Drop everything that is not an ASCII letter, digit or space. This also
	// covers punctuation such as ".,;:!?-", so no separate pass is needed.
	item = reItemNaoAlfanumerico.ReplaceAllString(item, "")

	// strings.Fields splits on any whitespace run and ignores leading and
	// trailing whitespace, so re-joining with one space normalizes spacing.
	return strings.Join(strings.Fields(item), " ")
}

0 comments on commit 84c52c7

Please sign in to comment.