Skip to content

Commit

Permalink
Merge pull request #795 from DeinAlptraum/uc-normalize-search
Browse files Browse the repository at this point in the history
Transliterate Song artist and title to ASCII for search
  • Loading branch information
barbeque-squared authored Feb 18, 2024
2 parents 8b6f9af + a6c0f2c commit d6a0669
Show file tree
Hide file tree
Showing 9 changed files with 159,001 additions and 49 deletions.
20 changes: 0 additions & 20 deletions src/base/UCommon.pas
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ function StringDeleteFromArray(var InArray: TIntegerDynArray; const InIndex: int
function StringDeleteFromArray(var InStrings: TStringDynArray; const InIndex: integer): Boolean; overload;
function StringDeleteFromArray(var InStrings: TUTF8StringDynArray; const InIndex: integer): Boolean; overload;

function GetStringWithNoAccents(str: String):String;

type
TRGB = record
R: single;
Expand Down Expand Up @@ -224,24 +222,6 @@ function SplitString(const Str: string; MaxCount: integer; Separators: TSysCharS
AddSplit(Start+1, Length(Str)+1);
end;

// Parallel lookup tables used by GetStringWithNoAccents: the entry at
// index i of Accents is replaced by the entry at index i of NoAccents.
// Indices 0..30 map accented letters to a plain base letter; indices
// 31..42 are punctuation characters that map to the empty string, i.e.
// they are removed from the text. Both arrays must keep the same bounds.
const
Accents: array [0..42] of String = ('ç', 'á', 'é', 'í', 'ó', 'ú', 'ý', 'à', 'è', 'ì', 'ò', 'ù', 'ã', 'õ', 'ñ', 'ä', 'ë', 'ï', 'ö', 'ü', 'ÿ', 'â', 'ê', 'î', 'ô', 'û', 'ą', 'ć', 'ł', 'ś', 'ź', '!', '¡', '"', '&', '(', ')', '?', '¿', ',', '.', ':', ';');
NoAccents: array [0..42] of String = ('c', 'a', 'e', 'i', 'o', 'u', 'y', 'a', 'e', 'i', 'o', 'u', 'a', 'o', 'n', 'a', 'e', 'i', 'o', 'u', 'y', 'a', 'e', 'i', 'o', 'u', 'a', 'c', 'l', 's', 'z', '', '', '', '', '', '', '', '', '', '', '', '');

(*
 * Returns a copy of str in which every occurrence of an entry of the
 * Accents table has been replaced by the entry at the same index of the
 * NoAccents table (accented letters become their base letter, the listed
 * punctuation characters are dropped).
 *
 * Replacements are applied in table order, each one over the whole string
 * (rfReplaceAll) and with the rfIgnoreCase flag set.
 * NOTE(review): whether rfIgnoreCase also matches upper-case variants of
 * the non-ASCII entries depends on the RTL/string encoding -- confirm
 * before relying on it.
 *)
function GetStringWithNoAccents(str: String):String;
var
  i: integer;
begin
  // Work directly on Result; the previous unused local copy ("tmp") and
  // the dead Utf8ToAnsi remark have been removed.
  Result := str;

  for i := Low(Accents) to High(Accents) do
    Result := StringReplace(Result, Accents[i], NoAccents[i], [rfReplaceAll, rfIgnoreCase]);
end;

function RGBToHex(R, G, B: integer): string;
begin
Expand Down
35 changes: 17 additions & 18 deletions src/base/USong.pas
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ interface
PseudoThread,
{$ENDIF}
UCatCovers,
UCommon,
UFilesystem,
ULog,
UPath,
Expand Down Expand Up @@ -139,12 +138,12 @@ TSong = class
Artist: UTF8String;

// use in search
TitleNoAccent: UTF8String;
ArtistNoAccent: UTF8String;
LanguageNoAccent: UTF8String;
EditionNoAccent: UTF8String;
GenreNoAccent: UTF8String;
CreatorNoAccent: UTF8String;
TitleASCII: UTF8String;
ArtistASCII: UTF8String;
LanguageASCII: UTF8String;
EditionASCII: UTF8String;
GenreASCII: UTF8String;
CreatorASCII: UTF8String;

Creator: UTF8String;

Expand Down Expand Up @@ -915,14 +914,14 @@ function TSong.ReadXMLHeader(const aFileName : IPath): boolean;

//Title
self.Title := Parser.SongInfo.Header.Title;
self.TitleNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(Parser.SongInfo.Header.Title)));
self.TitleASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Title)));

//Add Title Flag to Done
Done := Done or 1;

//Artist
self.Artist := Parser.SongInfo.Header.Artist;
self.ArtistNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(Parser.SongInfo.Header.Artist)));
self.ArtistASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Artist)));

//Add Artist Flag to Done
Done := Done or 2;
Expand Down Expand Up @@ -964,18 +963,18 @@ function TSong.ReadXMLHeader(const aFileName : IPath): boolean;

//Genre Sorting
self.Genre := Parser.SongInfo.Header.Genre;
self.GenreNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(self.Genre)));
self.GenreASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Genre)));

//Edition Sorting
self.Edition := Parser.SongInfo.Header.Edition;
self.EditionNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(self.Edition)));
self.EditionASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Edition)));

//Year Sorting
//self.Year := Parser.SongInfo.Header.Year

//Language Sorting
self.Language := Parser.SongInfo.Header.Language;
self.LanguageNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(self.Language)));
self.LanguageASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Language)));
end
else
Log.LogError('File incomplete or not SingStar XML (A): ' + aFileName.ToNative);
Expand Down Expand Up @@ -1110,15 +1109,15 @@ function TSong.ReadTXTHeader(SongFile: TTextFileStream; ReadCustomTags: Boolean)
if (Identifier = 'TITLE') then
begin
self.Title := DecodeStringUTF8(Value, Encoding);
self.TitleNoAccent := LowerCase(GetStringWithNoAccents(DecodeStringUTF8(Value, Encoding)));
self.TitleASCII := LowerCase(TransliterateToASCII(self.Title));
//Add Title Flag to Done
Done := Done or 1;
end

else if (Identifier = 'ARTIST') then
begin
self.Artist := DecodeStringUTF8(Value, Encoding);
self.ArtistNoAccent := LowerCase(GetStringWithNoAccents(DecodeStringUTF8(Artist, Encoding)));
self.ArtistASCII := LowerCase(TransliterateToASCII(self.Artist));

//Add Artist Flag to Done
Done := Done or 2;
Expand Down Expand Up @@ -1200,28 +1199,28 @@ function TSong.ReadTXTHeader(SongFile: TTextFileStream; ReadCustomTags: Boolean)
else if (Identifier = 'GENRE') then
begin
DecodeStringUTF8(Value, Genre, Encoding);
self.GenreNoAccent := LowerCase(GetStringWithNoAccents(Genre));
self.GenreASCII := LowerCase(TransliterateToASCII(Genre));
end

//Edition Sorting
else if (Identifier = 'EDITION') then
begin
DecodeStringUTF8(Value, Edition, Encoding);
self.EditionNoAccent := LowerCase(GetStringWithNoAccents(Edition));
self.EditionASCII := LowerCase(TransliterateToASCII(Edition));
end

//Creator Tag
else if (Identifier = 'CREATOR') then
begin
DecodeStringUTF8(Value, Creator, Encoding);
self.CreatorNoAccent := LowerCase(GetStringWithNoAccents(Creator));
self.CreatorASCII := LowerCase(TransliterateToASCII(Creator));
end

//Language Sorting
else if (Identifier = 'LANGUAGE') then
begin
DecodeStringUTF8(Value, Language, Encoding);
self.LanguageNoAccent := LowerCase(GetStringWithNoAccents(Language));
self.LanguageASCII := LowerCase(TransliterateToASCII(Language));
end

//Year Sorting
Expand Down
17 changes: 8 additions & 9 deletions src/base/USongs.pas
Original file line number Diff line number Diff line change
Expand Up @@ -903,8 +903,7 @@ function TCatSongs.SetFilter(FilterStr: UTF8String; Filter: TSongFilter): cardin
WordArray: array of UTF8String;
begin

FilterStr := Trim(LowerCase(FilterStr));
FilterStr := GetStringWithNoAccents(FilterStr);
FilterStr := Trim(LowerCase(TransliterateToASCII(FilterStr)));

if (FilterStr <> '') then
begin
Expand Down Expand Up @@ -933,21 +932,21 @@ function TCatSongs.SetFilter(FilterStr: UTF8String; Filter: TSongFilter): cardin
begin
case Filter of
fltAll:
TmpString := Song[I].ArtistNoAccent + ' ' + Song[i].TitleNoAccent + ' ' + Song[i].LanguageNoAccent + ' ' + Song[i].EditionNoAccent + ' ' + Song[i].GenreNoAccent + ' ' + IntToStr(Song[i].Year) + ' ' + Song[i].CreatorNoAccent; //+ ' ' + Song[i].Folder;
TmpString := Song[I].ArtistASCII + ' ' + Song[i].TitleASCII + ' ' + Song[i].LanguageASCII + ' ' + Song[i].EditionASCII + ' ' + Song[i].GenreASCII + ' ' + IntToStr(Song[i].Year) + ' ' + Song[i].CreatorASCII; //+ ' ' + Song[i].Folder;
fltTitle:
TmpString := Song[I].TitleNoAccent;
TmpString := Song[I].TitleASCII;
fltArtist:
TmpString := Song[I].ArtistNoAccent;
TmpString := Song[I].ArtistASCII;
fltLanguage:
TmpString := Song[I].LanguageNoAccent;
TmpString := Song[I].LanguageASCII;
fltEdition:
TmpString := Song[I].EditionNoAccent;
TmpString := Song[I].EditionASCII;
fltGenre:
TmpString := Song[I].GenreNoAccent;
TmpString := Song[I].GenreASCII;
fltYear:
TmpString := IntToStr(Song[I].Year);
fltCreator:
TmpString := Song[I].CreatorNoAccent;
TmpString := Song[I].CreatorASCII;
end;
Song[i].Visible := true;
// Look for every searched word
Expand Down
12 changes: 12 additions & 0 deletions src/base/UUnicodeUtils.pas
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ interface
{$IFDEF MSWINDOWS}
Windows,
{$ENDIF}
anyascii,
StrUtils,
SysUtils;

Expand Down Expand Up @@ -188,6 +189,12 @@ function WideStringLowerCase(ch: WideChar): WideString; overload;

function WideStringReplaceChar(const text: WideString; search, rep: WideChar): WideString;

(*
* Transliterates a UTF8 string to ASCII, utilizing AnyASCII
*)
function TransliterateToASCII(const str: UTF8String) : UTF8String;


implementation

{$IFDEF UNIX}
Expand Down Expand Up @@ -675,6 +682,11 @@ function WideStringReplaceChar(const text: WideString; search, rep: WideChar): W
end;
end;

(*
 * Produces an ASCII transliteration of a UTF-8 encoded string.
 *
 * The text is decoded to UCS-4 with UTF8ToUCS4String, handed to
 * transliterate() (presumably provided by the anyascii unit listed in the
 * uses clause), and the outcome is encoded back with UCS4ToUTF8String.
 *)
function TransliterateToASCII(const str: UTF8String) : UTF8String;
var
  Decoded: UCS4String;
  Romanized: UCS4String;
begin
  // Step 1: UTF-8 bytes -> UCS-4 code points
  Decoded := UTF8ToUCS4String(str);
  // Step 2: code points -> transliterated code points
  Romanized := transliterate(Decoded);
  // Step 3: back to a UTF-8 string for the caller
  Result := UCS4ToUTF8String(Romanized);
end;

initialization
InitUnicodeUtils;

Expand Down
Loading

0 comments on commit d6a0669

Please sign in to comment.