Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transliterate Song artist and title to ASCII for search #795

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions src/base/UCommon.pas
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ function StringDeleteFromArray(var InArray: TIntegerDynArray; const InIndex: int
function StringDeleteFromArray(var InStrings: TStringDynArray; const InIndex: integer): Boolean; overload;
function StringDeleteFromArray(var InStrings: TUTF8StringDynArray; const InIndex: integer): Boolean; overload;

function GetStringWithNoAccents(str: String):String;

type
TRGB = record
R: single;
Expand Down Expand Up @@ -224,24 +222,6 @@ function SplitString(const Str: string; MaxCount: integer; Separators: TSysCharS
AddSplit(Start+1, Length(Str)+1);
end;

const
  // Parallel lookup tables: every occurrence of Accents[i] is replaced by
  // NoAccents[i]. The accented letters map to a plain letter; the trailing
  // punctuation entries map to '' and are therefore removed from the string.
  // Both arrays must keep the same bounds.
  Accents: array [0..42] of String = ('ç', 'á', 'é', 'í', 'ó', 'ú', 'ý', 'à', 'è', 'ì', 'ò', 'ù', 'ã', 'õ', 'ñ', 'ä', 'ë', 'ï', 'ö', 'ü', 'ÿ', 'â', 'ê', 'î', 'ô', 'û', 'ą', 'ć', 'ł', 'ś', 'ź', '!', '¡', '"', '&', '(', ')', '?', '¿', ',', '.', ':', ';');
  NoAccents: array [0..42] of String = ('c', 'a', 'e', 'i', 'o', 'u', 'y', 'a', 'e', 'i', 'o', 'u', 'a', 'o', 'n', 'a', 'e', 'i', 'o', 'u', 'y', 'a', 'e', 'i', 'o', 'u', 'a', 'c', 'l', 's', 'z', '', '', '', '', '', '', '', '', '', '', '', '');

(*
 * Returns a copy of str in which every entry of Accents has been replaced by
 * the entry of NoAccents at the same index (all occurrences, matched with
 * rfIgnoreCase). An empty input yields an empty result.
 *
 * NOTE(review): how rfIgnoreCase treats the non-ASCII entries depends on the
 * string encoding and the RTL's case mapping - confirm before relying on
 * upper-case accented letters being replaced.
 *)
function GetStringWithNoAccents(str: String):String;
var
  i: integer;
begin
  // Accumulate directly in Result; the previously declared "tmp" copy was
  // assigned but never read, so it has been dropped.
  Result := str;

  for i := Low(Accents) to High(Accents) do
    Result := StringReplace(Result, Accents[i], NoAccents[i], [rfReplaceAll, rfIgnoreCase]);
end;

function RGBToHex(R, G, B: integer): string;
begin
Expand Down
35 changes: 17 additions & 18 deletions src/base/USong.pas
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ interface
PseudoThread,
{$ENDIF}
UCatCovers,
UCommon,
UFilesystem,
ULog,
UPath,
Expand Down Expand Up @@ -139,12 +138,12 @@ TSong = class
Artist: UTF8String;

// use in search
TitleNoAccent: UTF8String;
ArtistNoAccent: UTF8String;
LanguageNoAccent: UTF8String;
EditionNoAccent: UTF8String;
GenreNoAccent: UTF8String;
CreatorNoAccent: UTF8String;
TitleASCII: UTF8String;
ArtistASCII: UTF8String;
LanguageASCII: UTF8String;
EditionASCII: UTF8String;
GenreASCII: UTF8String;
CreatorASCII: UTF8String;

Creator: UTF8String;

Expand Down Expand Up @@ -915,14 +914,14 @@ function TSong.ReadXMLHeader(const aFileName : IPath): boolean;

//Title
self.Title := Parser.SongInfo.Header.Title;
self.TitleNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(Parser.SongInfo.Header.Title)));
self.TitleASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Title)));

//Add Title Flag to Done
Done := Done or 1;

//Artist
self.Artist := Parser.SongInfo.Header.Artist;
self.ArtistNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(Parser.SongInfo.Header.Artist)));
self.ArtistASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Artist)));

//Add Artist Flag to Done
Done := Done or 2;
Expand Down Expand Up @@ -964,18 +963,18 @@ function TSong.ReadXMLHeader(const aFileName : IPath): boolean;

//Genre Sorting
self.Genre := Parser.SongInfo.Header.Genre;
self.GenreNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(self.Genre)));
self.GenreASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Genre)));

//Edition Sorting
self.Edition := Parser.SongInfo.Header.Edition;
self.EditionNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(self.Edition)));
self.EditionASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Edition)));

//Year Sorting
//self.Year := Parser.SongInfo.Header.Year

//Language Sorting
self.Language := Parser.SongInfo.Header.Language;
self.LanguageNoAccent := LowerCase(GetStringWithNoAccents(UTF8Decode(self.Language)));
self.LanguageASCII := LowerCase(TransliterateToASCII(UTF8Decode(self.Language)));
end
else
Log.LogError('File incomplete or not SingStar XML (A): ' + aFileName.ToNative);
Expand Down Expand Up @@ -1110,15 +1109,15 @@ function TSong.ReadTXTHeader(SongFile: TTextFileStream; ReadCustomTags: Boolean)
if (Identifier = 'TITLE') then
begin
self.Title := DecodeStringUTF8(Value, Encoding);
self.TitleNoAccent := LowerCase(GetStringWithNoAccents(DecodeStringUTF8(Value, Encoding)));
self.TitleASCII := LowerCase(TransliterateToASCII(self.Title));
//Add Title Flag to Done
Done := Done or 1;
end

else if (Identifier = 'ARTIST') then
begin
self.Artist := DecodeStringUTF8(Value, Encoding);
self.ArtistNoAccent := LowerCase(GetStringWithNoAccents(DecodeStringUTF8(Artist, Encoding)));
self.ArtistASCII := LowerCase(TransliterateToASCII(self.Artist));

//Add Artist Flag to Done
Done := Done or 2;
Expand Down Expand Up @@ -1200,28 +1199,28 @@ function TSong.ReadTXTHeader(SongFile: TTextFileStream; ReadCustomTags: Boolean)
else if (Identifier = 'GENRE') then
begin
DecodeStringUTF8(Value, Genre, Encoding);
self.GenreNoAccent := LowerCase(GetStringWithNoAccents(Genre));
self.GenreASCII := LowerCase(TransliterateToASCII(Genre));
end

//Edition Sorting
else if (Identifier = 'EDITION') then
begin
DecodeStringUTF8(Value, Edition, Encoding);
self.EditionNoAccent := LowerCase(GetStringWithNoAccents(Edition));
self.EditionASCII := LowerCase(TransliterateToASCII(Edition));
end

//Creator Tag
else if (Identifier = 'CREATOR') then
begin
DecodeStringUTF8(Value, Creator, Encoding);
self.CreatorNoAccent := LowerCase(GetStringWithNoAccents(Creator));
self.CreatorASCII := LowerCase(TransliterateToASCII(Creator));
end

//Language Sorting
else if (Identifier = 'LANGUAGE') then
begin
DecodeStringUTF8(Value, Language, Encoding);
self.LanguageNoAccent := LowerCase(GetStringWithNoAccents(Language));
self.LanguageASCII := LowerCase(TransliterateToASCII(Language));
end

//Year Sorting
Expand Down
17 changes: 8 additions & 9 deletions src/base/USongs.pas
Original file line number Diff line number Diff line change
Expand Up @@ -903,8 +903,7 @@ function TCatSongs.SetFilter(FilterStr: UTF8String; Filter: TSongFilter): cardin
WordArray: array of UTF8String;
begin

FilterStr := Trim(LowerCase(FilterStr));
FilterStr := GetStringWithNoAccents(FilterStr);
FilterStr := Trim(LowerCase(TransliterateToASCII(FilterStr)));

if (FilterStr <> '') then
begin
Expand Down Expand Up @@ -933,21 +932,21 @@ function TCatSongs.SetFilter(FilterStr: UTF8String; Filter: TSongFilter): cardin
begin
case Filter of
fltAll:
TmpString := Song[I].ArtistNoAccent + ' ' + Song[i].TitleNoAccent + ' ' + Song[i].LanguageNoAccent + ' ' + Song[i].EditionNoAccent + ' ' + Song[i].GenreNoAccent + ' ' + IntToStr(Song[i].Year) + ' ' + Song[i].CreatorNoAccent; //+ ' ' + Song[i].Folder;
TmpString := Song[I].ArtistASCII + ' ' + Song[i].TitleASCII + ' ' + Song[i].LanguageASCII + ' ' + Song[i].EditionASCII + ' ' + Song[i].GenreASCII + ' ' + IntToStr(Song[i].Year) + ' ' + Song[i].CreatorASCII; //+ ' ' + Song[i].Folder;
fltTitle:
TmpString := Song[I].TitleNoAccent;
TmpString := Song[I].TitleASCII;
fltArtist:
TmpString := Song[I].ArtistNoAccent;
TmpString := Song[I].ArtistASCII;
fltLanguage:
TmpString := Song[I].LanguageNoAccent;
TmpString := Song[I].LanguageASCII;
fltEdition:
TmpString := Song[I].EditionNoAccent;
TmpString := Song[I].EditionASCII;
fltGenre:
TmpString := Song[I].GenreNoAccent;
TmpString := Song[I].GenreASCII;
fltYear:
TmpString := IntToStr(Song[I].Year);
fltCreator:
TmpString := Song[I].CreatorNoAccent;
TmpString := Song[I].CreatorASCII;
end;
Song[i].Visible := true;
// Look for every searched word
Expand Down
12 changes: 12 additions & 0 deletions src/base/UUnicodeUtils.pas
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ interface
{$IFDEF MSWINDOWS}
Windows,
{$ENDIF}
anyascii,
StrUtils,
SysUtils;

Expand Down Expand Up @@ -188,6 +189,12 @@ function WideStringLowerCase(ch: WideChar): WideString; overload;

function WideStringReplaceChar(const text: WideString; search, rep: WideChar): WideString;

(*
* Transliterates a UTF8 string to ASCII, utilizing AnyASCII
*)
function TransliterateToASCII(const str: UTF8String) : UTF8String;


implementation

{$IFDEF UNIX}
Expand Down Expand Up @@ -675,6 +682,11 @@ function WideStringReplaceChar(const text: WideString; search, rep: WideChar): W
end;
end;

(*
 * Transliterates a UTF8 string to ASCII, utilizing AnyASCII.
 * The input is first converted with UTF8ToUCS4String, the converted string
 * is handed to transliterate, and that result is converted back with
 * UCS4ToUTF8String.
 *)
function TransliterateToASCII(const str: UTF8String) : UTF8String;
var
  CodePoints: UCS4String;
begin
  CodePoints := UTF8ToUCS4String(str);
  Result := UCS4ToUTF8String(transliterate(CodePoints));
end;

initialization
InitUnicodeUtils;

Expand Down
Loading