From 13a50dbb6c21593ee2be11ba42660793ed908b3b Mon Sep 17 00:00:00 2001 From: shenleban tongying Date: Thu, 12 Dec 2024 23:20:39 -0500 Subject: [PATCH] fix: regression in finding {id}-{id} lang pair from dict name --- src/langcoder.cc | 10 +++++++--- website/docs/manage_groups.md | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/langcoder.cc b/src/langcoder.cc index 95c33581f..da22abe1a 100644 --- a/src/langcoder.cc +++ b/src/langcoder.cc @@ -275,14 +275,18 @@ quint32 LangCoder::guessId( const QString & lang ) std::pair< quint32, quint32 > LangCoder::findLangIdPairFromName( QString const & name ) { - static QRegularExpression reg( "(?=([a-z]{2,3})-([a-z]{2,3}))", QRegularExpression::CaseInsensitiveOption ); + static QRegularExpression reg( "(^|[^a-z])((?<lang1>[a-z]{2,3})-(?<lang2>[a-z]{2,3}))($|[^a-z])", + QRegularExpression::CaseInsensitiveOption ); auto matches = reg.globalMatch( name ); while ( matches.hasNext() ) { auto m = matches.next(); + if ( matches.hasNext() ) { + continue; // We use only the last match, skip previous ones + } - auto fromId = guessId( m.captured( 1 ).toLower() ); - auto toId = guessId( m.captured( 2 ).toLower() ); + auto fromId = guessId( m.captured( "lang1" ).toLower() ); + auto toId = guessId( m.captured( "lang2" ).toLower() ); if ( code2Exists( intToCode2( fromId ) ) && code2Exists( intToCode2( toId ) ) ) { return { fromId, toId }; diff --git a/website/docs/manage_groups.md b/website/docs/manage_groups.md index 623af88fc..e5c89ec2d 100644 --- a/website/docs/manage_groups.md +++ b/website/docs/manage_groups.md @@ -10,9 +10,9 @@ Additionally, multiple strategies of automatic grouping are provided: ## Auto groups by dictionary language -For formats like DSL, which has embedded language from / to metadata, GoldenDict will use the dictionary's built-in metadata. +For formats like DSL, which has embedded language from / to metadata, GD will use the dictionary's built-in metadata. 
-For other formats, GoldenDict will try to extract languages from the dictionary's name or its file name by finding `{id}-{id}` pair. The `{id}` is 2 or 3 letters ISO 639 codes. For example, if a dictionary named `some name en-zh`, it will be automatically grouped into `en-zh`. +For other formats, GD will try to extract languages by finding the last `{id}-{id}` pair delimited by non-alphabetic characters in the dictionary name or main file name. Each `{id}` is a 2- or 3-letter ISO 639 code. For example, if a dictionary is named `some name en-zh`, it will be automatically grouped into `en-zh`. Groups created in this method also include a context menu when right-click the group name, in which you can do additional dictionaries grouping by source or target language and combine dictionaries in more large groups.