-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix for issue 5850: Journal abbreviations in UTF-8 not recognized (#7639)
* fix issue #5850 for encoding problem * add a blank line for build.gradle * initial as main branch for build.gradle * initial as main branch for build.gradle * add the change of fix information of issue 5850 * Fix check style * Update CHANGELOG.md Co-authored-by: Christoph <siedlerkiller@gmail.com> * Add the utf8 check for biblatex and ascii check for bibtex * add the new localization string the l10 files * fix error * add the statement only in en.properties * revert changes * Update JabRef_da.properties * Update JabRef_ru.properties * Update build.gradle * Update JabRef_fa.properties * Update JabRef_no.properties * Update JabRef_pl.properties * Update JabRef_pt.properties * Update JabRef_vi.properties * Update JabRef_zh_TW.properties * reset the default charset * reset the default charset * add the javaDoc of UTF8Checker * add the javaDoc of UTF8CheckerTest and IntegrityCheckTest add 2 Junit Test for UTF8Checker.UTF8EncodingChecker in UTF8CheckerTest add 2 Junit Test for IntegrityCheck in IntegrityCheckTest * Remove the unwieldy Junit tests Co-authored-by: Christoph <siedlerkiller@gmail.com>
- Loading branch information
1 parent
2af578f
commit 434250d
Showing
6 changed files
with
140 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package org.jabref.logic.integrity; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.nio.charset.CharacterCodingException; | ||
import java.nio.charset.Charset; | ||
import java.nio.charset.CharsetDecoder; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import org.jabref.logic.l10n.Localization; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.Field; | ||
|
||
public class UTF8Checker implements EntryChecker { | ||
|
||
/** | ||
* Detect any non UTF-8 encoded field | ||
* @param entry the BibEntry of BibLatex. | ||
* @return return the warning of UTF-8 check for BibLatex. | ||
*/ | ||
@Override | ||
public List<IntegrityMessage> check(BibEntry entry) { | ||
List<IntegrityMessage> results = new ArrayList<>(); | ||
Charset charset = Charset.forName(System.getProperty("file.encoding")); | ||
for (Map.Entry<Field, String> field : entry.getFieldMap().entrySet()) { | ||
boolean utfOnly = UTF8EncodingChecker(field.getValue().getBytes(charset)); | ||
if (!utfOnly) { | ||
results.add(new IntegrityMessage(Localization.lang("Non-UTF-8 encoded field found"), entry, | ||
field.getKey())); | ||
} | ||
} | ||
return results; | ||
} | ||
|
||
/** | ||
* Check whether a byte array is encoded in UTF-8 charset | ||
* | ||
* Use java api decoder and try&catch block to check the charset. | ||
* @param data the byte array used to check the encoding charset | ||
* @return true if is encoded in UTF-8 & false is not encoded in UTF-8 | ||
*/ | ||
public static boolean UTF8EncodingChecker(byte[] data) { | ||
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); | ||
try { | ||
decoder.decode(ByteBuffer.wrap(data)); | ||
} catch (CharacterCodingException ex) { | ||
return false; | ||
} | ||
return true; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
src/test/java/org/jabref/logic/integrity/UTF8CheckerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package org.jabref.logic.integrity; | ||
|
||
import java.io.UnsupportedEncodingException; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
|
||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertFalse; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
public class UTF8CheckerTest { | ||
|
||
private final BibEntry entry = new BibEntry(); | ||
|
||
/** | ||
* fieldAcceptsUTF8 to check UTF8Checker's result set | ||
* when the entry is encoded in UTF-8 (should be empty) | ||
*/ | ||
@Test | ||
void fieldAcceptsUTF8() { | ||
UTF8Checker checker = new UTF8Checker(); | ||
entry.setField(StandardField.TITLE, "Only ascii characters!'@12"); | ||
assertEquals(Collections.emptyList(), checker.check(entry)); | ||
} | ||
|
||
/** | ||
* fieldDoesNotAcceptUmlauts to check UTF8Checker's result set | ||
* when the entry is encoded in Non-Utf-8 charset and the System | ||
* environment is Non UTF-8. | ||
* Finally we need to reset the environment charset. | ||
* @throws UnsupportedEncodingException initial a String in charset GBK | ||
* Demo: new String(StringDemo.getBytes(), "GBK"); | ||
*/ | ||
@Test | ||
void fieldDoesNotAcceptUmlauts() throws UnsupportedEncodingException { | ||
String defaultCharset = System.getProperty("file.encoding"); | ||
System.getProperties().put("file.encoding", "GBK"); | ||
UTF8Checker checker = new UTF8Checker(); | ||
String NonUTF8 = new String("你好,这条语句使用GBK字符集".getBytes(), "GBK"); | ||
entry.setField(StandardField.MONTH, NonUTF8); | ||
assertEquals(List.of(new IntegrityMessage("Non-UTF-8 encoded field found", entry, StandardField.MONTH)), checker.check(entry)); | ||
System.getProperties().put("file.encoding", defaultCharset); | ||
} | ||
|
||
/** | ||
* To check the UTF8Checker.UTF8EncodingChecker | ||
* in NonUTF8 char array (should return false) | ||
* | ||
* @throws UnsupportedEncodingException initial a String in charset GBK | ||
* Demo: new String(StringDemo.getBytes(), "GBK"); | ||
*/ | ||
@Test | ||
void NonUTF8EncodingCheckerTest() throws UnsupportedEncodingException { | ||
String NonUTF8 = new String("你好,这条语句使用GBK字符集".getBytes(), "GBK"); | ||
assertFalse(UTF8Checker.UTF8EncodingChecker(NonUTF8.getBytes("GBK"))); | ||
|
||
} | ||
|
||
/** | ||
* To check the UTF8Checker.UTF8EncodingChecker | ||
* in UTF-8 char array (should return true) | ||
*/ | ||
@Test | ||
void UTF8EncodingCheckerTest() { | ||
String UTF8Demo = new String("你好,这条语句使用GBK字符集".getBytes(), StandardCharsets.UTF_8); | ||
assertTrue(UTF8Checker.UTF8EncodingChecker(UTF8Demo.getBytes(StandardCharsets.UTF_8))); | ||
} | ||
} |