Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keep UTF-8 encoding header if present #8964

Merged
merged 2 commits into from
Jul 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- When configured SSL certificates changed, JabRef warns the user to restart to apply the configuration.
- We improved the appearances and logic of the "Manage field names & content" dialog, and renamed it to "Automatic field editor". [#6536](https://github.com/JabRef/jabref/issues/6536)
- We improved the message explaining the options when modifying an automatic keyword group [#8911](https://github.com/JabRef/jabref/issues/8911)
- We moved the preferences option "Warn about duplicates on import" option from the tab "File" to the tab "Import and Export". [kopper#570](https://github.com/koppor/jabref/issues/570)
- We moved the preferences option "Warn about duplicates on import" from the tab "File" to the tab "Import and Export". [koppor#570](https://github.com/koppor/jabref/issues/570)
- When JabRef encounters a `% Encoding: UTF-8` header, it is now kept during writing (and not removed). [#8964](https://github.com/JabRef/jabref/pull/8964)

### Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,11 @@ protected void writeEntryTypeDefinition(BibEntryType customType) throws IOExcept

@Override
protected void writeProlog(BibDatabaseContext bibDatabaseContext, Charset encoding) throws IOException {
if ((encoding == null) || (encoding == StandardCharsets.UTF_8)) {
// We write the encoding if
// - it is provided (!= null)
// - explicitly set in the .bib file OR not equal to UTF_8
// Otherwise, we do not write anything and return
if ((encoding == null) || (!bibDatabaseContext.getMetaData().getEncodingExplicitlySupplied() && (encoding == StandardCharsets.UTF_8))) {
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,11 @@ public ParserResult importDatabase(Path filePath) throws IOException {
}

Charset encoding;
boolean encodingExplicitlySupplied;
try (BufferedReader reader = Files.newBufferedReader(filePath, detectedCharset)) {
Optional<Charset> suppliedEncoding = getSuppliedEncoding(reader);
LOGGER.debug("Supplied encoding: {}", suppliedEncoding);
encodingExplicitlySupplied = suppliedEncoding.isPresent();

// in case no encoding information is present, use the detected one
encoding = suppliedEncoding.orElse(detectedCharset);
Expand All @@ -80,6 +82,7 @@ public ParserResult importDatabase(Path filePath) throws IOException {
try (BufferedReader reader = Files.newBufferedReader(filePath, encoding)) {
ParserResult parserResult = this.importDatabase(reader);
parserResult.getMetaData().setEncoding(encoding);
parserResult.getMetaData().setEncodingExplicitlySupplied(encodingExplicitlySupplied);
parserResult.setPath(filePath);
if (parserResult.getMetaData().getMode().isEmpty()) {
parserResult.getMetaData().setMode(BibDatabaseModeDetection.inferMode(parserResult.getDatabase()));
Expand Down
15 changes: 14 additions & 1 deletion src/main/java/org/jabref/model/metadata/MetaData.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public class MetaData {
private final ContentSelectors contentSelectors = new ContentSelectors();
private final Map<String, List<String>> unknownMetaData = new HashMap<>();
private boolean isEventPropagationEnabled = true;
private boolean encodingExplicitlySupplied;

/**
* Constructs an empty metadata.
Expand Down Expand Up @@ -291,6 +292,17 @@ public void setEncoding(Charset encoding, ChangePropagation postChanges) {
}
}

/**
* Reports whether the encoding was explicitly supplied (as opposed to merely detected).
*
* @return the current value of the flag stored by {@link #setEncodingExplicitlySupplied(boolean)}
*/
public boolean getEncodingExplicitlySupplied() {
return encodingExplicitlySupplied;
}

/**
* Records whether the encoding was declared using a "% Encoding: ..." line or whether it was only detected
* "magically" from the file content.
*
* @param encodingExplicitlySupplied {@code true} if the encoding was set using "% Encoding: ...",
* {@code false} if it was detected
*/
public void setEncodingExplicitlySupplied(boolean encodingExplicitlySupplied) {
this.encodingExplicitlySupplied = encodingExplicitlySupplied;
}

/**
* If disabled {@link MetaDataChangedEvent} will not be posted.
*/
Expand Down Expand Up @@ -349,6 +361,7 @@ public boolean equals(Object o) {
return (isProtected == metaData.isProtected)
&& Objects.equals(groupsRoot.getValue(), metaData.groupsRoot.getValue())
&& Objects.equals(encoding, metaData.encoding)
&& Objects.equals(encodingExplicitlySupplied, metaData.encodingExplicitlySupplied)
&& Objects.equals(saveOrderConfig, metaData.saveOrderConfig)
&& Objects.equals(citeKeyPatterns, metaData.citeKeyPatterns)
&& Objects.equals(userFileDirectory, metaData.userFileDirectory)
Expand All @@ -362,7 +375,7 @@ public boolean equals(Object o) {

@Override
public int hashCode() {
return Objects.hash(groupsRoot.getValue(), encoding, saveOrderConfig, citeKeyPatterns, userFileDirectory,
return Objects.hash(groupsRoot.getValue(), encoding, encodingExplicitlySupplied, saveOrderConfig, citeKeyPatterns, userFileDirectory,
defaultCiteKeyPattern, saveActions, mode, isProtected, defaultFileDirectory);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,27 @@ void writeEpilogueAndEncoding() throws Exception {
"Test epilog" + OS.NEWLINE, stringWriter.toString());
}

@Test
void utf8EncodingWrittenIfExplicitlyDefined() throws Exception {
    // UTF-8 is flagged as explicitly supplied, so the writer is expected to emit the header line.
    metaData.setEncoding(StandardCharsets.UTF_8);
    metaData.setEncodingExplicitlySupplied(true);

    // Saving an empty entry list leaves only the prolog in the output.
    databaseWriter.savePartOfDatabase(bibtexContext, Collections.emptyList());

    String expectedOutput = "% Encoding: UTF-8" + OS.NEWLINE;
    String actualOutput = stringWriter.toString();
    assertEquals(expectedOutput, actualOutput);
}

@Test
void utf8EncodingNotWrittenIfNotExplicitlyDefined() throws Exception {
    // UTF-8 is set but not flagged as explicitly supplied, so no header line is expected.
    metaData.setEncoding(StandardCharsets.UTF_8);
    metaData.setEncodingExplicitlySupplied(false);

    databaseWriter.savePartOfDatabase(bibtexContext, Collections.emptyList());

    String actualOutput = stringWriter.toString();
    assertEquals("", actualOutput);
}

@Test
void writeMetadata() throws Exception {
DatabaseCitationKeyPattern bibtexKeyPattern = new DatabaseCitationKeyPattern(mock(GlobalCitationKeyPattern.class));
Expand Down Expand Up @@ -404,6 +425,44 @@ void roundtripWin1252HeaderKept(@TempDir Path bibFolder) throws Exception {
assertEquals(Files.readString(testFile, charset), Files.readString(file, charset));
}

@Test
void roundtripUtf8HeaderKept(@TempDir Path bibFolder) throws Exception {
    // Import the UTF-8 fixture (named as carrying an explicit encoding header).
    Path sourceBib = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-with-header-with-databasetypecomment.bib").toURI());
    ParserResult parsed = new BibtexImporter(importFormatPreferences, fileMonitor).importDatabase(sourceBib);
    BibDatabaseContext dbContext = new BibDatabaseContext(parsed.getDatabase(), parsed.getMetaData());

    // Write the imported database to a fresh file inside the temporary folder.
    Path writtenBib = Files.createFile(bibFolder.resolve("JabRef.bib"));
    Charset utf8 = StandardCharsets.UTF_8;
    try (BufferedWriter out = Files.newBufferedWriter(writtenBib, utf8)) {
        BibWriter bibWriter = new BibWriter(out, dbContext.getDatabase().getNewLineSeparator());
        BibtexDatabaseWriter writer = new BibtexDatabaseWriter(bibWriter, generalPreferences, savePreferences, entryTypesManager);
        writer.saveDatabase(dbContext);
    }

    // Round trip: the written file must be textually identical to the fixture.
    String expectedContent = Files.readString(sourceBib, utf8);
    String writtenContent = Files.readString(writtenBib, utf8);
    assertEquals(expectedContent, writtenContent);
}

@Test
void roundtripNotExplicitUtf8HeaderNotInsertedDuringWrite(@TempDir Path bibFolder) throws Exception {
    // Import the UTF-8 fixture (named as having no encoding header).
    Path sourceBib = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-without-header-with-databasetypecomment.bib").toURI());
    ParserResult parsed = new BibtexImporter(importFormatPreferences, fileMonitor).importDatabase(sourceBib);
    BibDatabaseContext dbContext = new BibDatabaseContext(parsed.getDatabase(), parsed.getMetaData());

    // Write the imported database to a fresh file inside the temporary folder.
    Path writtenBib = Files.createFile(bibFolder.resolve("JabRef.bib"));
    Charset utf8 = StandardCharsets.UTF_8;
    try (BufferedWriter out = Files.newBufferedWriter(writtenBib, utf8)) {
        BibWriter bibWriter = new BibWriter(out, dbContext.getDatabase().getNewLineSeparator());
        BibtexDatabaseWriter writer = new BibtexDatabaseWriter(bibWriter, generalPreferences, savePreferences, entryTypesManager);
        writer.saveDatabase(dbContext);
    }

    // Round trip: the written file must be textually identical to the fixture.
    String expectedContent = Files.readString(sourceBib, utf8);
    String writtenContent = Files.readString(writtenBib, utf8);
    assertEquals(expectedContent, writtenContent);
}

@Test
void roundtripWithComplexBib() throws Exception {
Path testBibtexFile = Path.of("src/test/resources/testbib/complex.bib");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;

Expand Down Expand Up @@ -161,11 +162,25 @@ public void testParsingOfWindows1252EncodedFileReadsDegreeCharacterCorrectly(Str

@ParameterizedTest
@CsvSource({"encoding-utf-8-with-header.bib", "encoding-utf-8-without-header.bib"})
public void testParsingOfUtf8EncodedFileReadsUmlatCharacterCorrectly(String filename) throws Exception {
public void testParsingOfUtf8EncodedFileReadsUmlautCharacterCorrectly(String filename) throws Exception {
ParserResult parserResult = importer.importDatabase(
Path.of(BibtexImporterTest.class.getResource(filename).toURI()));
assertEquals(
List.of(new BibEntry(StandardEntryType.Article).withField(StandardField.TITLE, "Ü ist ein Umlaut")),
parserResult.getDatabase().getEntries());
}

@Test
public void encodingSupplied() throws Exception {
    // Importing this fixture must mark the encoding as explicitly supplied.
    Path bibFile = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-with-header.bib").toURI());
    ParserResult parserResult = importer.importDatabase(bibFile);

    boolean explicitlySupplied = parserResult.getMetaData().getEncodingExplicitlySupplied();
    assertTrue(explicitlySupplied);
}

@Test
public void encodingNotSupplied() throws Exception {
    // Importing this fixture must leave the explicitly-supplied flag unset.
    Path bibFile = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-without-header.bib").toURI());
    ParserResult parserResult = importer.importDatabase(bibFile);

    boolean explicitlySupplied = parserResult.getMetaData().getEncodingExplicitlySupplied();
    assertFalse(explicitlySupplied);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
% Encoding: UTF-8

@article{,
title = {Ü ist ein Umlaut},
}

@Comment{jabref-meta: databaseType:bibtex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@article{,
title = {Ü ist ein Umlaut},
}

@Comment{jabref-meta: databaseType:bibtex;}