Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Collection of Comp Sci Bibliographies fetcher #6664

Merged
merged 18 commits into from
Jul 8, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.Objects;
import java.util.regex.Pattern;

import org.jabref.logic.l10n.Localization;
import org.jabref.model.cleanup.Formatter;

/**
 * Formatter that deletes every run of digits that directly follows a space character,
 * together with that space. Digits that are not preceded by a space are left untouched.
 */
public class RemoveDigitsFormatter extends Formatter {

    /** One literal space followed by one or more digits. */
    private static final Pattern SPACE_THEN_DIGITS = Pattern.compile("[ ]\\d+");

    @Override
    public String getKey() {
        return "remove_digits";
    }

    @Override
    public String getName() {
        return Localization.lang("Remove digits");
    }

    @Override
    public String getDescription() {
        return Localization.lang("Removes digits.");
    }

    @Override
    public String getExampleInput() {
        return "In 012 CDMA";
    }

    /**
     * Strips each "space + digits" sequence from the given text.
     *
     * @param value the text to clean; must not be {@code null}
     * @return the text with every space-prefixed digit run (and its leading space) removed
     * @throws NullPointerException if {@code value} is {@code null}
     */
    @Override
    public String format(String value) {
        final String input = Objects.requireNonNull(value);
        return SPACE_THEN_DIGITS.matcher(input).replaceAll("");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.Objects;
import java.util.regex.Pattern;

import org.jabref.logic.l10n.Localization;
import org.jabref.model.cleanup.Formatter;

/**
* Finds any occurrence of consecutive spaces and replaces it with a single space
*/
public class RemoveRedundantSpacesFormatter extends Formatter {

private static final Pattern MULTIPLE_SPACES = Pattern.compile(" {2,}");

@Override
public String getName() {
return Localization.lang("Remove redundant spaces");
}

@Override
public String getKey() {
return "remove_redundant_spaces";
}

@Override
public String format(String value) {
Objects.requireNonNull(value);

daniel-price marked this conversation as resolved.
Show resolved Hide resolved
return MULTIPLE_SPACES.matcher(value).replaceAll(" ");
}

@Override
public String getDescription() {
return Localization.lang("Replaces consecutive spaces with a single space in the field content.");
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public String getExampleInput() {
return "In CDMA";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.Objects;
import java.util.regex.Pattern;

import org.jabref.logic.l10n.Localization;
import org.jabref.model.cleanup.Formatter;

/**
* Replaces any tab with a space
*/
public class RemoveTabsFormatter extends Formatter {

private static final Pattern TAB = Pattern.compile("\t+");

@Override
public String getName() {
return Localization.lang("Remove tabs");
}

@Override
public String getKey() {
return "remove_tabs";
}

@Override
public String format(String value) {
Objects.requireNonNull(value);

daniel-price marked this conversation as resolved.
Show resolved Hide resolved
return TAB.matcher(value).replaceAll(" ");
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public String getDescription() {
return Localization.lang("Removes tabs in the field content.");
}

@Override
public String getExampleInput() {
return "In \t\t CDMA";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,17 @@
import java.net.URISyntaxException;
import java.net.URL;

import org.jabref.logic.formatter.bibtexfields.RemoveDigitsFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveNewlinesFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveRedundantSpacesFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveTabsFormatter;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;

import org.apache.http.client.utils.URIBuilder;

Expand Down Expand Up @@ -39,4 +46,12 @@ public Parser getParser() {
public String getName() {
    // Fixed, human-readable name handed back to callers.
    final String fetcherName = "Collection of Computer Science Bibliographies";
    return fetcherName;
}

/**
 * Applies a fixed sequence of field cleanups to the given entry.
 * <p>
 * The abstract is processed by the newline, tab and redundant-space formatters, in that order;
 * the editor field is processed by the digit-removing formatter.
 *
 * @param entry the entry whose fields are cleaned up in place
 */
@Override
public void doPostCleanup(BibEntry entry) {
    // Order matters for the abstract: tabs are turned into spaces before runs of spaces are collapsed.
    FieldFormatterCleanup abstractNewlines =
            new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveNewlinesFormatter());
    abstractNewlines.cleanup(entry);

    FieldFormatterCleanup abstractTabs =
            new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveTabsFormatter());
    abstractTabs.cleanup(entry);

    FieldFormatterCleanup abstractSpaces =
            new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveRedundantSpacesFormatter());
    abstractSpaces.cleanup(entry);

    FieldFormatterCleanup editorDigits =
            new FieldFormatterCleanup(StandardField.EDITOR, new RemoveDigitsFormatter());
    editorDigits.cleanup(entry);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.jabref.logic.formatter.bibtexfields;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for {@link RemoveDigitsFormatter}: a run of digits that follows a space is removed
 * together with that space, and text without digits is returned unchanged.
 */
public class RemoveDigitsFormatterTest {

    private RemoveDigitsFormatter formatter;

    @BeforeEach
    public void setUp() {
        formatter = new RemoveDigitsFormatter();
    }

    @Test
    public void removeSingleDigitWithLeadingSpace() {
        assertEquals("one digit", formatter.format("one 1 digit"));
    }

    @Test
    public void removeMultipleDigitsWithLeadingSpace() {
        assertEquals("two digits", formatter.format("two 01 digits"));
    }

    @Test
    public void doNothingIfNoDigits() {
        assertEquals("no digits", formatter.format("no digits"));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package org.jabref.logic.formatter.bibtexfields;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for {@link RemoveRedundantSpacesFormatter}: runs of two or more spaces collapse to a
 * single space, while text with single spaces or no spaces is returned unchanged.
 */
public class RemoveRedundantSpacesFormatterTest {

    private RemoveRedundantSpacesFormatter formatter;

    @BeforeEach
    public void setUp() {
        formatter = new RemoveRedundantSpacesFormatter();
    }

    @Test
    public void doNothingIfSingleSpace() {
        assertEquals("single space", formatter.format("single space"));
    }

    @Test
    public void doNothingIfNoSpace() {
        assertEquals("nospace", formatter.format("nospace"));
    }

    @Test
    public void removeAllButOneSpacesIfTwo() {
        // Input really contains two consecutive spaces so the collapsing is exercised.
        assertEquals("two spaces", formatter.format("two  spaces"));
    }

    @Test
    public void removeAllButOneSpacesIfThree() {
        // Input really contains three consecutive spaces so the collapsing is exercised.
        assertEquals("three spaces", formatter.format("three   spaces"));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

package org.jabref.logic.formatter.bibtexfields;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for {@link RemoveTabsFormatter}: tab runs become a single space, tab-free text is unchanged.
 */
public class RemoveTabsFormatterTest {

    private RemoveTabsFormatter formatter;

    @BeforeEach
    public void setUp() {
        formatter = new RemoveTabsFormatter();
    }

    @Test
    public void removeSingleTab() {
        final String cleaned = formatter.format("single\ttab");
        assertEquals("single tab", cleaned);
    }

    @Test
    public void removeMultipleTabs() {
        final String cleaned = formatter.format("multiple\t\ttabs");
        assertEquals("multiple tabs", cleaned);
    }

    @Test
    public void doNothingIfNoTab() {
        final String cleaned = formatter.format("notab");
        assertEquals("notab", cleaned);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,28 +69,28 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio
.withField(new UnknownField("identifier"), "urn:isbn:978-1-4503-5217-8; doi:10.1145/3129790.3129810; ISI:000426556400034")
.withField(new UnknownField("subject"), "Software Architecture; Code Churn; Open Source; Architecrual Erosion; Technical Debt; Software Engineering; Programvaruteknik")
.withField(new UnknownField("relation"), "ACM International Conference Proceeding Series; ECSA '17~Proceedings of the 11th European Conference on Software Architecture : Companion Proceedings, p. 152-158")
.withField(StandardField.ABSTRACT, "The open source application JabRef has existed since\r\n" +
"\t\t 2003. In 2015, the developers decided to make an\r\n" +
"\t\t architectural refactoring as continued development was\r\n" +
"\t\t deemed too demanding. The developers also introduced\r\n" +
"\t\t Static Architecture Conformance Checking (SACC) to\r\n" +
"\t\t prevent violations to the intended architecture.\r\n" +
"\t\t Measurements mined from source code repositories such\r\n" +
"\t\t as code churn and code ownership has been linked to\r\n" +
"\t\t several problems, for example fault proneness, security\r\n" +
"\t\t vulnerabilities, code smells, and degraded\r\n" +
"\t\t maintainability. The root cause of such problems can be\r\n" +
"\t\t architectural. To determine the impact of the\r\n" +
"\t\t refactoring of JabRef, we measure the code churn and\r\n" +
"\t\t code ownership before and after the refactoring and\r\n" +
"\t\t find that large files with violations had a\r\n" +
"\t\t significantly higher code churn than large files\r\n" +
"\t\t without violations before the refactoring. After the\r\n" +
"\t\t refactoring, the files that had violations show a more\r\n" +
"\t\t normal code churn. We find no such effect on code\r\n" +
"\t\t ownership. We conclude that files that contain\r\n" +
"\t\t violations detectable by SACC methods are connected to\r\n" +
"\t\t higher than normal code churn.")
.withField(StandardField.ABSTRACT, "The open source application JabRef has existed since" +
" 2003. In 2015, the developers decided to make an" +
" architectural refactoring as continued development was" +
" deemed too demanding. The developers also introduced" +
" Static Architecture Conformance Checking (SACC) to" +
" prevent violations to the intended architecture." +
" Measurements mined from source code repositories such" +
" as code churn and code ownership has been linked to" +
" several problems, for example fault proneness, security" +
" vulnerabilities, code smells, and degraded" +
" maintainability. The root cause of such problems can be" +
" architectural. To determine the impact of the" +
" refactoring of JabRef, we measure the code churn and" +
" code ownership before and after the refactoring and" +
" find that large files with violations had a" +
" significantly higher code churn than large files" +
" without violations before the refactoring. After the" +
" refactoring, the files that had violations show a more" +
" normal code churn. We find no such effect on code" +
" ownership. We conclude that files that contain" +
" violations detectable by SACC methods are connected to" +
" higher than normal code churn.")
.withField(StandardField.TYPE, "info:eu-repo/semantics/conferenceObject")
.withField(new UnknownField("description"), "Information and Software Qualtiy")
.withField(StandardField.PAGES, "152--158")
Expand All @@ -104,6 +104,7 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio
.withField(StandardField.AUTHOR, "Tobias Olsson and Morgan Ericsson and Anna Wingkvist")
.withField(StandardField.YEAR, "2017");

// Checking entries in the set as the query is generic and returns a changing result set
assertTrue(searchResult.contains(firstBibEntry));
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
assertTrue(searchResult.contains(secondBibEntry));
}
Expand Down