Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Collection of Comp Sci Bibliographies fetcher #6664

Merged
merged 18 commits into from
Jul 8, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Added

- We added a new fetcher to enable users to search "Collection of Computer Science Bibliographies". [#6638](https://github.com/JabRef/jabref/issues/6638)
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
- We improved the responsiveness of the general fields specification dialog window. [#6643](https://github.com/JabRef/jabref/issues/6604)
- We added support for importing RIS files and loading the DOI. [#6530](https://github.com/JabRef/jabref/issues/6530)
- We added the Library properties to a context menu on the library tabs [#6485](https://github.com/JabRef/jabref/issues/6485)
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.jabref.logic.importer.fetcher.ArXiv;
import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem;
import org.jabref.logic.importer.fetcher.CiteSeer;
import org.jabref.logic.importer.fetcher.CollectionOfComputerScienceBibliographiesFetcher;
import org.jabref.logic.importer.fetcher.CompositeSearchBasedFetcher;
import org.jabref.logic.importer.fetcher.CrossRef;
import org.jabref.logic.importer.fetcher.DBLPFetcher;
Expand Down Expand Up @@ -101,6 +102,7 @@ public static SortedSet<SearchBasedFetcher> getSearchBasedFetchers(ImportFormatP
set.add(new DOAJFetcher(importFormatPreferences));
set.add(new IEEE(importFormatPreferences));
set.add(new CompositeSearchBasedFetcher(set, 30));
set.add(new CollectionOfComputerScienceBibliographiesFetcher());
return set;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.jabref.logic.importer.fetcher;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;

import org.apache.http.client.utils.URIBuilder;

public class CollectionOfComputerScienceBibliographiesFetcher implements SearchBasedParserFetcher {
private static final String BASIC_SEARCH_URL = "http://liinwww.ira.uka.de/bibliography/rss?";
daniel-price marked this conversation as resolved.
Show resolved Hide resolved

@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL);
uriBuilder.addParameter("query", query);
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
uriBuilder.addParameter("sort", "score");
URI uri = uriBuilder.build();
return uri.toURL();
}

@Override
public Parser getParser() {
return new CollectionOfComputerScienceBibliographiesParser();
}

@Override
public String getName() {
return "Collection of Computer Science Bibliographies";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.LinkedList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class CollectionOfComputerScienceBibliographiesParser implements Parser {
@Override
public List<BibEntry> parseEntries(InputStream inputStream) throws ParseException {
try {
Document document = buildDocumentFromInputStream(inputStream);
// uncomment to generate test case xml
// XMLUtil.printDocument(document);
daniel-price marked this conversation as resolved.
Show resolved Hide resolved

NodeList childNodes = document.getChildNodes();
List<Element> itemElements = findItemElementsRecursively(childNodes);
List<BibEntry> bibEntries = parseItemElements(itemElements);

// uncomment to generate test case bib files
// System.out.println(bibEntries);
daniel-price marked this conversation as resolved.
Show resolved Hide resolved

return bibEntries;
} catch (ParserConfigurationException | SAXException | IOException exception) {
throw new ParseException(exception);
}
}

private Document buildDocumentFromInputStream(InputStream inputStream) throws ParserConfigurationException, SAXException, IOException {
DocumentBuilder dbuild = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
InputSource is = new InputSource(reader);
return dbuild.parse(is);
}

private List<Element> findItemElementsRecursively(NodeList nodeList) {
LinkedList<Element> itemNodes = new LinkedList();
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
for (int i = 0; i < nodeList.getLength(); i++) {
Node child = nodeList.item(i);
if (child.getNodeName().equals("item")
&& child.getNodeType() == Node.ELEMENT_NODE) {
Element element = (Element) child;
itemNodes.add(element);
} else {
NodeList childNodes = child.getChildNodes();
List<Element> childItemNodes = findItemElementsRecursively(childNodes);
itemNodes.addAll(childItemNodes);
}
}

return itemNodes;
}

private List<BibEntry> parseItemElements(List<Element> itemElements) {
List<BibEntry> items = new LinkedList<>();
for (Element itemElement : itemElements) {
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
BibEntry bibEntry = parseItemElement(itemElement);
items.add(bibEntry);
}

return items;
}

private BibEntry parseItemElement(Element item) {
BibEntry bibEntry = new BibEntry();
setFieldFromTag(bibEntry, item, StandardField.TITLE, "dc:title");
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
setFieldFromTag(bibEntry, item, StandardField.AUTHOR, "dc:creator");
setFieldFromTag(bibEntry, item, StandardField.DATE, "dc:date");
setFieldFromTag(bibEntry, item, StandardField.URL, "link");
return bibEntry;
}

private void setFieldFromTag(BibEntry bibEntry, Element item, StandardField field, String tagName) {
Node element = item.getElementsByTagName(tagName).item(0);
if (element == null) {
return;
}

String value = element.getTextContent();
bibEntry.setField(field, value);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package org.jabref.logic.importer.fetcher;

import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.importer.FetcherException;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

@FetcherTest
class CollectionOfComputerScienceBibliographiesFetcherTest {
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
private CollectionOfComputerScienceBibliographiesFetcher fetcher;
private BibEntry bibEntry1;
private BibEntry bibEntry2;

@BeforeEach
public void setUp() {
fetcher = new CollectionOfComputerScienceBibliographiesFetcher();

bibEntry1 = new BibEntry();
bibEntry1.setField(StandardField.TITLE, "The relationship of code churn and architectural violations in the open source software JabRef");
bibEntry1.setField(StandardField.AUTHOR, "Tobias Olsson, Morgan Ericsson, Anna Wingkvist");
bibEntry1.setField(StandardField.DATE, "2017");
bibEntry1.setField(StandardField.URL, "http://liinwww.ira.uka.de/searchbib/index?query=lgqcdpmrnlbbtgtqnxgpnddcrtxhcdxl&results=bibtex&mode=dup&rss=1");

bibEntry2 = new BibEntry();
bibEntry2.setField(StandardField.TITLE, "Literaturverwaltungsprogramme im Überblick");
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
bibEntry2.setField(StandardField.AUTHOR, "Michaele Adam, Jutta Musiat, Kathleen Hoffmann, Sandra Rahm, Matti Stöhr, Christina Wenzel");
bibEntry2.setField(StandardField.DATE, "2018");
bibEntry2.setField(StandardField.URL, "http://liinwww.ira.uka.de/searchbib/index?query=qrxmnfnthltrkcgnxdxtfdrhrxjnttxg&results=bibtex&mode=dup&rss=1");
}

@Test
public void getNameReturnsCorrectName() {
assertEquals("Collection of Computer Science Bibliographies", fetcher.getName());
}

@Test
public void getUrlForQueryReturnsCorrectUrl() throws MalformedURLException, URISyntaxException, FetcherException {
String query = "java jdk";
URL url = fetcher.getURLForQuery(query);
assertEquals("http://liinwww.ira.uka.de/bibliography/rss?query=java+jdk&sort=score", url.toString());
}

@Test
public void performSearchReturnsMatchingMultipleEntries() throws FetcherException {
List<BibEntry> searchResult = fetcher.performSearch("jabref");
assertTrue(searchResult.contains(bibEntry1));
assertTrue(searchResult.contains(bibEntry2));
}

@Test
public void performSearchReturnsEmptyListForEmptySearch() throws FetcherException {
List<BibEntry> searchResult = fetcher.performSearch("");
assertEquals(Collections.emptyList(), searchResult);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package org.jabref.logic.importer.fetcher;

import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.bibtex.BibEntryAssert;
import org.jabref.model.entry.BibEntry;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

@FetcherTest
public class CollectionOfComputerScienceBibliographiesParserTest {
@Test
public void parseEntriesReturnsEmptyListIfXmlHasNoResults() throws Exception {
parseXmlAndCheckResults("collection_of_computer_science_bibliographies_empty_result.xml", Collections.emptyList());
}

@Test
public void parseEntriesReturnsOneBibEntryInListIfXmlHasOneResult() throws Exception {
parseXmlAndCheckResults("collection_of_computer_science_bibliographies_single_result.xml", Collections.singletonList("collection_of_computer_science_bibliographies_single_result.bib"));
}

@Test
public void parseEntriesReturnsMultipleBibEntriesInListIfXmlHasMultipleResults() throws Exception {
parseXmlAndCheckResults("collection_of_computer_science_bibliographies_multiple_results.xml", Arrays.asList("collection_of_computer_science_bibliographies_multiple_results_first_result.bib", "collection_of_computer_science_bibliographies_multiple_results_second_result.bib"));
}

private void parseXmlAndCheckResults(String xmlName, List<String> resourceNames) throws Exception {
InputStream is = CollectionOfComputerScienceBibliographiesParserTest.class.getResourceAsStream(xmlName);
CollectionOfComputerScienceBibliographiesParser parser = new CollectionOfComputerScienceBibliographiesParser();
List<BibEntry> entries = parser.parseEntries(is);
assertNotNull(entries);
assertEquals(resourceNames.size(), entries.size());
for (int i = 0; i < resourceNames.size(); i++) {
BibEntryAssert.assertEquals(GvkParserTest.class, resourceNames.get(i), entries.get(i));
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet title="XSL_formatting" type="text/xsl" href="http://liinwww.ira.uka.de/bibliography/rss.xsl"?><rss version="2.0">

<channel xmlns:dc="http://purl.org/dc/elements/1.1/">

<title>CCSB: "test string which returns no results"</title>
<link>http://liinwww.ira.uka.de/bibliography/#search</link>
<description>Search results in The Collection of Computer Science Bibliographies for query: "test string which returns no results"</description>
<language>en</language>
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
<copyright>The data is available for noncommercial or private use only, harvesting is prohibited (the data may be obtained using other means and not this RSS feed).</copyright>
<webMaster>liinwwwa@ira.uka.de</webMaster>
<lastBuildDate>Mon, 09 Mar 2020 03:14:28 +0100</lastBuildDate>
<ttl>5760</ttl>

</channel>
</rss>
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet title="XSL_formatting" type="text/xsl" href="http://liinwww.ira.uka.de/bibliography/rss.xsl"?><rss version="2.0">

<channel xmlns:dc="http://purl.org/dc/elements/1.1/">

<title>CCSB: +"effective java" +"joshua bloch" +"java series"</title>
<link>http://liinwww.ira.uka.de/bibliography/#search</link>
<description>Search results in The Collection of Computer Science Bibliographies for query: +"effective java" +"joshua bloch" +"java series"</description>
<language>en</language>
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
<copyright>The data is available for noncommercial or private use only, harvesting is prohibited (the data may be obtained using other means and not this RSS feed).</copyright>
<webMaster>liinwwwa@ira.uka.de</webMaster>
<lastBuildDate>Mon, 09 Mar 2020 03:14:28 +0100</lastBuildDate>
<ttl>5760</ttl>

<!-- First result: provides dc:title, dc:date and dc:creator but no link element, so the parsed entry has no url field. -->
<item>
<description>
<p>
Author: Joshua Bloch;
<br/>
Title: Effective Java: Programming Language Guide;
<br/>
Year: 2001;
<br/>
Abstract available;
<br/>
4 records for this title/author combination available.
</p>
</description>

<dc:title>Effective Java: Programming Language Guide</dc:title>

<dc:date>2001</dc:date>

<dc:creator>Joshua Bloch</dc:creator>

<title>[2001] Effective Java: Programming Language Guide (by: Joshua Bloch)</title>

</item>

<!-- Second result: provides link, dc:creator and dc:title but no dc:date, so the parsed entry has no date field; the escaped ampersands in the link are expected as plain ampersands in the url field. -->
<item>
<link>http://liinwww.ira.uka.de/searchbib/index?query=hpdtjrpbcpgllljdctmdkfnhqdcnrkkc&amp;results=bibtex&amp;mode=dup&amp;rss=1</link>

<dc:creator>Joshua Bloch</dc:creator>

<dc:title>Effective Java</dc:title>

<description>
<p>
Author: Joshua Bloch;
<br/>
Title: Effective Java;
<br/>
Year: 2001;
<br/>
URLs available (possible fulltext access);
<br/>
2 records for this title/author combination available.
</p>
</description>

<title>[2001] Effective Java (by: Joshua Bloch)</title>

</item>

</channel>
</rss>
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@misc{,
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
author = {Joshua Bloch},
date = {2001},
title = {Effective Java: Programming Language Guide}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@misc{,
author = {Joshua Bloch},
title = {Effective Java},
url = {http://liinwww.ira.uka.de/searchbib/index?query=hpdtjrpbcpgllljdctmdkfnhqdcnrkkc&results=bibtex&mode=dup&rss=1}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@misc{,
author = {Tobias Olsson, Morgan Ericsson, Anna Wingkvist},
daniel-price marked this conversation as resolved.
Show resolved Hide resolved
date = {2017},
title = {The relationship of code churn and architectural violations in the open source software JabRef},
url = {http://liinwww.ira.uka.de/searchbib/index?query=lgqcdpmrnlbbtgtqnxgpnddcrtxhcdxl&results=bibtex&mode=dup&rss=1}
}
Loading