Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow export as tsv #500

Merged
merged 7 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: 1.8
- name: Install metafacture-core
run: |
git clone https://github.com/metafacture/metafacture-core.git
cd metafacture-core
git checkout 5.7.0-rc1
./gradlew publishToMavenLocal
#- name: Install metafacture-core
# run: |
# git clone https://github.com/metafacture/metafacture-core.git
# cd metafacture-core
# git checkout metafacture-core-5.7.0
# ./gradlew publishToMavenLocal
- name: Install metafacture-fix
run: |
git clone https://github.com/metafacture/metafacture-fix.git
cd metafacture-fix
git checkout master
git checkout 0.7.0
./gradlew publishToMavenLocal
- name: Run tests
run: sbt update test
1 change: 1 addition & 0 deletions app/controllers/Accept.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ enum Format {
HTML("html", "text/html"), //
JAVASCRIPT("js", "text/javascript", "application/javascript"), //
CSV("csv", "text/csv"), //
TSV("tsv", "text/tab-separated-values"), //
BULK("bulk", "application/x-jsonlines"), //
RDF_XML("rdf", "application/rdf+xml", "application/xml", "text/xml"), //
N_TRIPLE("nt", "application/n-triples", "text/plain"), //
Expand Down
28 changes: 26 additions & 2 deletions app/controllers/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,14 @@ private static Result searchResult(String q, String location, int from,
"attachment; filename=organisations.csv");
return ok(csvExport(format, orgs)).as("text/csv; charset=utf-8");
});
results.put("tsv", () -> {
String queryResultString =
searchQueryResult(q, location, from, size, aggregations);
String orgs = Json.parse(queryResultString).get("member").toString();
response().setHeader("Content-Disposition",
"attachment; filename=organisations.tsv");
return ok(csvExport(format, orgs, CsvExport.TAB_SEPARATOR)).as("text/tab-separated-values; charset=utf-8");
});
Supplier<Result> json = () -> {
String queryResultString =
searchQueryResult(q, location, from, size, aggregations);
Expand Down Expand Up @@ -490,11 +498,21 @@ private static Optional<JsonNode> getOptional(JsonNode json, String field) {
return Optional.ofNullable(json.get(field));
}

private static String csvExport(String format, String orgs) {
private static String csvExport(String format, String orgs, String separator) {
String[] formatConfig = format.split(FORMAT_CONFIG_SEP); // e.g. csv:name,id
String fields = formatConfig.length > 1 && !formatConfig[1].isEmpty()
? formatConfig[1] : defaultFields();
return new CsvExport(orgs).of(fields);
if (separator == null) {
return new CsvExport(orgs).of(fields);
}
else {
String fieldsWithNonDefaultSeparator=fields.replaceAll(",", separator);
return new CsvExport(orgs).of(fieldsWithNonDefaultSeparator, separator);
}
}

/**
 * Exports the given organisations using the default separator.
 *
 * @param format The requested format, optionally with a field list
 * @param orgs The organisations to export, as a JSON array string
 * @return The exported data
 */
private static String csvExport(String format, String orgs) {
	final String separator = CsvExport.DEFAULT_SEPARATOR;
	return csvExport(format, orgs, separator);
}

private static String defaultFields() {
Expand Down Expand Up @@ -705,6 +723,12 @@ private static Result resultFor(String id, JsonNode json, String format) {
return ok(csvExport(format, "[" + json.toString() + "]"))
.as("text/csv; charset=utf-8");
});
results.put("tsv", () -> {
response().setHeader("Content-Disposition",
String.format("attachment; filename=%s.tsv", id));
return ok(csvExport(format, "[" + json.toString() + "]", CsvExport.TAB_SEPARATOR))
.as("text/tab-separated-values; charset=utf-8");
});
Pair<String, String> contentAndType = contentAndType(json, format);
Supplier<Result> rdfSupplier =
() -> ok(contentAndType.getLeft()).as(contentAndType.getRight());
Expand Down
25 changes: 18 additions & 7 deletions app/transformation/CsvExport.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
import play.libs.Json;

/**
* Export organisations JSON data as CSV.
* Export organisations JSON data as CSV. Allows defining a separator
* other than the comma.
*
* @author Fabian Steeg (fsteeg)
*/
public class CsvExport {

private final JsonNode organisations;
public final static String DEFAULT_SEPARATOR = ",";
public final static String TAB_SEPARATOR = "\t";

/**
* @param json The organisations JSON data to export
Expand All @@ -35,24 +38,32 @@ public CsvExport(String json) {
* @return The data for the given fields in CSV format
*/
public String of(String fields) {
	// Plain CSV: delegate with the default (comma) separator.
	final String separator = DEFAULT_SEPARATOR;
	return of(fields, separator);
}

/**
* @param fields The JSON fields to include in the export
* @param separator The separator to separate entries in the CSV
* @return The data for the given fields in [C*]SV format
*/
public String of(final String fields, final String separator) {
StringBuilder csv = new StringBuilder(fields + "\n");
for (Iterator<JsonNode> iter = organisations.elements(); iter.hasNext();) {
for (Iterator<JsonNode> iter = organisations.elements(); iter.hasNext(); ) {
JsonNode org = iter.next();
csv.append(Arrays.asList(fields.split(",")).stream().map(field -> {
csv.append(Arrays.asList(fields.split(separator)).stream().map(field -> {
try {
Object value = JsonPath.read(Configuration.defaultConfiguration()
.jsonProvider().parse(org.toString()), "$." + field);
return String.format("\"%s\"",
value.toString().replaceAll("\"", "\"\""));
return separator==DEFAULT_SEPARATOR ? String.format("\"%s\"",
value.toString().replaceAll("\"", "\"\"")) : value.toString();
}
catch (PathNotFoundException x) {
Logger.trace(x.getMessage());
// https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#empty-and-quoted-cells
return "";
}
}).collect(Collectors.joining(","))).append("\n");
}).collect(Collectors.joining(separator))).append("\n");
}
return csv.toString();
}

}
7 changes: 6 additions & 1 deletion app/views/api.scala.html
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ <h2 id="content_types">@Messages.get("api.content_types.header") <small><a href=
<p><code>curl http://lobid.org@routes.Application.get("DE-6")</code></p>
<p>@Messages.get("api.content_types.negotiate")</p>
<p><code>curl --header "Accept: text/csv" http://lobid.org@routes.Application.search("kunst")</code></p>
<p><code>curl --header "Accept: text/tab-separated-values" http://lobid.org@routes.Application.search("kunst")</code></p>
<p><code>curl --header "Accept: application/x-jsonlines" http://lobid.org@routes.Application.search("kunst") > kunst.jsonl</code></p>
<p>@Messages.get("api.content_types.override") <a href='@routes.Application.get("DE-6", format="json")'>@routes.Application.get("DE-6", format="json")</a></p>
<p>@Messages.get("api.content_types.dotFormat") <a href='@routes.Application.getDotFormat("DE-6", format="json")'>@routes.Application.getDotFormat("DE-6", format="json")</a></p>
Expand All @@ -70,6 +71,10 @@ <h2 id="csv">@Messages.get("api.csv.header") <small><a href="#csv"><span class="
@desc(Messages.get("api.csv.default"), routes.Application.search("kunst", size=300, format="csv"))
@desc(Messages.get("api.csv.custom"), routes.Application.search("kunst", size=300, format="csv:name,isil,url,classification.label.de"))

<h2 id="tsv">@Messages.get("api.tsv.header") <small><a href="#tsv"><span class="glyphicon glyphicon-link"></span></a></small></h2>
@desc(Messages.get("api.tsv.default"), routes.Application.search("kunst", size=300, format="tsv"))
@desc(Messages.get("api.tsv.custom"), routes.Application.search("kunst", size=300, format="tsv:name,isil,url,classification.label.de"))

<h2 id="auto-complete">@Messages.get("api.autocomplete.header") <small><a href='#auto-complete'><span class='glyphicon glyphicon-link'></span></a></small></h2>
<p>@Messages.get("api.autocomplete.intro")</p>
@desc(Messages.get("api.autocomplete.name") + " \"format=json:name\"", routes.Application.search("name:dnb OR alternateName:dnb", format="json:name"))
Expand Down Expand Up @@ -111,4 +116,4 @@ <h2 id="openrefine">OpenRefine <small><a href="#openrefine"><span class="glyphic
<script src="https://hypothes.is/embed.js" async></script>
<link rel="canonical" href="http://lobid.org/organisations/api/@controllers.Application.currentLang()" />
@if(controllers.Application.currentLang()=="de"){<link rel="canonical" href="http://lobid.org/organisations/api/" />}
}
}
4 changes: 2 additions & 2 deletions app/views/search.scala.html
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,6 @@ <h4>@Messages.get("search.location") @if(!location.isEmpty){
</div>
@defining(if(!q.isEmpty) q else "*") { qParam =>
<p>@Html(Messages.get("search.footer.api_text", routes.Application.search(q=qParam, from=from, format="json", location=location),
routes.Application.search(q=qParam, from=from, format="csv", location=location), routes.Application.api()))</p>
routes.Application.search(q=qParam, from=from, format="csv", location=location), routes.Application.search(q=qParam, from=from, format="tsv", location=location), routes.Application.api()))</p>
}} else { @if(!q.isEmpty) {<p class='footer'>@Html(Messages.get("search.footer.no_results", q))</p>} } }
}}
}}
24 changes: 12 additions & 12 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@ libraryDependencies ++= Seq(
"com.fasterxml.jackson.core" % "jackson-annotations" % "2.15.1",
"com.github.jsonld-java" % "jsonld-java" % "0.13.4",
"org.apache.jena" % "jena-arq" % "3.17.0",
"org.metafacture" % "metamorph" % "5.7.0-rc1" exclude("org.slf4j", "slf4j-simple"),
"org.metafacture" % "metafacture-elasticsearch" % "5.7.0-rc1",
"org.metafacture" % "metamorph-test" % "5.7.0-rc1",
"org.metafacture" % "metafacture-json" % "5.7.0-rc1",
"org.metafacture" % "metafacture-csv" % "5.7.0-rc1",
"org.metafacture" % "metafacture-io" % "5.7.0-rc1",
"org.metafacture" % "metafacture-triples" % "5.7.0-rc1",
"org.metafacture" % "metafacture-biblio" % "5.7.0-rc1",
"org.metafacture" % "metafacture-xml" % "5.7.0-rc1",
"org.metafacture" % "metafacture-framework" % "5.7.0-rc1",
"org.metafacture" % "metafacture-strings" % "5.7.0-rc1",
"org.metafacture" % "metafix" % "0.6.0-SNAPSHOT",
"org.metafacture" % "metamorph" % "5.7.0" exclude("org.slf4j", "slf4j-simple"),
"org.metafacture" % "metafacture-elasticsearch" % "5.7.0",
"org.metafacture" % "metamorph-test" % "5.7.0",
"org.metafacture" % "metafacture-json" % "5.7.0",
"org.metafacture" % "metafacture-csv" % "5.7.0",
"org.metafacture" % "metafacture-io" % "5.7.0",
"org.metafacture" % "metafacture-triples" % "5.7.0",
"org.metafacture" % "metafacture-biblio" % "5.7.0",
"org.metafacture" % "metafacture-xml" % "5.7.0",
"org.metafacture" % "metafacture-framework" % "5.7.0",
"org.metafacture" % "metafacture-strings" % "5.7.0",
"org.metafacture" % "metafix" % "0.7.0",
"org.xbib.elasticsearch.plugin" % "elasticsearch-plugin-bundle" % "2.3.2.0",
"com.jayway.jsonpath" % "json-path" % "2.2.0",
"net.java.dev.jna" % "jna" % "4.1.0",
Expand Down
1 change: 1 addition & 0 deletions conf/dataset.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@
"documentation": "http://lobid.org/organisations/api",
"encodingFormat": [
"text/csv",
"text/tab-separated-values",
"application/json",
"application/ld+json"
],
Expand Down
8 changes: 6 additions & 2 deletions conf/messages.de
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ search.location = Standort
search.prev = vorige
search.next = nächste
search.total_results = Trefferzahl
search.footer.api_text = Sie können auf diese Daten auch als <a href="{0}">JSON</a> oder <a href="{1}">CSV</a> über unsere <a href="{2}">Programmierschnittstelle</a> zugreifen.
search.footer.api_text = Sie können auf diese Daten auch als <a href="{0}">JSON</a> oder <a href="{1}">CSV</a> (resp. <a href="{2}">TSV</a>) über unsere <a href="{3}">Programmierschnittstelle</a> zugreifen.
search.footer.no_results = Keine Ergebnisse für <code>{0}</code>.
search.type = Typ
search.collects = Bestandsgröße
Expand All @@ -64,7 +64,7 @@ api.location.distance = Suche über Distanz zu einem Punkt ("location": Koordina

api.content_types.header = Inhaltstypen
api.content_types.default = Standardmäßig liefert dieser Dienst strukturierte API-Antworten (als JSON):
api.content_types.negotiate = Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), CSV (text/csv), JSON lines (application/x-jsonlines) oder HTML (text/html):
api.content_types.negotiate = Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), CSV (text/csv), TSV (text/tab-separated-values), JSON lines (application/x-jsonlines) oder HTML (text/html):
api.content_types.override = Der Query-Parameter "format" kann verwendet werden, um den Accept-Header aufzuheben, z.B. zur Anzeige von JSON im Browser:
api.content_types.dotFormat = Der Wert des Format-Parameters kann für Einzeltreffer auch in URLs als Dateiendung verwendet werden:
api.content_types.compress = Für größere Anfragen kann die Antwort als gzip komprimiert werden:
Expand All @@ -74,6 +74,10 @@ api.csv.header = CSV-Export
api.csv.default = Standardfelder ("format=csv")
api.csv.custom = Benutzerdefinierte Felder ("format": zu verwendende Felder, mit Punkten für geschachtelte Felder im Format "csv:feld1,feld2.unterfeld")

api.tsv.header = TSV-Export
api.tsv.default = Standardfelder ("format=tsv")
api.tsv.custom = Benutzerdefinierte Felder ("format": zu verwendende Felder, mit Punkten für geschachtelte Felder im Format "tsv:feld1,feld2.unterfeld")

api.autocomplete.header = Autovervollständigung
api.autocomplete.intro = Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung aus einem angegebenen Feld:
api.autocomplete.name = Name vorschlagen:
Expand Down
8 changes: 6 additions & 2 deletions conf/messages.en
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ search.location = Location
search.prev = prev
search.next = next
search.total_results = Total results
search.footer.api_text = You can also access this data as <a href="{0}">JSON</a> or <a href="{1}">CSV</a> using our <a href="{2}">API</a>.
search.footer.api_text = You can also access this data as <a href="{0}">JSON</a> or <a href="{1}">CSV</a> (or <a href="{2}">TSV</a>) using our <a href="{3}">API</a>.
search.footer.no_results = No results for <code>{0}</code>.
search.type = Type
search.collects = Stock size
Expand All @@ -64,7 +64,7 @@ api.location.distance = Query with distance ("location": coordinate of a point a

api.content_types.header = Content types
api.content_types.default = By default, this service returns structured API responses (as JSON):
api.content_types.negotiate = It supports content negotiation based on the "Accept" header to serve JSON (application/json), CSV (text/csv), JSON lines (application/x-jsonlines), or HTML (text/html):
api.content_types.negotiate = It supports content negotiation based on the "Accept" header to serve JSON (application/json), CSV (text/csv), TSV (text/tab-separated-values), JSON lines (application/x-jsonlines), or HTML (text/html):
api.content_types.override = An optional "format" query parameter can be used to override the "Accept" header, e.g. to display JSON in a browser:
api.content_types.dotFormat = For individual organisations, the format parameter values can be used as file extensions in URLs:
api.content_types.compress = For larger requests, the response can be compressed as gzip:
Expand All @@ -74,6 +74,10 @@ api.csv.header = CSV export
api.csv.default = Default fields ("format=csv")
api.csv.custom = Custom fields ("format": fields to use, with dots for nested fields "csv:field1,field2.subfield")

api.tsv.header = TSV export
api.tsv.default = Default fields ("format=tsv")
api.tsv.custom = Custom fields ("format": fields to use, with dots for nested fields "tsv:field1,field2.subfield")

api.autocomplete.header = Auto-complete
api.autocomplete.intro = The API supports a response format for auto-complete suggestions using a specified field:
api.autocomplete.name = Suggest name:
Expand Down
40 changes: 31 additions & 9 deletions test/transformation/CsvExportTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,27 @@

@SuppressWarnings("javadoc")
public class CsvExportTest {
@Test
public void testFlatFieldsDefaultSeparator() {
testFlatFields(CsvExport.DEFAULT_SEPARATOR);
}

@Test
public void testFlatFieldsTabulatorSeparator() {
testFlatFields(CsvExport.TAB_SEPARATOR);
}

@Test
public void testNestedFieldsDefaultSeparator() {
testNestedFields(CsvExport.DEFAULT_SEPARATOR);
}

@Test
public void testFlatFields() {
public void testNestedFieldsTabulatorSeparator() {
testNestedFields(CsvExport.TAB_SEPARATOR);
}

private void testFlatFields(final String sep) {
ObjectNode node1 = Json.newObject();
node1.put("field1", "org1-value1");
node1.put("field2", "org1-value2");
Expand All @@ -28,15 +46,17 @@ public void testFlatFields() {
node2.put("field3", "org2-value3");
List<ObjectNode> orgs = Arrays.asList(node1, node2);
CsvExport export = new CsvExport(Json.stringify(Json.toJson(orgs)));
String expected = String.format("%s,%s\n%s,%s\n%s,%s\n", //
String expected = String.format("%s" + sep + "%s\n%s" + sep + "%s\n%s" + sep + "%s\n", //
"field1", "field3", //
"\"org1-value1\"", "\"org1-value3\"", //
"\"org2-value1\"", "\"org2-value3\"");
assertThat(export.of("field1,field3")).isEqualTo(expected);
if (sep.equals(CsvExport.TAB_SEPARATOR)) {
expected=expected.replaceAll("\"","");
}
assertThat(export.of("field1" + sep + "field3", sep)).isEqualTo(expected);
}

@Test
public void testNestedFields() {
private void testNestedFields(final String sep) {
ObjectNode org1 = Json.newObject();
ObjectNode sub1 = Json.newObject();
org1.put("field1", "org1-value1");
Expand All @@ -55,15 +75,17 @@ public void testNestedFields() {
sub2.put("field3", "org2-sub3");
List<ObjectNode> orgs = Arrays.asList(org1, org2);
CsvExport export = new CsvExport(Json.stringify(Json.toJson(orgs)));
String expected = String.format("%s,%s\n%s,%s\n%s,%s\n", //
String expected = String.format("%s" + sep + "%s\n%s" + sep + "%s\n%s" + sep + "%s\n", //
"field1", "field3.field2", //
"\"org1-value1\"", "\"org1-sub2\"", //
"\"org2-value1\"", "\"org2-sub2\"");
assertThat(export.of("field1,field3.field2")).isEqualTo(expected);
if (sep.equals(CsvExport.TAB_SEPARATOR)) {
expected=expected.replaceAll("\"","");
}
assertThat(export.of("field1" + sep + "field3.field2", sep)).isEqualTo(expected);
}

@Test
public void testMissingField() {
private void testMissingField() {
ObjectNode org = Json.newObject();
org.put("field1", "org1-value1");
org.put("field2", "org1-value2");
Expand Down
Loading