Skip to content

Commit

Permalink
serialize objects as JSON for addition to elasticsearch
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Dec 10, 2013
1 parent 88adff2 commit 2fecf68
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 5 deletions.
16 changes: 12 additions & 4 deletions scripts/search/add
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,21 @@
# <field name="entityid" type="long" indexed="true" stored="true" required="true" multiValued="false" />
# <field name="type" type="string" indexed="true" stored="true" required="true" multiValued="false" />
# Download the dataverse and dataset listings from the API as JSON and post
# each listing to Solr's JSON update handler.
mkdir -p data
echo "adding to solr..."
curl -s http://localhost:8080/api/dataverses > data/dataverses.json
# The Solr URL is quoted so the shell never treats "?" as a glob character.
curl 'http://localhost:8983/solr/update/json?commit=true' -H 'Content-type:application/json' --data-binary @data/dataverses.json

curl -s http://localhost:8080/api/datasets > data/datasets.json
curl 'http://localhost:8983/solr/update/json?commit=true' -H 'Content-type:application/json' --data-binary @data/datasets.json

mkdir -p data/dataverses
# Fetch dataverse 1 from the API and post it to elasticsearch.
# The elasticsearch URL path below is /<index>/<type>/<id>.
curl -s http://localhost:8080/api/dataverses/1 > data/dataverses/1
curl -XPOST 'http://localhost:9200/dataverse/dataverses/1' --data-binary @data/dataverses/1
echo "adding to elasticsearch..."
#curl -XPOST http://localhost:9200/dataverse/datasets/1 --data-binary @data/datasets/1.dump
# For each type, fetch the "dump" of every id listed in data/<type>.json and
# post that dump to elasticsearch at /dataverse/<type>/<id>.
for type in dataverses datasets; do
  mkdir -p "data/$type"
  # json2ids prints the ids separated by spaces, so the command substitution
  # is deliberately left unquoted to let the shell split it into words.
  for i in $(./json2ids "data/$type.json"); do
    #echo "adding $i from $type..."
    curl -s "http://localhost:8080/api/$type/$i/dump" > "data/$type/$i.dump"
    curl -XPOST "http://localhost:9200/dataverse/$type/$i" --data-binary "@data/$type/$i.dump"
    # Emit a newline after each response (presumably the elasticsearch
    # reply does not end with one).
    echo
  done
done
2 changes: 2 additions & 0 deletions scripts/search/clear
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
#!/bin/sh
# Empty both search backends: delete every document from Solr (query "*:*"),
# then delete the "dataverse" index from elasticsearch.
# The Solr URL is quoted so the shell never treats "?" as a glob character.
curl 'http://localhost:8983/solr/update/json?commit=true' -H 'Content-type: application/json' -X POST -d '{"delete": { "query":"*:*" }}'
curl -XDELETE http://localhost:9200/dataverse/
# Emit a newline after the response (presumably the elasticsearch reply does
# not end with one).
echo
8 changes: 8 additions & 0 deletions scripts/search/go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash -x
# Clear the search indexes, re-add the data, then run ./query and ./search.
#
# The helper scripts are invoked by relative path, so switch to the directory
# that contains this script first; this lets it be started from anywhere.
cd "$(dirname "$0")" || exit 1
./clear
# presumably gives the deletes a moment to take effect before re-adding
sleep .5
./add
# elasticsearch might need more time before query
sleep 1
./query
./search
23 changes: 23 additions & 0 deletions scripts/search/json2ids
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/python
"""Find ids in JSON document"""
import sys
try:
    import json
except ImportError:
    # Fall back to simplejson where the json module is unavailable.
    import simplejson as json
import optparse


def find_ids(data):
    """Return the "entityid_l" value of every item in data, as strings.

    data is the decoded JSON document: an iterable of mappings that each
    carry an "entityid_l" key. A missing key raises KeyError.
    """
    return [str(item["entityid_l"]) for item in data]


def main(argv=None):
    """Print the ids found in the JSON file named by the first argument.

    argv is the argument list without the program name; None means
    sys.argv[1:]. Returns the process exit status: 0 on success, 1 when no
    filename was supplied.
    """
    parser = optparse.OptionParser(description=__doc__)
    options, args = parser.parse_args(argv)

    if not args:
        # Report on stderr: callers capture stdout as the list of ids, so a
        # message printed there would be mistaken for ids.
        sys.stderr.write("Please supply a filename to process\n")
        return 1

    json_data = open(args[0])
    # try/finally (rather than "with") closes the file even when the JSON is
    # malformed while staying valid on very old interpreters.
    try:
        data = json.load(json_data)
    finally:
        json_data.close()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(' '.join(find_ids(data)))
    return 0


if __name__ == "__main__":
    sys.exit(main())
33 changes: 32 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import javax.json.JsonObjectBuilder;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;

@Path("datasets")
public class Datasets {
Expand All @@ -31,7 +32,8 @@ public String get() {
.add(SearchFields.ENTITY_ID, dataset.getId())
.add(SearchFields.TYPE, "datasets")
/**
* @todo: should we assign a dataset title to name like this?
* @todo: should we assign a dataset title to name like
* this?
*/
.add("name", dataset.getTitle())
.add(SearchFields.TITLE, dataset.getTitle())
Expand All @@ -41,4 +43,33 @@ public String get() {
JsonArray jsonArray = datasetsArrayBuilder.build();
return Util.jsonArray2prettyString(jsonArray);
}

/**
 * Returns the dataset with the given id when the verb is "dump". Used
 * primarily to feed data into elasticsearch.
 *
 * @param id the id passed to datasetService.find
 * @param verb the trailing path segment; only "dump" is recognized
 * @return the trimmed dataset, or null when the verb is not "dump" or no
 * dataset was found
 */
@GET
@Path("{id}/{verb}")
public Dataset get(@PathParam("id") Long id, @PathParam("verb") String verb) {
    logger.info("GET called");
    // Only the "dump" verb triggers a lookup; anything else counts as a miss.
    Dataset found = verb.equals("dump") ? datasetService.find(id) : null;
    if (found == null) {
        /**
         * @todo return an error instead of "204 No Content"?
         *
         */
        logger.info("GET attempted with dataset id " + id + " and verb " + verb);
        return null;
    }
    logger.info("found " + found);
    // NOTE(review): the setters below mutate the object returned by
    // datasetService.find -- confirm these nulls can never be persisted.
    // prevent HTTP Status 500 - Internal Server Error
    found.setFiles(null);
    // elasticsearch fails on "today" with
    // MapperParsingException[failed to parse date field [today],
    // tried both date format [dateOptionalTime], and timestamp number with locale []]
    found.setCitationDate(null);
    // too much information
    found.setOwner(null);
    return found;
}

}
22 changes: 22 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,32 @@ public String get(@PathParam("id") Long id) {
if (dataverse != null) {
return Util.jsonObject2prettyString(dataverse2json(dataverse));
} else {
/**
* @todo inconsistent with /{id}/dump which simply returns nothing
* and "204 No Content"
*/
return Util.message2ApiError("Dataverse id " + id + " not found");
}
}

/**
 * Returns the dataverse with the given id when the verb is "dump". Used
 * primarily to feed data into elasticsearch.
 *
 * @param id the id passed to dataverseService.find
 * @param verb the trailing path segment; only "dump" is recognized
 * @return the dataverse, or null when the verb is not "dump" or no
 * dataverse was found
 */
@GET
@Path("{id}/{verb}")
public Dataverse get(@PathParam("id") Long id, @PathParam("verb") String verb) {
    // Only the "dump" verb triggers a lookup; anything else counts as a miss.
    Dataverse found = verb.equals("dump") ? dataverseService.find(id) : null;
    if (found != null) {
        return found;
    }
    /**
     * @todo return an error instead of "204 No Content"?
     *
     */
    logger.info("GET attempted with dataverse id " + id + " and verb " + verb);
    return null;
}

public JsonObject dataverse2json(Dataverse dataverse) {
JsonObjectBuilder dataverseInfoBuilder = Json.createObjectBuilder()
.add(SearchFields.ID, "dataverse_" + dataverse.getId())
Expand Down

0 comments on commit 2fecf68

Please sign in to comment.