Skip to content

Commit

Permalink
Readme edit to link covid doc. Change to raw for passage docs. Only 1st one contains it now
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilro committed Mar 22, 2020
1 parent 75c8850 commit 7ff68c0
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ For the most part, these runs are based on [_default_ parameter settings](https:
The experiments described below are not associated with rigorous end-to-end regression testing and thus provide a lower standard of replicability.
For the most part, manual copying and pasting of commands into a shell is required to replicate our results:

+ [Experiments on COVID-19 Open Research Dataset](docs/experiments-covid.md)
+ [Replicating "Neural Hype" Experiments](docs/experiments-forum2018.md)
+ [Guide to running BM25 baselines on the MS MARCO Passage Retrieval Task](docs/experiments-msmarco-passage.md)
+ [Guide to running BM25 baselines on the MS MARCO Document Retrieval Task](docs/experiments-msmarco-doc.md)
Expand Down
16 changes: 9 additions & 7 deletions src/main/java/io/anserini/collection/CovidCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ public class Segment extends FileSegment<CovidCollection.Document> {
CSVParser csvParser = null;
private CSVRecord record = null;
private Iterator<CSVRecord> iterator = null; // iterator for CSV records
private String recordFullText = "";
private Iterator<JsonNode> paragraphIterator = null; // iterator for paragraphs in a CSV record
private Integer paragraphNumber = 0;

Expand All @@ -85,9 +84,10 @@ public void readNext() throws NoSuchElementException {
if (paragraphIterator != null && paragraphIterator.hasNext()) { // if the record contains more paragraphs, we parse them
String paragraph = paragraphIterator.next().get("text").asText();
paragraphNumber += 1;
bufferedRecord = new CovidCollection.Document(record, recordFullText, paragraph, paragraphNumber);
bufferedRecord = new CovidCollection.Document(record, paragraph, paragraphNumber);
} else if (iterator.hasNext()) { // if CSV contains more lines, we parse the next record
record = iterator.next();
String recordFullText = "";
if (record.get("has_full_text").contains("True")) {
String[] hashes = record.get("sha").split(";");
String fullTextPath = "/" + record.get("full_text_file") + "/" + hashes[hashes.length - 1].strip() + ".json";
Expand All @@ -97,14 +97,12 @@ record = iterator.next();
FileReader recordFullTextFileReader = new FileReader(recordFullTextPath);
ObjectMapper mapper = new ObjectMapper();
JsonNode recordJsonNode = mapper.readerFor(JsonNode.class).readTree(recordFullTextFileReader);
paragraphIterator = recordJsonNode.get("body_text").elements();

paragraphIterator = recordJsonNode.get("body_text").elements();
} catch (IOException e) {
LOG.error("Error parsing file at " + fullTextPath + "\n" + e.getMessage());
}
} else {
paragraphIterator = null;
recordFullText = "";
}
paragraphNumber = 0;
bufferedRecord = new CovidCollection.Document(record, recordFullText);
Expand Down Expand Up @@ -135,7 +133,7 @@ public class Document implements SourceDocument {
private String raw;
private CSVRecord record;

public Document(CSVRecord record, String recordFullText, String paragraph, Integer paragraphNumber) {
public Document(CSVRecord record, String paragraph, Integer paragraphNumber, String recordFullText) {
if (paragraphNumber == 0) {
id = Long.toString(record.getRecordNumber());
} else {
Expand All @@ -148,8 +146,12 @@ public Document(CSVRecord record, String recordFullText, String paragraph, Integ
this.record = record;
}

/**
 * Creates a document from a CSV record and a single paragraph.
 * Delegates to the four-argument constructor, passing an empty string as {@code recordFullText}.
 *
 * @param record the CSV record this document is built from
 * @param paragraph the paragraph text
 * @param paragraphNumber the paragraph's number within the record
 */
public Document(CSVRecord record, String paragraph, Integer paragraphNumber) {
this(record, paragraph, paragraphNumber, "");
}

public Document(CSVRecord record, String recordFullText) {
this(record, recordFullText, "", 0);
this(record, "", 0, recordFullText);
}

@Override
Expand Down

0 comments on commit 7ff68c0

Please sign in to comment.