Skip to content

Commit

Permalink
add custom gobbler for progress bar; better path handling
Browse files Browse the repository at this point in the history
Former-commit-id: 6ce9f8f
  • Loading branch information
kermitt2 committed Dec 27, 2018
1 parent c3e4a23 commit 9da8702
Show file tree
Hide file tree
Showing 27 changed files with 115 additions and 317 deletions.
55 changes: 40 additions & 15 deletions grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.io.*;
import java.lang.StringBuilder;
import java.util.*;
import java.util.regex.*;

import jep.Jep;
import jep.JepConfig;
Expand Down Expand Up @@ -215,14 +216,16 @@ public static void train(String modelName, File trainingData, File outputModel)
"grobidTagger.py",
modelName,
"train",
"--out", GrobidProperties.getInstance().getModelPath().getAbsolutePath());
pb.directory(new File(GrobidProperties.getInstance().getDeLFTPath()));
"--input", trainingData.getAbsolutePath(),
"--output", GrobidProperties.getInstance().getModelPath().getAbsolutePath());
File delftPath = new File(GrobidProperties.getInstance().getDeLFTFilePath());
pb.directory(delftPath);
Process process = pb.start();
//pb.inheritIO();
SimpleStreamGobbler streamGobbler =
new SimpleStreamGobbler(process.getInputStream(), System.out::println);
Executors.newSingleThreadExecutor().submit(streamGobbler);
streamGobbler = new SimpleStreamGobbler(process.getErrorStream(), System.err::println);
CustomStreamGobbler customStreamGobbler =
new CustomStreamGobbler(process.getInputStream(), System.out);
Executors.newSingleThreadExecutor().submit(customStreamGobbler);
SimpleStreamGobbler streamGobbler = new SimpleStreamGobbler(process.getErrorStream(), System.err::println);
Executors.newSingleThreadExecutor().submit(streamGobbler);
int exitCode = process.waitFor();
//assert exitCode == 0;
Expand Down Expand Up @@ -287,19 +290,41 @@ public void run() {
}
}

private static class CharStreamGobbler implements Runnable {
private InputStream inputStream;
private Consumer<String> consumer;

public SimpleStreamGobbler(InputStream inputStream, Consumer<String> consumer) {
this.inputStream = inputStream;
this.consumer = consumer;
/**
 * Custom gobbler that reproduces the Keras training progress bar on the Java side.
 *
 * <p>The output of the python process is read line by line. A line that looks like a
 * progress-bar update (for instance {@code 12/100 [===>....]}) is written with a leading
 * carriage return and without a line terminator, so that successive updates overwrite
 * each other on the console. Every other line is printed as a normal line.
 */
private static class CustomStreamGobbler implements Runnable {
    public static final Logger LOGGER = LoggerFactory.getLogger(CustomStreamGobbler.class);

    /** Detects a progress-bar update; compiled once and shared by all gobbler instances. */
    private static final Pattern PROGRESS_LINE = Pattern.compile("\\d/\\d+ \\[");

    private final InputStream is;
    private final PrintStream os;

    /**
     * @param is stream to consume (the output stream of the python process)
     * @param os stream on which the consumed lines are reproduced
     */
    public CustomStreamGobbler(InputStream is, PrintStream os) {
        this.is = is;
        this.os = os;
    }

    /**
     * Copies the input to the output until end of stream. An I/O failure is logged as a
     * warning and ends the copy; it is not propagated to the executor.
     */
    @Override
    public void run() {
        // true while the last thing written is a progress line without line terminator
        boolean progressLineOpen = false;
        // try-with-resources: the reader (and the underlying stream) is always released
        try (BufferedReader br = new BufferedReader(new InputStreamReader(this.is))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (PROGRESS_LINE.matcher(line).find()) {
                    // rewind to the start of the console line to overwrite the previous update
                    os.print("\r" + line);
                    os.flush();
                    progressLineOpen = true;
                } else {
                    if (progressLineOpen) {
                        // terminate the pending progress line, otherwise this line
                        // would be glued to the end of the progress bar
                        os.println();
                        progressLineOpen = false;
                    }
                    os.println(line);
                }
            }
        } catch (IOException e) {
            LOGGER.warn("IO error between embedded python and java process", e);
        } finally {
            if (progressLineOpen) {
                // never leave the console in the middle of a progress line
                os.println();
            }
        }
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public Jep getJEPInstance() {
JepConfig config = new JepConfig();

try {
File delftPath = new File(GrobidProperties.getInstance().getDeLFTPath());
File delftPath = new File(GrobidProperties.getInstance().getDeLFTFilePath());
if (!delftPath.exists()) {
throw new GrobidResourceException("DeLFT installation path does not exist");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,85 +425,15 @@ public static String getDeLFTPath() {
return getPropertyValue(GrobidPropertyKeys.PROP_GROBID_DELFT_PATH);
}

/**
* Returns the id for a connection to crossref, given in the grobid-property
* file.
*
* @return id for connecting crossref
*/
/*public static String getCrossrefId() {
return getPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_ID);
}*/

/**
* Sets the id for a connection to crossref, given in the grobid-property
* file.
*
* @param id for connecting crossref
*/
/*public static void setCrossrefId(final String id) {
setPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_ID, id);
}*/

/**
* Returns the password for a connection to crossref, given in the
* grobid-property file.
*
* @return password for connecting crossref
*/
/*public static String getCrossrefPw() {
return getPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_PW);
}*/

/**
* Sets the id for a connection to crossref, given in the grobid-property
* file.
*
* @param password for connecting crossref
*/
/*public static void setCrossrefPw(final String password) {
setPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_PW, password);
}*/

/**
* Returns the host for a connection to crossref, given in the
* grobid-property file.
*
* @return host for connecting crossref
*/
/*public static String getCrossrefHost() {
return getPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_HOST);
}*/

/**
* Sets the id for a connection to crossref, given in the grobid-property
* file.
*
* @param host for connecting crossref
*/
/*public static void setCrossrefHost(final String host) {
setPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_HOST, host);
}*/

/**
* Returns the port for a connection to crossref, given in the
* grobid-property file.
*
* @return port for connecting crossref
*/
/*public static Integer getCrossrefPort() {
return (Integer.valueOf(getPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_PORT)));
}*/

/**
* Sets the port for a connection to crossref, given in the grobid-property
* file.
*
* @param port for connecting crossref
*/
/*public static void setCrossrefPort(final String port) {
setPropertyValue(GrobidPropertyKeys.PROP_CROSSREF_PORT, port);
}*/
/**
 * Returns the DeLFT installation path, given in the grobid-property file, as an
 * absolute path.
 *
 * <p>The configured value is used as is when it points to an existing file or
 * directory. Otherwise, and only when the configured value is a relative path, it is
 * resolved against the parent of the current working directory ({@code ../}). The
 * returned path is not guaranteed to exist; callers are expected to check it.
 *
 * @return absolute path to the DeLFT installation
 * @throws IllegalStateException if no value is available for the DeLFT path property
 */
public static String getDeLFTFilePath() {
    String rawPath = getPropertyValue(GrobidPropertyKeys.PROP_GROBID_DELFT_PATH);
    if (rawPath == null) {
        // fail with an explicit message rather than an anonymous NPE in new File(null)
        throw new IllegalStateException(
            "DeLFT path property is not set: " + GrobidPropertyKeys.PROP_GROBID_DELFT_PATH);
    }
    File pathFile = new File(rawPath);
    // the "../" fallback is only meaningful for a relative path: prefixing an absolute
    // path would produce a location unrelated to the configured one
    if (!pathFile.exists() && !pathFile.isAbsolute()) {
        pathFile = new File("../" + rawPath);
    }
    return pathFile.getAbsolutePath();
}

/**
* Returns the host for a proxy connection, given in the grobid-property
Expand Down Expand Up @@ -556,36 +486,6 @@ public static void setProxyPort(final String port) {
System.setProperty("https.proxyPort", port);
}

/**
* Returns the id for a connection to mysql, given in the grobid-property
* file.
*
* @return database name for connecting mysql
*/
/*public static String getMySQLDBName() {
return getPropertyValue(GrobidPropertyKeys.PROP_MYSQL_DB_NAME);
}*/

/**
* Sets the database name for a connection to mysql, given in the
* grobid-property file.
*
* @param dbName for connecting mysql
*/
/*public static void setMySQLDBName(final String dbName) {
setPropertyValue(GrobidPropertyKeys.PROP_MYSQL_DB_NAME, dbName);
}*/

/**
* Returns the id for a connection to mysql, given in the grobid-property
* file.
*
* @return username for connecting mysql
*/
/*public static String getMySQLUsername() {
return getPropertyValue(GrobidPropertyKeys.PROP_MYSQL_USERNAME);
}*/

/**
 * Returns the memory limit for the pdf2xml third-party process, read from the
 * grobid-property file, with "2048" as default value when the property is not set.
 *
 * @return memory limit parsed as a base-10 integer
 */
public static Integer getPdf2XMLMemoryLimitMb() {
    final String configuredLimit =
        getPropertyValue(GrobidPropertyKeys.PROP_3RD_PARTY_PDF2XML_MEMORY_LIMIT, "2048");
    return Integer.valueOf(configuredLimit, 10);
}
Expand All @@ -594,74 +494,6 @@ public static Integer getPdf2XMLTimeoutMs() {
return Integer.parseInt(getPropertyValue(GrobidPropertyKeys.PROP_3RD_PARTY_PDF2XML_TIMEOUT_SEC, "60"), 10) * 1000;
}

/**
* Sets the username for a connection to mysql, given in the grobid-property
* file.
*
* @param username for connecting mysql
*/
/*public static void setMySQLUsername(final String username) {
setPropertyValue(GrobidPropertyKeys.PROP_MYSQL_USERNAME, username);
}*/

/**
* Returns the password for a connection to mysql, given in the
* grobid-property file.
*
* @return password for connecting mysql
*/
/*public static String getMySQLPw() {
return getPropertyValue(GrobidPropertyKeys.PROP_MYSQL_PW);
}*/

/**
* Sets the id for a connection to mysql, given in the grobid-property file.
*
* @param password for connecting mysql
*/
/*public static void setMySQLPw(final String password) {
setPropertyValue(GrobidPropertyKeys.PROP_MYSQL_PW, password);
}*/

/**
* Returns the host for a connection to mysql, given in the grobid-property
* file.
*
* @return host for connecting mysql
*/
/*public static String getMySQLHost() {
return getPropertyValue(GrobidPropertyKeys.PROP_MYSQL_HOST);
}*/

/**
* Sets the id for a connection to mysql, given in the grobid-property file.
*
* @param host for connecting mysql
*/
/*public static void setMySQLHost(final String host) {
setPropertyValue(GrobidPropertyKeys.PROP_MYSQL_HOST, host);
}*/

/**
* Returns the port for a connection to mysql, given in the grobid-property
* file.
*
* @return port for connecting mysql
*/
/*public static Integer getMySQLPort() {
return Integer.valueOf(getPropertyValue(GrobidPropertyKeys.PROP_MYSQL_PORT));
}*/

/**
* Sets the port for a connection to mysql, given in the grobid-property
* file.
*
* @param port for connecting mysql
*/
/*public static void setMySQLPort(String port) {
setPropertyValue(GrobidPropertyKeys.PROP_MYSQL_PORT, port);
}*/

/**
* Returns the number of threads, given in the grobid-property file.
*
Expand Down Expand Up @@ -867,48 +699,6 @@ public static void setContextExecutionServer(Boolean state) {
setPropertyValue(GrobidPropertyKeys.PROP_GROBID_IS_CONTEXT_SERVER, state.toString());
}

/**
* Update the input file with the key and value given as argument.
*
* @param pPropertyFile file to update.
* @param pKey key to replace
* @param pValue value to replace
*/
/*public static void updatePropertyFile(File pPropertyFile, String pKey, String pValue) {
try {
BufferedReader reader = new BufferedReader(new FileReader(pPropertyFile));
String line, content = StringUtils.EMPTY, lineToReplace = StringUtils.EMPTY;
while ((line = reader.readLine()) != null) {
if (line.contains(pKey)) {
lineToReplace = line;
}
content += line + "\r\n";
}
reader.close();
if (!StringUtils.EMPTY.equals(lineToReplace)) {
String newContent = content.replaceAll(lineToReplace, pKey + "=" + pValue);
FileWriter writer = new FileWriter(pPropertyFile.getAbsoluteFile());
writer.write(newContent);
writer.close();
}
} catch (IOException e) {
throw new GrobidPropertyException("Error while manipulating the Grobid properties", e);
}
}*/

/**
* Update grobid.properties with the key and value given as argument.
*
* @param pKey key to replace
* @param pValue value to replace
*/
/*public static void updatePropertyFile(String pKey, String pValue) {
updatePropertyFile(getGrobidPropertiesPath(), pKey, pValue);
}*/

/**
* Sets the GROBID version.
*/
Expand Down
20 changes: 20 additions & 0 deletions grobid-home/models/figure/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"use_char_feature": true,
"case_embedding_size": 5,
"batch_size": 20,
"fold_number": 1,
"char_vocab_size": 103,
"use_crf": true,
"word_embedding_size": 300,
"recurrent_dropout": 0.5,
"max_char_length": 30,
"num_word_lstm_units": 100,
"char_embedding_size": 25,
"embeddings_name": "glove-840B",
"num_char_lstm_units": 25,
"model_type": "BidLSTM_CRF",
"use_ELMo": false,
"dropout": 0.5,
"case_vocab_size": 8,
"model_name": "figure"
}
Binary file added grobid-home/models/figure/model_weights.hdf5
Binary file not shown.
Binary file added grobid-home/models/figure/preprocessor.pkl
Binary file not shown.
Loading

0 comments on commit 9da8702

Please sign in to comment.