diff --git a/grobid-core/src/main/java/org/grobid/core/document/DocumentSource.java b/grobid-core/src/main/java/org/grobid/core/document/DocumentSource.java index 71a960d410..905fa36aa3 100644 --- a/grobid-core/src/main/java/org/grobid/core/document/DocumentSource.java +++ b/grobid-core/src/main/java/org/grobid/core/document/DocumentSource.java @@ -146,6 +146,8 @@ public File pdfalto(Integer timeout, boolean force, int startPage, if (GrobidProperties.isContextExecutionServer()) { cmd.add("--timeout"); cmd.add(String.valueOf(GrobidProperties.getPdfaltoTimeoutS())); + cmd.add("--ulimit"); + cmd.add(String.valueOf(GrobidProperties.getPdfaltoMemoryLimitMb() * 1024)); tmpPathXML = processPdfaltoServerMode(pdfPath, tmpPathXML, cmd); } else { if (!SystemUtils.IS_OS_WINDOWS && !SystemUtils.IS_OS_MAC) { diff --git a/grobid-home/pdfalto/lin-64/pdfalto_server b/grobid-home/pdfalto/lin-64/pdfalto_server index 74c2859131..db8d9f15bd 100755 --- a/grobid-home/pdfalto/lin-64/pdfalto_server +++ b/grobid-home/pdfalto/lin-64/pdfalto_server @@ -6,26 +6,36 @@ timeout=20 # 20 seconds interval=1 # Delay between posting the SIGTERM signal and destroying the process by SIGKILL. delay=0 +# Default soft virtual-memory limit, in kB (the unit the ulimit builtin uses for virtual memory; 6242304 kB is about 5.95 GiB). Replaced by the value following the '--ulimit' option when one is given; the Java caller sends its MB setting multiplied by 1024. +memory_limit=6242304 command=${0:0:${#0}-7} args=("$@") pdfalto_params=() for ((n=0; n<="$#";n++)); do - case ${args[n]} in + case ${args[n]} in --timeout) timeout=${args[n+1]} ((n++)) ;; + --ulimit) + memory_limit=${args[n+1]} + ((n++)) + ;; *) pdfalto_params+=" ${args[n]}" ;; - esac + esac done #echo timeout $timeout #echo pdfalto commands: $command $pdfalto_params +#echo memory_limit $memory_limit +# Set the soft virtual-memory limit (kB) for this shell; per the bash ulimit documentation the limit also applies to processes the shell starts. NOTE(review): $memory_limit is expanded unquoted and is never validated, so an empty or non-numeric value sets no limit; confirm callers always pass a number. +ulimit -Sv $memory_limit + # kill -0 pid Exit code indicates if a signal may be sent to $pid process. 
( ((t = timeout)) diff --git a/grobid-home/pdfalto/mac-64/pdfalto_server b/grobid-home/pdfalto/mac-64/pdfalto_server index 07f1b001cb..8fa2fafa78 100755 --- a/grobid-home/pdfalto/mac-64/pdfalto_server +++ b/grobid-home/pdfalto/mac-64/pdfalto_server @@ -6,8 +6,10 @@ timeout=20 # 20 seconds interval=1 # Delay between posting the SIGTERM signal and destroying the process by SIGKILL. delay=0 -command=${0:0:${#0}-7} +# Default soft virtual-memory limit, in kB (the unit the ulimit builtin uses for virtual memory; 6242304 kB is about 5.95 GiB). Replaced by the value following the '--ulimit' option when one is given. +memory_limit=6242304 +command=${0:0:${#0}-7} args=("$@") pdfalto_params=() @@ -17,12 +19,19 @@ for ((n=0; n<="$#";n++)); do timeout=${args[n+1]} ((n++)) ;; + --ulimit) + memory_limit=${args[n+1]} + ((n++)) + ;; *) pdfalto_params+=" ${args[n]}" ;; esac done +# Set the soft virtual-memory limit (kB) for this shell; per the bash ulimit documentation the limit also applies to processes the shell starts. NOTE(review): $memory_limit is expanded unquoted and is never validated, so an empty or non-numeric value sets no limit; also confirm that a virtual-memory limit is actually supported and enforced on macOS. +ulimit -Sv $memory_limit + # kill -0 pid Exit code indicates if a signal may be sent to $pid process. ( ((t = timeout))