diff --git a/README.md b/README.md index 23396cd..d178c41 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,7 @@ Usage: ocr-transform [-dhLv] [ []] [-- [ []] [-- if [[ "$1" == '--' ]];then script_args+=("${@:2}") @@ -86,8 +87,7 @@ main () { [[ "$outfile" != '-' ]] && script_args=("${script_args[@]}" "-o:$outfile") exec_saxon "${script_args[@]}" else - script_args=("${script_args[@]}" "$infile") - script_args=("${script_args[@]}" "$outfile") + script_args=("$infile" "$outfile" "${script_args[@]}") "$transformer" "${script_args[@]}" fi } diff --git a/lib.sh b/lib.sh index 7f276b6..63cd375 100644 --- a/lib.sh +++ b/lib.sh @@ -118,7 +118,7 @@ show_saxon_options () { #{{{ run saxon / xsd-validator (xsdv.sh) # exec_saxon () exec_saxon() { - (( DEBUG > 0 )) && loginfo Executing "java -jar $SAXON_JAR" "$@" + (( DEBUG > 0 )) && loginfo Executing "java -jar $SHAREDIR/vendor/saxon9he.jar" "$@" (( DEBUG > 1 )) && SAXON_ARGS+=('-t') java -jar "$SHAREDIR/vendor/saxon9he.jar" "$@" } diff --git a/script/transform/abbyy__page b/script/transform/abbyy__page new file mode 120000 index 0000000..15e16ee --- /dev/null +++ b/script/transform/abbyy__page @@ -0,0 +1 @@ +alto__page \ No newline at end of file diff --git a/script/transform/alto__page b/script/transform/alto__page index 152de35..0d9ceff 100755 --- a/script/transform/alto__page +++ b/script/transform/alto__page @@ -1,19 +1,32 @@ -#!/bin/bash -x +#!/bin/bash + SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" VENDORDIR="$(cd $SCRIPTDIR/../../vendor/; pwd)" JAR="$VENDORDIR/JPageConverter/PageConverter.jar" INFILE="$1" OUTFILE="$2" +ARGUMENT="$3" + +if [[ "$1" = "-" ]]; then + INFILE="$(mktemp)" + cat >"$INFILE" +fi -is_temp= -if [[ "$2" = "-" ]];then - is_temp=true +if [[ "$2" = "-" ]]; then OUTFILE="$(mktemp)" fi -java -jar "$JAR" -neg-coords toZero -source-xml "$INFILE" -target-xml "$OUTFILE" +java -jar "$JAR" -neg-coords toZero -source-xml "$INFILE" -target-xml "$OUTFILE" 2>&1 + +if [[ "$1" = "-" ]]; then + rm "$INFILE" +fi -if [[ "$is_temp" = true ]];then - cat "$OUTFILE" +if [[ "$2" = "-" ]]; then + if [[ -z "$ARGUMENT" ]]; then + cat "$OUTFILE" + else + java -cp "$VENDORDIR/saxon9he.jar" net.sf.saxon.Query -s:"$OUTFILE" -qs:/ "$ARGUMENT" + fi rm "$OUTFILE" fi diff --git a/script/transform/gcv__hocr b/script/transform/gcv__hocr index 95af894..25457b7 100755 --- a/script/transform/gcv__hocr +++ b/script/transform/gcv__hocr @@ -1,4 +1,5 @@ #!/bin/bash + SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" VENDORDIR="$(cd $SCRIPTDIR/../../vendor/; pwd)" VENDORSCRIPT="$VENDORDIR/gcv2hocr/gcv2hocr" @@ -8,15 +9,22 @@ OUTFILE="$2" WIDTH=2000 HEIGHT=2000 -is_temp= -if [[ "$2" = "-" ]];then - is_temp=true +if [[ "$1" = "-" ]]; then + INFILE="$(mktemp)" + cat >"$INFILE" +fi + +if [[ "$2" = "-" ]]; then OUTFILE="$(mktemp)" fi "$VENDORSCRIPT" "$INFILE" "$OUTFILE" "$WIDTH" "$HEIGHT" -if [[ "$is_temp" = true ]];then +if [[ "$1" = "-" ]]; then + rm "$INFILE" +fi + +if [[ "$2" = "-" ]]; then cat "$OUTFILE" rm "$OUTFILE" fi diff --git a/script/transform/gcv__page b/script/transform/gcv__page new file mode 120000 index 0000000..15e16ee --- /dev/null +++ b/script/transform/gcv__page @@ -0,0 +1 @@ +alto__page \ No newline at end of file diff --git a/script/transform/hocr__page b/script/transform/hocr__page new file mode 120000 index 0000000..15e16ee --- /dev/null +++ b/script/transform/hocr__page @@ -0,0 +1 @@ +alto__page \ No newline at end of file diff --git a/script/transform/page__alto b/script/transform/page__alto deleted file mode 120000 index 15e16ee..0000000 --- a/script/transform/page__alto +++ /dev/null @@ -1 +0,0 @@ -alto__page \ No newline at end of file diff --git a/script/transform/page__alto b/script/transform/page__alto new file mode 100755 index 0000000..12f063a --- /dev/null +++ b/script/transform/page__alto @@ -0,0 +1,32 @@ +#!/bin/bash + +SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENDORDIR="$(cd $SCRIPTDIR/../../vendor/; pwd)" +JAR="$VENDORDIR/JPageConverter/PageConverter.jar" +INFILE="$1" +OUTFILE="$2" +ARGUMENT="$3" + +if [[ "$1" = "-" ]]; then + INFILE="$(mktemp)" + cat >"$INFILE" +fi + +if [[ "$2" = "-" ]]; then + OUTFILE="$(mktemp)" +fi + +java -jar "$JAR" -neg-coords toZero -source-xml "$INFILE" -target-xml "$OUTFILE" -convert-to ALTO 2>&1 + +if [[ "$1" = "-" ]]; then + rm "$INFILE" +fi + +if [[ "$2" = "-" ]]; then + if [[ -z "$ARGUMENT" ]]; then + cat "$OUTFILE" + else + java -cp "$VENDORDIR/saxon9he.jar" net.sf.saxon.Query -s:"$OUTFILE" -qs:/ "$ARGUMENT" + fi + rm "$OUTFILE" +fi