From f332e3f747ac35e0d55f5cff430d9cbc79c00ac7 Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Sun, 1 Dec 2024 09:27:50 +0200 Subject: [PATCH] Added support for the ASCII output mode --- .../{XDROutput.java => BinaryOutput.java} | 12 ++- .../main/java/org/jpmml/rexp/RDataOutput.java | 2 + .../main/java/org/jpmml/rexp/RExpParser.java | 8 +- .../main/java/org/jpmml/rexp/RExpWriter.java | 37 ++++++-- .../src/main/java/org/jpmml/rexp/RString.java | 2 + .../main/java/org/jpmml/rexp/TextOutput.java | 91 +++++++++++++++++++ .../java/org/jpmml/rexp/SerializeTest.java | 84 +++++++++++------ .../java/org/jpmml/rexp/TextInputTest.java | 1 + .../java/org/jpmml/rexp/TextOutputTest.java | 35 +++++++ 9 files changed, 231 insertions(+), 41 deletions(-) rename pmml-rexp/src/main/java/org/jpmml/rexp/{XDROutput.java => BinaryOutput.java} (88%) create mode 100644 pmml-rexp/src/main/java/org/jpmml/rexp/TextOutput.java create mode 100644 pmml-rexp/src/test/java/org/jpmml/rexp/TextOutputTest.java diff --git a/pmml-rexp/src/main/java/org/jpmml/rexp/XDROutput.java b/pmml-rexp/src/main/java/org/jpmml/rexp/BinaryOutput.java similarity index 88% rename from pmml-rexp/src/main/java/org/jpmml/rexp/XDROutput.java rename to pmml-rexp/src/main/java/org/jpmml/rexp/BinaryOutput.java index 8e98f9a..ed79254 100644 --- a/pmml-rexp/src/main/java/org/jpmml/rexp/XDROutput.java +++ b/pmml-rexp/src/main/java/org/jpmml/rexp/BinaryOutput.java @@ -23,12 +23,12 @@ import java.io.OutputStream; abstract -public class XDROutput implements RDataOutput { +public class BinaryOutput implements RDataOutput { private DataOutputStream dos = null; - public XDROutput(OutputStream os) throws IOException { + public BinaryOutput(OutputStream os) throws IOException { DataOutputStream dos = new DataOutputStream(os); dos.writeByte('X'); @@ -37,14 +37,16 @@ public XDROutput(OutputStream os) throws IOException { this.dos = dos; } - abstract - public RExpWriter getWriter(); - @Override public void close() throws IOException { 
this.dos.close(); } + @Override + public String escape(String string){ + return string; + } + @Override public void writeInt(int value) throws IOException { this.dos.writeInt(value); diff --git a/pmml-rexp/src/main/java/org/jpmml/rexp/RDataOutput.java b/pmml-rexp/src/main/java/org/jpmml/rexp/RDataOutput.java index bb6dfda..37ab02a 100644 --- a/pmml-rexp/src/main/java/org/jpmml/rexp/RDataOutput.java +++ b/pmml-rexp/src/main/java/org/jpmml/rexp/RDataOutput.java @@ -25,6 +25,8 @@ public interface RDataOutput extends Closeable { RExpWriter getWriter(); + String escape(String string); + void writeInt(int value) throws IOException; void writeDouble(double value) throws IOException; diff --git a/pmml-rexp/src/main/java/org/jpmml/rexp/RExpParser.java b/pmml-rexp/src/main/java/org/jpmml/rexp/RExpParser.java index 35897ae..bd5ef7b 100644 --- a/pmml-rexp/src/main/java/org/jpmml/rexp/RExpParser.java +++ b/pmml-rexp/src/main/java/org/jpmml/rexp/RExpParser.java @@ -18,6 +18,7 @@ */ package org.jpmml.rexp; +import java.io.Closeable; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -32,7 +33,7 @@ import com.google.common.io.ByteStreams; -public class RExpParser { +public class RExpParser implements Closeable { private RDataInput input = null; @@ -73,6 +74,11 @@ public RExpParser getParser(){ } } + @Override + public void close() throws IOException { + this.input.close(); + } + public RExp parse() throws IOException { int version = readInt(); diff --git a/pmml-rexp/src/main/java/org/jpmml/rexp/RExpWriter.java b/pmml-rexp/src/main/java/org/jpmml/rexp/RExpWriter.java index 5197e89..822117c 100644 --- a/pmml-rexp/src/main/java/org/jpmml/rexp/RExpWriter.java +++ b/pmml-rexp/src/main/java/org/jpmml/rexp/RExpWriter.java @@ -18,12 +18,13 @@ */ package org.jpmml.rexp; +import java.io.Closeable; import java.io.IOException; import java.io.OutputStream; import java.util.LinkedHashMap; import java.util.Map; -public class RExpWriter { +public class 
RExpWriter implements Closeable { private RDataOutput output = null; @@ -31,13 +32,35 @@ public class RExpWriter { public RExpWriter(OutputStream os) throws IOException { - this.output = new XDROutput(os){ + this(os, false); + } + + public RExpWriter(OutputStream os, boolean ascii) throws IOException { + + if(ascii){ + this.output = new TextOutput(os){ + + @Override + public RExpWriter getWriter(){ + return RExpWriter.this; + } + }; + } else + + { + this.output = new BinaryOutput(os){ + + @Override + public RExpWriter getWriter(){ + return RExpWriter.this; + } + }; + } + } - @Override - public RExpWriter getWriter(){ - return RExpWriter.this; - } - }; + @Override + public void close() throws IOException { + this.output.close(); } public void write(RExp rexp) throws IOException { diff --git a/pmml-rexp/src/main/java/org/jpmml/rexp/RString.java b/pmml-rexp/src/main/java/org/jpmml/rexp/RString.java index 9cfcc67..4f332e2 100644 --- a/pmml-rexp/src/main/java/org/jpmml/rexp/RString.java +++ b/pmml-rexp/src/main/java/org/jpmml/rexp/RString.java @@ -42,6 +42,8 @@ public void write(RDataOutput output) throws IOException { } else { + value = output.escape(value); + byte[] bytes = value.getBytes(); output.writeInt(bytes.length); diff --git a/pmml-rexp/src/main/java/org/jpmml/rexp/TextOutput.java b/pmml-rexp/src/main/java/org/jpmml/rexp/TextOutput.java new file mode 100644 index 0000000..ea5e1f1 --- /dev/null +++ b/pmml-rexp/src/main/java/org/jpmml/rexp/TextOutput.java @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2024 Villu Ruusmann + * + * This file is part of JPMML-R + * + * JPMML-R is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * JPMML-R is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with JPMML-R. If not, see <http://www.gnu.org/licenses/>. + */ +package org.jpmml.rexp; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; + +abstract +public class TextOutput implements RDataOutput { + + private Writer writer = null; + + + public TextOutput(OutputStream os) throws IOException { + Writer writer = new OutputStreamWriter(os); + + writer.write('A'); + writer.write('\n'); + + this.writer = writer; + } + + @Override + public void close() throws IOException { + this.writer.close(); + } + + @Override + public String escape(String string){ + return encode(string); + } + + @Override + public void writeInt(int value) throws IOException { + this.writer.write(String.valueOf(value)); + this.writer.write('\n'); + } + + @Override + public void writeDouble(double value) throws IOException { + this.writer.write(String.valueOf(value)); + this.writer.write('\n'); + } + + @Override + public void writeByteArray(byte[] bytes) throws IOException { + this.writer.write(new String(bytes)); + this.writer.write('\n'); + } + + static + public String encode(String string){ + StringBuilder sb = new StringBuilder(2 * string.length()); + + for(int i = 0; i < string.length(); i++){ + char c = string.charAt(i); + + if(c == '\\'){ + sb.append('\\').append('\\'); + } else + + if(c <= 31 || c == ' ' || c >= 127){ + sb.append('\\').append(Integer.toOctalString(c)); + } else + + { + sb.append(c); + } + } + + return sb.toString(); + } +} \ No newline at end of file diff --git a/pmml-rexp/src/test/java/org/jpmml/rexp/SerializeTest.java b/pmml-rexp/src/test/java/org/jpmml/rexp/SerializeTest.java index 
ae09816..6d312bb 100644 --- a/pmml-rexp/src/test/java/org/jpmml/rexp/SerializeTest.java +++ b/pmml-rexp/src/test/java/org/jpmml/rexp/SerializeTest.java @@ -33,26 +33,52 @@ public class SerializeTest { @Test public void rdsRealVector() throws IOException { - RDoubleVector realVec = (RDoubleVector)rdsClone("RealVector"); + RDoubleVector realVec = (RDoubleVector)parse("RealVector"); - assertEquals(5 - 1, realVec.size()); - - assertEquals(Arrays.asList(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN, Double.NaN), realVec.getValues()); + checkRealVector(rdsClone(realVec)); + checkRealVector(rdsClone(realVec, true)); } @Test public void rdsIntegerVector() throws IOException { - RIntegerVector integerVec = (RIntegerVector)rdsClone("IntegerVector"); + RIntegerVector integerVec = (RIntegerVector)parse("IntegerVector"); - assertEquals(3 - 1, integerVec.size()); - - assertEquals(Arrays.asList(null, null), integerVec.getValues()); + checkIntegerVector(rdsClone(integerVec)); + checkIntegerVector(rdsClone(integerVec, true)); } @Test public void rdsNamedList() throws IOException { - RGenericVector namedList = (RGenericVector)rdsClone("NamedList"); + RGenericVector namedList = (RGenericVector)parse("NamedList"); + + checkNamedList(rdsClone(namedList)); + checkNamedList(rdsClone(namedList, true)); + } + + @Test + public void rdsDataFrame() throws IOException { + RGenericVector dataFrame = (RGenericVector)parse("DataFrame"); + + checkDataFrame(rdsClone(dataFrame)); + checkDataFrame(rdsClone(dataFrame, true)); + } + + static + private void checkRealVector(RDoubleVector realVec){ + assertEquals(5 - 1, realVec.size()); + + assertEquals(Arrays.asList(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN, Double.NaN), realVec.getValues()); + } + + static + private void checkIntegerVector(RIntegerVector integerVec){ + assertEquals(3 - 1, integerVec.size()); + + assertEquals(Arrays.asList(null, null), integerVec.getValues()); + } + static + private void 
checkNamedList(RGenericVector namedList){ assertFalse(namedList.hasAttribute("class")); assertEquals(10, namedList.size()); @@ -74,10 +100,8 @@ public void rdsNamedList() throws IOException { assertEquals(Arrays.asList("alpha", "beta", "gamma"), (namedList.getFactorElement("factor_vector")).getFactorValues()); } - @Test - public void rdsDataFrame() throws IOException { - RGenericVector dataFrame = (RGenericVector)rdsClone("DataFrame"); - + static + private void checkDataFrame(RGenericVector dataFrame){ assertEquals(Arrays.asList("data.frame"), (dataFrame.getStringAttribute("class")).getValues()); assertEquals(5, dataFrame.size()); @@ -90,40 +114,44 @@ public void rdsDataFrame() throws IOException { } static - private RExp rdsClone(String name) throws IOException { - RExp rexp; - - try(InputStream is = SerializeTest.class.getResourceAsStream("/rds/" + name + ".rds")){ - rexp = parse(is); - } - - return rdsClone(rexp); + private E rdsClone(E rexp) throws IOException { + return rdsClone(rexp, false); } static - private RExp rdsClone(RExp rexp) throws IOException { + private E rdsClone(E rexp, boolean ascii) throws IOException { DirectByteArrayOutputStream buffer = new DirectByteArrayOutputStream(10 * 1024); try(OutputStream os = buffer){ - write(rexp, os); + write(rexp, os, ascii); } // End try try(InputStream is = buffer.getInputStream()){ + return (E)parse(is); + } + } + + static + private RExp parse(String name) throws IOException { + + try(InputStream is = SerializeTest.class.getResourceAsStream("/rds/" + name + ".rds")){ return parse(is); } } static private RExp parse(InputStream is) throws IOException { - RExpParser parser = new RExpParser(is); - return parser.parse(); + try(RExpParser parser = new RExpParser(is)){ + return parser.parse(); + } } static - private void write(RExp rexp, OutputStream os) throws IOException { - RExpWriter writer = new RExpWriter(os); + private void write(RExp rexp, OutputStream os, boolean ascii) throws IOException { - 
writer.write(rexp); + try(RExpWriter writer = new RExpWriter(os, ascii)){ + writer.write(rexp); + } } } \ No newline at end of file diff --git a/pmml-rexp/src/test/java/org/jpmml/rexp/TextInputTest.java b/pmml-rexp/src/test/java/org/jpmml/rexp/TextInputTest.java index 67d543b..f8ede2c 100644 --- a/pmml-rexp/src/test/java/org/jpmml/rexp/TextInputTest.java +++ b/pmml-rexp/src/test/java/org/jpmml/rexp/TextInputTest.java @@ -31,6 +31,7 @@ public void decode(){ assertEquals("\\", TextInput.decode("\\\\")); assertEquals("\\", TextInput.decode("\\134")); + assertEquals("Hello World!", TextInput.decode("Hello\\40World!")); assertEquals("Hello World!", TextInput.decode("Hello\\040World!")); } } \ No newline at end of file diff --git a/pmml-rexp/src/test/java/org/jpmml/rexp/TextOutputTest.java b/pmml-rexp/src/test/java/org/jpmml/rexp/TextOutputTest.java new file mode 100644 index 0000000..f820594 --- /dev/null +++ b/pmml-rexp/src/test/java/org/jpmml/rexp/TextOutputTest.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024 Villu Ruusmann + * + * This file is part of JPMML-R + * + * JPMML-R is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * JPMML-R is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with JPMML-R. If not, see <http://www.gnu.org/licenses/>. 
+ */ +package org.jpmml.rexp; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TextOutputTest { + + @Test + public void encode(){ + assertEquals("\\0", TextOutput.encode("\0")); + + assertEquals("\\\\", TextOutput.encode("\\")); + + assertEquals("Hello\\40World!", TextOutput.encode("Hello World!")); + } +} \ No newline at end of file