From 64a42bea4b38b5cb8087b5d2a14fd7f0e16d4037 Mon Sep 17 00:00:00 2001 From: Alex Bush Date: Fri, 8 Dec 2017 18:06:57 +0000 Subject: [PATCH 1/3] Fix for newline and tab in data --- .../livy/LivySparkSQLInterpreter.java | 27 ++++++++---- .../zeppelin/livy/LivySQLInterpreterTest.java | 44 +++++++++++++++++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java index b93626d45b9..d828613421e 100644 --- a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java +++ b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java @@ -18,6 +18,7 @@ package org.apache.zeppelin.livy; import org.apache.commons.lang.StringUtils; +import static org.apache.commons.lang.StringEscapeUtils.escapeJavaScript; import org.apache.zeppelin.interpreter.*; import org.apache.zeppelin.scheduler.Scheduler; import org.apache.zeppelin.scheduler.SchedulerFactory; @@ -26,7 +27,6 @@ import java.util.List; import java.util.Properties; - /** * Livy SparkSQL Interpreter for Zeppelin. */ @@ -166,15 +166,18 @@ public FormType getFormType() { protected List parseSQLOutput(String output) { List rows = new ArrayList<>(); - String[] lines = output.split("\n"); + // Get first line by breaking on \n. We can guarantee + // that \n marks the end of the first line, but not for + // subsequent lines (as it could be in the data) + String firstLine = output.split("\n", 2)[0]; // at least 4 lines, even for empty sql output // +---+---+ // | a| b| // +---+---+ // +---+---+ - // use the first line to determinte the position of feach cell - String[] tokens = StringUtils.split(lines[0], "\\+"); + // use the first line to determine the position of each cell + String[] tokens = StringUtils.split(firstLine, "\\+"); // pairs keeps the start/end position of each cell. We parse it from the first row // which use '+' as separator List pairs = new ArrayList<>(); @@ -186,17 +189,25 @@ protected List parseSQLOutput(String output) { pairs.add(new Pair(start, end)); } - for (String line : lines) { + // Use the header line to determine the position + // of subsequent lines + int lineStart = 0; + int lineEnd = firstLine.length(); + while (lineEnd < output.length()) { // Only match format "|....|" // skip line like "+---+---+" and "only showing top 1 row" - if (line.matches("^\\|.*\\|$")) { + String line = output.substring(lineStart, lineEnd); + if (line.matches("(?s)^\\|.*\\|$")) { List cells = new ArrayList<>(); for (Pair pair : pairs) { - // strip the blank space around the cell - cells.add(line.substring(pair.start, pair.end).trim()); + // strip the blank space around the cell and escape the string + cells.add(escapeJavaScript(line.substring(pair.start, pair.end)).trim()); } rows.add(StringUtils.join(cells, "\t")); } + // Determine position of next line skipping newline + lineStart += firstLine.length() + 1; + lineEnd = lineStart + firstLine.length(); } return rows; } diff --git a/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java b/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java index 24d70ec2b04..0541b876755 100644 --- a/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java +++ b/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java @@ -124,5 +124,49 @@ public void testParseSQLOutput() { assertEquals(2, rows.size()); assertEquals("a", rows.get(0)); assertEquals("1", rows.get(1)); + + + // sql output with 3 rows, 3 columns, showing "only showing top 3 rows" with a line break in the data + // +---+---+---+ + // | a| b| c| + // +---+---+---+ + // | 1a| 1b| 1c| + // | 2a| 2 + // b| 2c| + // | 3a| 3b| 3c| + // +---+---+---+ + // only showing top 3 rows + rows = sqlInterpreter.parseSQLOutput("+---+----+---+\n" + + "| a| b| c|\n" + + "+---+----+---+\n" + + "| 1a| 1b| 1c|\n" + + "| 2a| 2\nb| 2c|\n" + + "| 3a| 3b| 3c|\n" + + "+---+---+---+\n" + + "only showing top 3 rows"); + assertEquals(4, rows.size()); + assertEquals("a\tb\tc", rows.get(0)); + assertEquals("1a\t1b\t1c", rows.get(1)); + assertEquals("2a\t2\\nb\t2c", rows.get(2)); + assertEquals("3a\t3b\t3c", rows.get(3)); + + + // sql output with 2 rows and one containing a tab + // +---+---+ + // | a| b| + // +---+---+ + // | 1| \ta| + // | 2| 2b| + // +---+---+ + rows = sqlInterpreter.parseSQLOutput("+---+---+\n" + + "| a| b|\n" + + "+---+---+\n" + + "| 1| \ta|\n" + + "| 2| 2b|\n" + + "+---+---+"); + assertEquals(3, rows.size()); + assertEquals("a\tb", rows.get(0)); + assertEquals("1\t\\ta", rows.get(1)); + assertEquals("2\t2b", rows.get(2)); } } From d054af0f0b164c3a74a8c71996a2e1bca526c6a3 Mon Sep 17 00:00:00 2001 From: Alex Bush Date: Fri, 8 Dec 2017 20:51:00 +0000 Subject: [PATCH 2/3] Force a dummy change for Travis --- .../java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java index d828613421e..ae11660e68d 100644 --- a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java +++ b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java @@ -168,7 +168,7 @@ protected List parseSQLOutput(String output) { List rows = new ArrayList<>(); // Get first line by breaking on \n. We can guarantee // that \n marks the end of the first line, but not for - // subsequent lines (as it could be in the data) + // subsequent lines (as it could be in the cells) String firstLine = output.split("\n", 2)[0]; // at least 4 lines, even for empty sql output // +---+---+ From 31cdbdc73b37a6f2a2c2ded701b19971552c52fc Mon Sep 17 00:00:00 2001 From: Alex Bush Date: Fri, 8 Dec 2017 20:58:50 +0000 Subject: [PATCH 3/3] Added another comment explaining the regexp change --- .../java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java index ae11660e68d..7b2d7d66684 100644 --- a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java +++ b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java @@ -197,6 +197,7 @@ protected List parseSQLOutput(String output) { // Only match format "|....|" // skip line like "+---+---+" and "only showing top 1 row" String line = output.substring(lineStart, lineEnd); + // Use the DOTALL regex mode to match newlines if (line.matches("(?s)^\\|.*\\|$")) { List cells = new ArrayList<>(); for (Pair pair : pairs) {