4 changes: 1 addition & 3 deletions dev/deps/spark-deps-hadoop-2.2
@@ -5,8 +5,7 @@ activation-1.1.jar
akka-actor_2.10-2.3.11.jar
akka-remote_2.10-2.3.11.jar
akka-slf4j_2.10-2.3.11.jar
-antlr-2.7.7.jar
-antlr-runtime-3.4.jar
+antlr-runtime-3.5.2.jar
[Review comment, Contributor Author] I think we need to update LICENSE and NOTICE because of this.
aopalliance-1.0.jar
apache-log4j-extras-1.2.17.jar
arpack_combined_all-0.1.jar
@@ -179,7 +178,6 @@ spire_2.10-0.7.4.jar
stax-api-1.0-2.jar
stax-api-1.0.1.jar
stream-2.7.0.jar
-stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
tachyon-client-0.8.2.jar
tachyon-underfs-hdfs-0.8.2.jar
4 changes: 1 addition & 3 deletions dev/deps/spark-deps-hadoop-2.3
@@ -5,8 +5,7 @@ activation-1.1.1.jar
akka-actor_2.10-2.3.11.jar
akka-remote_2.10-2.3.11.jar
akka-slf4j_2.10-2.3.11.jar
-antlr-2.7.7.jar
-antlr-runtime-3.4.jar
+antlr-runtime-3.5.2.jar
aopalliance-1.0.jar
apache-log4j-extras-1.2.17.jar
arpack_combined_all-0.1.jar
@@ -170,7 +169,6 @@ spire_2.10-0.7.4.jar
stax-api-1.0-2.jar
stax-api-1.0.1.jar
stream-2.7.0.jar
-stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
tachyon-client-0.8.2.jar
tachyon-underfs-hdfs-0.8.2.jar
4 changes: 1 addition & 3 deletions dev/deps/spark-deps-hadoop-2.4
@@ -5,8 +5,7 @@ activation-1.1.1.jar
akka-actor_2.10-2.3.11.jar
akka-remote_2.10-2.3.11.jar
akka-slf4j_2.10-2.3.11.jar
-antlr-2.7.7.jar
-antlr-runtime-3.4.jar
+antlr-runtime-3.5.2.jar
aopalliance-1.0.jar
apache-log4j-extras-1.2.17.jar
arpack_combined_all-0.1.jar
@@ -171,7 +170,6 @@ spire_2.10-0.7.4.jar
stax-api-1.0-2.jar
stax-api-1.0.1.jar
stream-2.7.0.jar
-stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
tachyon-client-0.8.2.jar
tachyon-underfs-hdfs-0.8.2.jar
4 changes: 1 addition & 3 deletions dev/deps/spark-deps-hadoop-2.6
@@ -5,8 +5,7 @@ activation-1.1.1.jar
akka-actor_2.10-2.3.11.jar
akka-remote_2.10-2.3.11.jar
akka-slf4j_2.10-2.3.11.jar
-antlr-2.7.7.jar
-antlr-runtime-3.4.jar
+antlr-runtime-3.5.2.jar
aopalliance-1.0.jar
apache-log4j-extras-1.2.17.jar
apacheds-i18n-2.0.0-M15.jar
@@ -177,7 +176,6 @@ spire_2.10-0.7.4.jar
stax-api-1.0-2.jar
stax-api-1.0.1.jar
stream-2.7.0.jar
-stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
tachyon-client-0.8.2.jar
tachyon-underfs-hdfs-0.8.2.jar
6 changes: 6 additions & 0 deletions pom.xml
@@ -183,6 +183,7 @@
<jodd.version>3.5.2</jodd.version>
<jsr305.version>1.3.9</jsr305.version>
<libthrift.version>0.9.2</libthrift.version>
+<antlr.version>3.5.2</antlr.version>

<test.java.home>${java.home}</test.java.home>
<test.exclude.tags></test.exclude.tags>
@@ -1843,6 +1844,11 @@
</exclusion>
</exclusions>
</dependency>
+<dependency>
+  <groupId>org.antlr</groupId>
+  <artifactId>antlr-runtime</artifactId>
+  <version>${antlr.version}</version>
+</dependency>
</dependencies>
</dependencyManagement>

104 changes: 56 additions & 48 deletions project/SparkBuild.scala
@@ -247,6 +247,9 @@ object SparkBuild extends PomBuild {
/* Enable unidoc only for the root spark project */
enable(Unidoc.settings)(spark)

+/* Catalyst ANTLR generation settings */
+enable(Catalyst.settings)(catalyst)
+
/* Spark SQL Core console settings */
enable(SQL.settings)(sql)

@@ -357,6 +360,58 @@ object OldDeps {
)
}

+object Catalyst {
+  lazy val settings = Seq(
+    // ANTLR code-generation step.
+    //
+    // This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of
+    // build errors in the current plugin.
+    // Create the parser from the ANTLR grammar files.
+    sourceGenerators in Compile += Def.task {
+      val log = streams.value.log
+
+      val grammarFileNames = Seq(
+        "SparkSqlLexer.g",
+        "SparkSqlParser.g")
+      val sourceDir = (sourceDirectory in Compile).value / "antlr3"
+      val targetDir = (sourceManaged in Compile).value
+
+      // Create the default ANTLR Tool.
+      val antlr = new org.antlr.Tool
+
+      // Set up the input and output directories.
+      antlr.setInputDirectory(sourceDir.getPath)
+      antlr.setOutputDirectory(targetDir.getPath)
+      antlr.setForceRelativeOutput(true)
+      antlr.setMake(true)
+
+      // Add the grammar files.
+      grammarFileNames.flatMap(gFileName => (sourceDir ** gFileName).get).foreach { gFilePath =>
+        val relGFilePath = (gFilePath relativeTo sourceDir).get.getPath
+        log.info("ANTLR: Grammar file '%s' detected.".format(relGFilePath))
+        antlr.addGrammarFile(relGFilePath)
+        // The library directory is set multiple times here, but only the last call takes
+        // effect. Since all grammar files live in the same directory, we assume there is
+        // only one library directory.
+        antlr.setLibDirectory(gFilePath.getParent)
+      }
+
+      // Generate the parser.
+      antlr.process
+      if (antlr.getNumErrors > 0) {
+        log.error("ANTLR: Caught %d build errors.".format(antlr.getNumErrors))
+      }
+
+      // Return all generated Java files.
+      (targetDir ** "*.java").get.toSeq
+    }.taskValue,
+    // Include the ANTLR tokens files.
+    resourceGenerators in Compile += Def.task {
+      ((sourceManaged in Compile).value ** "*.tokens").get.toSeq
+    }.taskValue
+  )
+}
+
object SQL {
lazy val settings = Seq(
initialCommands in console :=
@@ -414,54 +469,7 @@ object Hive {
// Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
// in order to generate golden files. This is only required for developers who are adding
// new query tests.
-fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") },
-// ANTLR code-generation step.
-//
-// This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of
-// build errors in the current plugin.
-// Create Parser from ANTLR grammar files.
-sourceGenerators in Compile += Def.task {
-  val log = streams.value.log
-
-  val grammarFileNames = Seq(
-    "SparkSqlLexer.g",
-    "SparkSqlParser.g")
-  val sourceDir = (sourceDirectory in Compile).value / "antlr3"
-  val targetDir = (sourceManaged in Compile).value
-
-  // Create default ANTLR Tool.
-  val antlr = new org.antlr.Tool
-
-  // Setup input and output directories.
-  antlr.setInputDirectory(sourceDir.getPath)
-  antlr.setOutputDirectory(targetDir.getPath)
-  antlr.setForceRelativeOutput(true)
-  antlr.setMake(true)
-
-  // Add grammar files.
-  grammarFileNames.flatMap(gFileName => (sourceDir ** gFileName).get).foreach { gFilePath =>
-    val relGFilePath = (gFilePath relativeTo sourceDir).get.getPath
-    log.info("ANTLR: Grammar file '%s' detected.".format(relGFilePath))
-    antlr.addGrammarFile(relGFilePath)
-    // We will set library directory multiple times here. However, only the
-    // last one has effect. Because the grammar files are located under the same directory,
-    // we assume there is only one library directory.
-    antlr.setLibDirectory(gFilePath.getParent)
-  }
-
-  // Generate the parser.
-  antlr.process
-  if (antlr.getNumErrors > 0) {
-    log.error("ANTLR: Caught %d build errors.".format(antlr.getNumErrors))
-  }
-
-  // Return all generated java files.
-  (targetDir ** "*.java").get.toSeq
-}.taskValue,
-// Include ANTLR tokens files.
-resourceGenerators in Compile += Def.task {
-  ((sourceManaged in Compile).value ** "*.tokens").get.toSeq
-}.taskValue
+fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }
)
}

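As a sanity check on what this build change produces: the Catalyst task above runs the ANTLR 3 Tool over SparkSqlLexer.g and SparkSqlParser.g and hands the generated .java files to sbt for compilation. The snippet below is a hypothetical usage sketch, not code from this PR; it assumes the generated classes are named after the grammars and live in org.apache.spark.sql.catalyst.parser (the package declared in the lexer header further down), and it uses only the ANTLR 3 runtime API.

    import org.antlr.runtime.{ANTLRStringStream, CommonTokenStream}
    import org.apache.spark.sql.catalyst.parser.{SparkSqlLexer, SparkSqlParser}

    // Lex a SQL string, buffer the tokens, and hand them to the generated parser.
    val lexer = new SparkSqlLexer(new ANTLRStringStream("SELECT 1"))
    val tokens = new CommonTokenStream(lexer)
    val parser = new SparkSqlParser(tokens)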
22 changes: 22 additions & 0 deletions sql/catalyst/pom.xml
@@ -71,6 +71,10 @@
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</dependency>
+<dependency>
+  <groupId>org.antlr</groupId>
+  <artifactId>antlr-runtime</artifactId>
+</dependency>
</dependencies>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
@@ -103,6 +107,24 @@
</execution>
</executions>
</plugin>
+<plugin>
+  <groupId>org.antlr</groupId>
+  <artifactId>antlr3-maven-plugin</artifactId>
+  <executions>
+    <execution>
+      <goals>
+        <goal>antlr</goal>
+      </goals>
+    </execution>
+  </executions>
+  <configuration>
+    <sourceDirectory>../catalyst/src/main/antlr3</sourceDirectory>
+    <includes>
+      <include>**/SparkSqlLexer.g</include>
+      <include>**/SparkSqlParser.g</include>
+    </includes>
+  </configuration>
+</plugin>
</plugins>
</build>
<profiles>
FromClauseParser.g
@@ -13,6 +13,8 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+This file is an adaptation of Hive's org/apache/hadoop/hive/ql/FromClauseParser.g grammar.
*/
parser grammar FromClauseParser;

@@ -33,7 +35,7 @@ k=3;
@Override
public void displayRecognitionError(String[] tokenNames,
RecognitionException e) {
-  gParent.errors.add(new ParseError(gParent, e, tokenNames));
+  gParent.displayRecognitionError(tokenNames, e);
}
protected boolean useSQL11ReservedKeywordsForIdentifier() {
return gParent.useSQL11ReservedKeywordsForIdentifier();
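Note the one-line change above: the sub-parser used to append a ParseError to the parent's errors list, and it now delegates to the parent's displayRecognitionError, so the root parser owns error handling in one place. Below is a minimal sketch of the kind of parent-side collector this enables; the names are assumptions, not code from this PR.

    import org.antlr.runtime.RecognitionException
    import scala.collection.mutable

    // Hypothetical root-side hook: every imported sub-parser funnels errors here.
    class ErrorCollector {
      val errors = mutable.Buffer.empty[String]

      def displayRecognitionError(tokenNames: Array[String], e: RecognitionException): Unit = {
        // RecognitionException exposes line and charPositionInLine as public fields.
        errors += s"line ${e.line}:${e.charPositionInLine} syntax error"
      }
    }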
IdentifiersParser.g
@@ -13,6 +13,8 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+This file is an adaptation of Hive's org/apache/hadoop/hive/ql/IdentifiersParser.g grammar.
*/
parser grammar IdentifiersParser;

@@ -33,7 +35,7 @@ k=3;
@Override
public void displayRecognitionError(String[] tokenNames,
RecognitionException e) {
-  gParent.errors.add(new ParseError(gParent, e, tokenNames));
+  gParent.displayRecognitionError(tokenNames, e);
}
protected boolean useSQL11ReservedKeywordsForIdentifier() {
return gParent.useSQL11ReservedKeywordsForIdentifier();
SelectClauseParser.g
@@ -13,6 +13,8 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+This file is an adaptation of Hive's org/apache/hadoop/hive/ql/SelectClauseParser.g grammar.
*/
parser grammar SelectClauseParser;

@@ -33,7 +35,7 @@ k=3;
@Override
public void displayRecognitionError(String[] tokenNames,
RecognitionException e) {
-  gParent.errors.add(new ParseError(gParent, e, tokenNames));
+  gParent.displayRecognitionError(tokenNames, e);
}
protected boolean useSQL11ReservedKeywordsForIdentifier() {
return gParent.useSQL11ReservedKeywordsForIdentifier();
SparkSqlLexer.g
@@ -13,26 +13,37 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+This file is an adaptation of Hive's org/apache/hadoop/hive/ql/HiveLexer.g grammar.
*/
lexer grammar SparkSqlLexer;

@lexer::header {
-package org.apache.spark.sql.parser;
+package org.apache.spark.sql.catalyst.parser;

-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
}

@lexer::members {
-  private Configuration hiveConf;
+  private ParserConf parserConf;
+  private ParseErrorReporter reporter;

-  public void setHiveConf(Configuration hiveConf) {
-    this.hiveConf = hiveConf;
+  public void configure(ParserConf parserConf, ParseErrorReporter reporter) {
+    this.parserConf = parserConf;
+    this.reporter = reporter;
  }

  protected boolean allowQuotedId() {
-    String supportedQIds = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT);
-    return !"none".equals(supportedQIds);
+    if (parserConf == null) {
+      return true;
+    }
+    return parserConf.supportQuotedId();
  }
+
+  @Override
+  public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
+    if (reporter != null) {
+      reporter.report(this, e, tokenNames);
+    }
+  }
}

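The lexer now depends on two small hooks instead of Configuration/HiveConf: a ParserConf queried for quoted-identifier support, and a ParseErrorReporter that receives recognition errors. Their real definitions live elsewhere in the PR; the traits below are only a shape inferred from the calls above, written as an assumption rather than the actual interfaces. Note the null checks in the grammar: an unconfigured lexer defaults to allowing quoted identifiers and to dropping errors.

    import org.antlr.runtime.{BaseRecognizer, RecognitionException}

    // Inferred shape of the parser configuration hook used by allowQuotedId().
    trait ParserConf {
      def supportQuotedId(): Boolean
    }

    // Inferred shape of the pluggable error reporter. The generated lexer extends
    // ANTLR's Lexer (a BaseRecognizer), so it can pass itself as the first argument.
    trait ParseErrorReporter {
      def report(recognizer: BaseRecognizer, e: RecognitionException, tokenNames: Array[String]): Unit
    }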