diff --git a/bin/sqoop b/bin/sqoop index 059571473..f92b65790 100755 --- a/bin/sqoop +++ b/bin/sqoop @@ -98,4 +98,25 @@ bin=`dirname ${prgm}` bin=`cd ${bin} && pwd` source ${bin}/configure-sqoop "${bin}" +#exec ${HADOOP_COMMON_HOME}/bin/hadoop org.apache.sqoop.Sqoop "$@" +if [ "--config" = "$1" ] +then +shift +if [[ $1 =~ "--" ]] +then +echo "you need input hadoop-config values." +exit -1 +elif [[ $1 = "codegen" ]] || [[ $1 = "create-hive-table" ]] || [[ $1 = "eval" ]] || [[ $1 = "export" ]] || [[ $1 = "help" ]] || [[ $1 = "import" ]] || [[ $1 = "import-all-tables" ]] || [[ $1 = "import-mainframe" ]] || [[ $1 = "job" ]] || [[ $1 = "list-databases" ]] || [[ $1 = "list-tables" ]] || [[ $1 = "merge" ]] || [[ $1 = "metastore" ]] || [[ $1 = "version" ]] +then +echo "you need input hadoop-config values." +exit -1 +else +hadoopconfig=$1 +shift +fi +fi +if [ ! -n "$hadoopconfig" ] ;then exec ${HADOOP_COMMON_HOME}/bin/hadoop org.apache.sqoop.Sqoop "$@" +else +exec ${HADOOP_COMMON_HOME}/bin/hadoop --config "$hadoopconfig" org.apache.sqoop.Sqoop "$@" +fi diff --git a/conf/sqoop-site-template.xml b/conf/sqoop-site-template.xml index 2182da3c8..57e35537c 100644 --- a/conf/sqoop-site-template.xml +++ b/conf/sqoop-site-template.xml @@ -186,5 +186,10 @@ under the License. --> + + com.newland.component.FujianBI.service.list + com.newland.component.FujianBI.service.impl.KerberosLoginService + service list + diff --git a/pom-old.xml b/pom-old.xml index a8f436135..1cb2690be 100644 --- a/pom-old.xml +++ b/pom-old.xml @@ -26,584 +26,838 @@ case that someone actually needs it. We strongly encourage you to use ant instead. You have been warned! --> - - 4.0.0 + 4.0.0 - - org.apache - apache - 9 - + + org.apache + apache + 9 + - org.apache.sqoop - sqoop - 1.4.0-incubating-SNAPSHOT - jar + org.apache.sqoop + sqoop + + 1.4.7 + jar - Apache Sqoop - TODO - http://sqoop.apache.org - 2011 - - The Apache Software Foundation - http://www.apache.org/ - - - - The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - + Apache Sqoop + TODO + http://sqoop.apache.org + 2011 + + The Apache Software Foundation + http://www.apache.org/ + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + - - - Olivier Lamy - olamy - olamy at apache dot org - - Mentor - PPMC Member - Committer - - - + + + Olivier Lamy + olamy + olamy at apache dot org + + Mentor + PPMC Member + Committer + + + - - scm:git:http://git-wip-us.apache.org/repos/asf/sqoop.git - scm:git:https://git-wip-us.apache.org/repos/asf/sqoop.git - https://git-wip-us.apache.org/repos/asf?p=sqoop.git - - - jira - https://issues.apache.org/jira/browse/sqoop - - - Jenkins - https://builds.apache.org/job/sqoop/ - + + scm:git:http://git-wip-us.apache.org/repos/asf/sqoop.git + scm:git:https://git-wip-us.apache.org/repos/asf/sqoop.git + https://git-wip-us.apache.org/repos/asf?p=sqoop.git + + + jira + https://issues.apache.org/jira/browse/sqoop + + + Jenkins + https://builds.apache.org/job/sqoop/ + - - UTF-8 - 1.6 - 1.6 - 1.5.2 - - - 0.20.2-cdh3u1 + + UTF-8 + 1.7 + 1.7 + 1.8.1 + + + + 2.6.0 + 2.6.0-mr1-cdh5.7.2 + 1.1.1 + 1.0.0 + 1.2.16 + 1.6.0 + 1.0.0 + - 0.90.3-cdh3u1 - 1.2.16 + + + + org.apache.accumulo + accumulo-core + ${accumulo.version} + + + org.slf4j + slf4j-api + + + + + org.apache.accumulo + accumulo-minicluster + ${accumulo.version} + + + * + * + + + + + org.apache.hive.hcatalog + hive-hcatalog-core + ${hiveVersion} + + + org.apache.hive + hive-exec + + + + + commons-net + 
commons-net + 3.6 + + + org.apache.commons + commons-lang3 + 3.4 + + + org.kitesdk + kite-data-hive + ${kitesdk.version} + + + org.kitesdk + kite-data-mapreduce + ${kitesdk.version} + + + * + * + + + + + org.json + org.json + 2.0 + + + org.postgresql + postgresql + 9.4-1201-jdbc41 + + + * + * + + + + + com.h2database + h2 + 1.4.197 + + + * + * + + + + + com.oracle + ojdbc7 + 12.1.0.2 + + + * + * + + + - + + + com.newland.bi.component + nl-component-FujianBI-common + 1.0 + + + * + * + + + + - - - commons-cli - commons-cli - 1.2 - - - commons-logging - commons-logging - 1.0.4 - - - commons-io - commons-io - 1.4 - - - org.apache.avro - avro - ${avroVersion} - - - org.slf4j - slf4j-api - - - org.mortbay.jetty - jetty - - - org.jboss.netty - netty - - - org.apache.velocity - velocity - - - - - org.apache.avro - avro-mapred - ${avroVersion} - - - org.slf4j - slf4j-api - - - org.mortbay.jetty - jetty - - - org.jboss.netty - netty - - - org.apache.velocity - velocity - - - - - org.apache.hadoop - hadoop-core - ${hadoopVersion} - - - org.codehaus.jackson - jackson-core-asl - - - + + commons-cli + commons-cli + 1.2 + + + commons-logging + commons-logging + 1.0.4 + + + commons-io + commons-io + 1.4 + + + org.apache.avro + avro + ${avroVersion} + + + org.slf4j + slf4j-api + + + org.mortbay.jetty + jetty + + + org.jboss.netty + netty + + + org.apache.velocity + velocity + + + + + org.apache.avro + avro-mapred + ${avroVersion} + + + org.slf4j + slf4j-api + + + org.mortbay.jetty + jetty + + + org.jboss.netty + netty + + + org.apache.velocity + velocity + + + + + org.apache.hadoop + hadoop-core + ${hadoopMRVersion} + + + org.codehaus.jackson + jackson-core-asl + + + + + org.apache.hadoop + hadoop-common + ${hadoopVersion} + provided + true + - - org.apache.hbase - hbase - ${hbaseVersion} - - - org.apache.avro - avro - - - com.sun.jersey - jersey-core - - - com.sun.jersey - jersey-json - - - com.sun.jersey - jersey-server - - - org.apache.thrift - thrift - - - log4j - log4j - - - + + + org.apache.hbase + hbase-client + ${hbaseVersion} + + + org.apache.avro + avro + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.apache.thrift + thrift + + + log4j + log4j + + + org.apache.hbase + hbase-mapreduce + + + + + org.apache.hbase + hbase-mapreduce + 2.0.1 + + + * + * + + + + + org.apache.hbase + hbase-server + ${hbaseVersion} + + + * + * + + + + + org.apache.hbase + hbase-server + ${hbaseVersion} + tests + + + * + * + + + + + org.apache.hbase + hbase-common + ${hbaseVersion} + tests + + + * + * + + + + + + + + org.apache.hive + hive-exec + ${hiveVersion} + + + * + * + + + + - - hsqldb - hsqldb - 1.8.0.10 - - - - org.apache.hadoop - hadoop-test - ${hadoopVersion} - test - - - org.apache.hadoop - hadoop-mrunit - 0.20.2-cdh3u1 - test - + + hsqldb + hsqldb + 1.8.0.10 + + - - junit - junit - 4.8.2 - test - - - - mysql - mysql-connector-java - 5.1.17 - test - - - - org.slf4j - slf4j-api - 1.5.8 - test - + + + org.apache.hadoop + hadoop-test + 1.2.1 + test + + + org.apache.mrunit + mrunit + 1.1.0 + hadoop2 + - - org.slf4j - slf4j-log4j12 - 1.5.8 - test - - - log4j - log4j - ${log4j.version} - provided - - + + junit + junit + + 4.12 + test + - - src/scripts - src/java - src/test - - - - org.apache.maven.plugins - maven-antrun-plugin - 1.7 - - - org.codehaus.mojo - build-helper-maven-plugin - 1.7 - - - org.apache.maven.plugins - maven-clean-plugin - 2.4.1 - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - ${maven.compile.source} - 
${maven.compile.target} - - - - org.apache.maven.plugins - maven-jar-plugin - 2.3.1 - - - true - true - - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.9 - - - - - - org.apache.maven.plugins - maven-antrun-plugin - - - generate-version-tool - generate-sources - run - - - Generating version tool - - - - - - - - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-generated-sources - generate-sources - - add-source - - - - ${basedir}/target/generated-sources/src - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - ${project.build.outputDirectory} - ${project.build.outputDirectory} - ${project.build.outputDirectory} - ${project.build.outputDirectory} - ${basedir}/testdata/hive - - - **/**MySQL*.java - **/**Oracle*.java - **/**Postgresql*.java - **/**SQLServer*.java - - - - - org.apache.maven.plugins - maven-source-plugin - - - attach-javadocs - - jar - - - - attach-sources - - jar - - - - - - org.apache.rat - apache-rat-plugin - 0.7 - - - .gitignore - DEPENDENCIES - README.txt - *.sh - .git/** - .idea/** - **/.gitattributes - testdata/hive/scripts/** - - - - - rat-check - - check - - verify - - - + + mysql + mysql-connector-java + 5.1.17 + test + - - org.apache.maven.plugins - maven-javadoc-plugin - 2.8 - - - package - - javadoc - - - - - target/docs - api - - + + + org.slf4j + slf4j-api + 1.5.8 + + + * + * + + + + + + org.slf4j + slf4j-api + 1.5.8 + test + - - + + org.slf4j + slf4j-log4j12 + 1.5.8 + test + + + log4j + log4j + ${log4j.version} + provided + + - - - mysql - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/**Oracle*.java - **/**Postgresql*.java - **/**SQLServer*.java - - - - - - - - oracle - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/**MySQL*.java - **/**Postgresql*.java - **/**SQLServer*.java - - - - - - - - postgres - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/**MySQL*.java - **/**Oracle*.java - **/**SQLServer*.java - - - - - - - - sqlserver - + + src/scripts + src/java + src/test + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.7 + + + org.codehaus.mojo + build-helper-maven-plugin + 1.7 + + + org.apache.maven.plugins + maven-clean-plugin + 2.4.1 + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + ${maven.compile.source} + ${maven.compile.target} + + + + org.apache.maven.plugins + maven-jar-plugin + 2.3.1 + + + true + true + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.9 + + + - - org.apache.maven.plugins - maven-surefire-plugin - - - **/**MySQL*.java - **/**Oracle*.java - **/**Postgresql*.java - - - + + org.apache.maven.plugins + maven-antrun-plugin + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-generated-sources + generate-sources + + add-source + + + + ${basedir}/target/generated-sources/src + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${project.build.outputDirectory} + ${project.build.outputDirectory} + ${project.build.outputDirectory} + ${project.build.outputDirectory} + ${basedir}/testdata/hive + + + **/**MySQL*.java + **/**Oracle*.java + **/**Postgresql*.java + **/**SQLServer*.java + + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-javadocs + + jar + + + + attach-sources + + jar + + + + + + org.apache.rat + apache-rat-plugin + 0.7 + + + .gitignore + DEPENDENCIES + README.txt + *.sh + .git/** + .idea/** + **/.gitattributes + testdata/hive/scripts/** + + + + + rat-check + + check + + verify + + + + - - + - - doc-generation - - - skipDocumentation - !true - - - - - - 
org.codehaus.mojo - exec-maven-plugin - 1.2.1 - - - - exec - - package - - - - make - src/docs - -e - - ../../target - ${project.version} - - - - - + + + mysql + + + + org.apache.maven.plugins + maven-surefire-plugin + + + **/**Oracle*.java + **/**Postgresql*.java + **/**SQLServer*.java + + + + + + + + oracle + + + + org.apache.maven.plugins + maven-surefire-plugin + + + **/**MySQL*.java + **/**Postgresql*.java + **/**SQLServer*.java + + + + + + + + postgres + + + + org.apache.maven.plugins + maven-surefire-plugin + + + **/**MySQL*.java + **/**Oracle*.java + **/**SQLServer*.java + + + + + + + + sqlserver + + + + org.apache.maven.plugins + maven-surefire-plugin + + + **/**MySQL*.java + **/**Oracle*.java + **/**Postgresql*.java + + + + + + - + + doc-generation + + + skipDocumentation + !true + + + + + + + - - reporting - - - skipReports - !true - - + - - - - org.codehaus.mojo - taglist-maven-plugin - 2.4 - - - TODO - NOPMD - NOTE - - - - - org.codehaus.sonar-plugins - maven-report - 0.1 - - https://analysis.apache.org/ - - - - - - + + reporting + + + skipReports + !true + + + + + + + org.codehaus.mojo + taglist-maven-plugin + 2.4 + + + TODO + NOPMD + NOTE + + + + + org.codehaus.sonar-plugins + maven-report + 0.1 + + https://analysis.apache.org/ + + + + + + - - - cloudera.release - https://repository.cloudera.com/content/repositories/releases/ - - true - - - false - - - + + + cloudera.release + https://repository.cloudera.com/content/repositories/releases/ + + true + + + false + + + diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java b/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java index 4cdb2186a..bf5b62439 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java @@ -46,7 +46,7 @@ public class DataDrivenDBInputFormat * * @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat. * DataDrivenDBInputSplit instead. - * @see org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat. 
+ * @see org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat * DataDrivenDBInputSplit */ public static class DataDrivenDBInputSplit extends diff --git a/src/java/org/apache/sqoop/Sqoop.java b/src/java/org/apache/sqoop/Sqoop.java index 8764aff09..98f0492ac 100644 --- a/src/java/org/apache/sqoop/Sqoop.java +++ b/src/java/org/apache/sqoop/Sqoop.java @@ -20,6 +20,7 @@ import java.util.Arrays; +import com.newland.component.FujianBI.service.ServiceTool; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -221,6 +222,14 @@ public static int runTool(String [] args, Configuration conf) { } String toolName = expandedArgs[0]; + + // Only import and export add service tool processing + if (!isWindow() && (toolName.equals("import") || toolName.equals("export"))) { + ServiceTool serviceTool = ServiceTool.builder(conf, args); + serviceTool.initServices(); + serviceTool.startServices(); + } + Configuration pluginConf = SqoopTool.loadPlugins(conf); SqoopTool tool = SqoopTool.getTool(toolName); if (null == tool) { @@ -243,6 +252,20 @@ public static int runTool(String [] args) { return runTool(args, new Configuration()); } + /** + * 是否是本地测试 + * + * @return + */ + public static boolean isWindow() { + String systemType = System.getProperty("os.name"); + if (systemType.toUpperCase().startsWith("WINDOWS")) { + return true; + } else { + return false; + } + } + public static void main(String [] args) { if (args.length == 0) { System.err.println("Try 'sqoop help' for usage."); diff --git a/src/java/org/apache/sqoop/SqoopVersion.java b/src/java/org/apache/sqoop/SqoopVersion.java new file mode 100644 index 000000000..1c543bc5f --- /dev/null +++ b/src/java/org/apache/sqoop/SqoopVersion.java @@ -0,0 +1,6 @@ +package org.apache.sqoop; + +public class SqoopVersion { + public static final String VERSION = "1.4.7"; +// public static final String GIT_HASH = ""; +} diff --git a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java index 7e05d4241..0a384d994 100644 --- a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java +++ b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java @@ -31,6 +31,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import static org.apache.sqoop.tool.BaseSqoopTool.ENCODE; + /** * Converts an input record from a string representation to a parsed Sqoop * record and emits that DBWritable to the OutputFormat for writeback to the @@ -44,6 +46,7 @@ public class TextExportMapper public static final Log LOG = LogFactory.getLog(TextExportMapper.class.getName()); + private String encoding; private SqoopRecord recordImpl; boolean enableDataDumpOnError; @@ -80,13 +83,21 @@ protected void setup(Context context) } enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false); + + encoding = conf.get(ENCODE); } public void map(LongWritable key, Text val, Context context) throws IOException, InterruptedException { try { - recordImpl.parse(val); + // 据说转码比较消耗性能 + if (encoding != null) { + String newValue = new String(val.getBytes(), 0, val.getLength(), encoding); + recordImpl.parse(newValue); + } else { + recordImpl.parse(val); + } context.write(recordImpl, NullWritable.get()); } catch (Exception e) { // Something bad has happened diff --git a/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java index 71a50d258..aa8047259 100644 --- 
a/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java +++ b/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java @@ -87,7 +87,8 @@ public List split(Configuration conf, ResultSet results, // Catch any overage and create the closed interval for the last split. if (curLower <= maxVal || splits.size() == 1) { splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + Double.toString(curUpper), +// lowClausePrefix + Double.toString(curUpper), + lowClausePrefix + Double.toString(curLower), colName + " <= " + Double.toString(maxVal))); } diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java index 1564bdcb8..3117f21d5 100644 --- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java +++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java @@ -174,6 +174,8 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { public static final String THROW_ON_ERROR_ARG = "throw-on-error"; public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled"; public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names"; + public static final String FILE_ENCODING = "fileencoding";//文件编码 + public static final String ENCODE = "sqoop.mapreduce.export.encode"; // Arguments for validation. public static final String VALIDATE_ARG = "validate"; diff --git a/src/java/org/apache/sqoop/tool/ExportTool.java b/src/java/org/apache/sqoop/tool/ExportTool.java index 5512fa7ce..b6aea5c43 100644 --- a/src/java/org/apache/sqoop/tool/ExportTool.java +++ b/src/java/org/apache/sqoop/tool/ExportTool.java @@ -208,6 +208,23 @@ public void configureOptions(ToolOptions toolOptions) { toolOptions.addUniqueOptions(codeGenOpts); toolOptions.addUniqueOptions(getHCatalogOptions()); + + toolOptions.addUniqueOptions(getFileencodingOptions()); + } + + /** + * 文件编码 + * + * @return + */ + protected RelatedOptions getFileencodingOptions() { + RelatedOptions fileencodingOptions = new RelatedOptions("fileencoding arguments"); + fileencodingOptions.addOption(OptionBuilder + .hasArg() + .withDescription("fileencoding") + .withLongOpt("fileencoding") + .create()); + return fileencodingOptions; } @Override @@ -279,6 +296,11 @@ public void applyOptions(CommandLine in, SqoopOptions out) out.setCall(in.getOptionValue(CALL_ARG)); } + //设置文件编码 + if (in.hasOption(FILE_ENCODING)) { + out.getConf().set(ENCODE, in.getOptionValue(FILE_ENCODING)); + } + applyValidationOptions(in, out); applyNewUpdateOptions(in, out); applyInputFormatOptions(in, out); diff --git a/src/test/org/apache/sqoop/SqoopTest.java b/src/test/org/apache/sqoop/SqoopTest.java new file mode 100644 index 000000000..7d34483c0 --- /dev/null +++ b/src/test/org/apache/sqoop/SqoopTest.java @@ -0,0 +1,31 @@ +package org.apache.sqoop; + +import org.junit.Before; +import org.junit.Test; + +public class SqoopTest { + + private String[] args; + + @Before + public void setUp() throws Exception { +// String params = "import --connect jdbc:oracle:thin:@10.1.0.242:1521:ywxx --username bishow --password bishow -m 4 --split-by 'product_id' --query 'select sum_date,product_name,product_id from cqx_test2 where $CONDITIONS' --target-dir '/cqx/hivetable/cqx_test2/' --fields-terminated-by '|' --as-textfile --delete-target-dir --null-string '' --null-non-string ''"; +// args = params.split(" ", -1); + String[] arg = {"import", "--connect", "jdbc:oracle:thin:@10.1.0.242:1521:ywxx", + "--username", "bishow", "--password", "C%MuhN#q$4", "-m", "4", "--split-by", 
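// Free-form --query imports must contain the literal $CONDITIONS token; Sqoop replaces it
// with a per-task boundary predicate derived from the --split-by column ("product_id" here).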
"product_id", "--query", + "select sum_date,product_name,product_id from cqx_test2 where $CONDITIONS", + "--target-dir", "/cqx/hivetable/cqx_test2/", "--fields-terminated-by", "|", "--as-textfile", + "--delete-target-dir", "--null-string", "", "--null-non-string", ""}; + args = arg; + System.out.println("args:"); + for (String p : args) { + System.out.print(p+" "); + } + } + + @Test + public void run() { + int ret = Sqoop.runTool(args); + System.out.println("ret:" + ret); + } +} \ No newline at end of file diff --git a/src/test/org/apache/sqoop/mapreduce/db/DateSplitterTest.java b/src/test/org/apache/sqoop/mapreduce/db/DateSplitterTest.java new file mode 100644 index 000000000..0c1a2c777 --- /dev/null +++ b/src/test/org/apache/sqoop/mapreduce/db/DateSplitterTest.java @@ -0,0 +1,126 @@ +package org.apache.sqoop.mapreduce.db; + +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.junit.Test; + +import java.sql.Types; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +public class DateSplitterTest { + + private OracleDateSplitter dateSplitter; + private SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + @Test + public void split() throws Exception { + dateSplitter = new OracleDateSplitter(); + String colName = "checkTime"; + final long MS_IN_SEC = 1000L; + long minVal; + long maxVal; + + int sqlDataType = Types.TIMESTAMP; + minVal = df.parse("2019-04-22 00:00:00").getTime(); + maxVal = df.parse("2019-04-22 23:59:59").getTime(); + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + int numSplits = 1440; + if (numSplits < 1) { + numSplits = 1; + } + + if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) { + // The range of acceptable dates is NULL to NULL. Just create a single + // split. + List splits = new ArrayList(); + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return; + } + + // For split size we are using seconds. So we need to convert to milliseconds. + long splitLimit = -1 * MS_IN_SEC; + + // Gather the split point integers + List splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal); + List splits = new ArrayList(); + + // Turn the split points into a set of intervals. + long start = splitPoints.get(0); + Date startDate = longToDate(start, sqlDataType); + if (sqlDataType == Types.TIMESTAMP) { + // The lower bound's nanos value needs to match the actual lower-bound + // nanos. + try { + ((java.sql.Timestamp) startDate).setNanos(0); + } catch (NullPointerException npe) { + // If the lower bound was NULL, we'll get an NPE; just ignore it and + // don't set nanos. + } + } + + for (int i = 1; i < splitPoints.size(); i++) { + long end = splitPoints.get(i); + Date endDate = longToDate(end, sqlDataType); + + if (i == splitPoints.size() - 1) { + if (sqlDataType == Types.TIMESTAMP) { + // The upper bound's nanos value needs to match the actual + // upper-bound nanos. + try { + ((java.sql.Timestamp) endDate).setNanos(0); + } catch (NullPointerException npe) { + // If the upper bound was NULL, we'll get an NPE; just ignore it + // and don't set nanos. + } + } + // This is the last one; use a closed interval. 
+ splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + dateSplitter.dateToString(startDate), + colName + " <= " + dateSplitter.dateToString(endDate))); + } else { + // Normal open-interval case. + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + dateSplitter.dateToString(startDate), + highClausePrefix + dateSplitter.dateToString(endDate))); + } + + start = end; + startDate = endDate; + } + + if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) { + // Add an extra split to handle the null case that we saw. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + } + + printList(splits); + } + + private void printList(List list) { + for (E e : list) { + System.out.println(e.toString()); + } + } + + private Date longToDate(long val, int sqlDataType) { + switch (sqlDataType) { + case Types.DATE: + return new java.sql.Date(val); + case Types.TIME: + return new java.sql.Time(val); + case Types.TIMESTAMP: + return new java.sql.Timestamp(val); + default: // Shouldn't ever hit this case. + return null; + } + } + +} \ No newline at end of file diff --git a/src/test/org/apache/sqoop/mapreduce/db/FloatSplitterTest.java b/src/test/org/apache/sqoop/mapreduce/db/FloatSplitterTest.java new file mode 100644 index 000000000..bc1af08e7 --- /dev/null +++ b/src/test/org/apache/sqoop/mapreduce/db/FloatSplitterTest.java @@ -0,0 +1,59 @@ +package org.apache.sqoop.mapreduce.db; + +import org.apache.hadoop.mapreduce.InputSplit; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +public class FloatSplitterTest { + + @Test + public void split() { + double MIN_INCREMENT = 10000 * Double.MIN_VALUE; + + System.out.println("Generating splits for a floating-point index column. Due to the"); + System.out.println("imprecise representation of floating-point values in Java, this"); + System.out.println("may result in an incomplete import."); + System.out.println("You are strongly encouraged to choose an integral split column."); + + List splits = new ArrayList(); + String colName = "float_code"; + double minVal = 1.111; + double maxVal = 133.333; + + // Use this as a hint. May need an extra task if the size doesn't + // divide cleanly. + int numSplits = 2; + double splitSize = (maxVal - minVal) / (double) numSplits; + + if (splitSize < MIN_INCREMENT) { + splitSize = MIN_INCREMENT; + } + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + double curLower = minVal; + double curUpper = curLower + splitSize; + + while (curUpper < maxVal) { + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + Double.toString(curLower), + highClausePrefix + Double.toString(curUpper))); + + curLower = curUpper; + curUpper += splitSize; + } + + // Catch any overage and create the closed interval for the last split. 
+ if (curLower <= maxVal || splits.size() == 1) { + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( +// lowClausePrefix + Double.toString(curUpper), + lowClausePrefix + Double.toString(curLower), + colName + " <= " + Double.toString(maxVal))); + } + + System.out.println(splits); + } +} \ No newline at end of file diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestBigDecimalSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestBigDecimalSplitter.java index 825743509..d29b76ca9 100644 --- a/src/test/org/apache/sqoop/mapreduce/db/TestBigDecimalSplitter.java +++ b/src/test/org/apache/sqoop/mapreduce/db/TestBigDecimalSplitter.java @@ -19,14 +19,21 @@ package org.apache.sqoop.mapreduce.db; import java.math.BigDecimal; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; +import org.apache.hadoop.mapreduce.InputSplit; import org.junit.Test; import static org.junit.Assert.assertEquals; public class TestBigDecimalSplitter { + private org.apache.sqoop.mapreduce.db.BigDecimalSplitter bigDecimalSplitter = new org.apache.sqoop.mapreduce.db.BigDecimalSplitter(); + /* Test if the decimal split sizes are generated as expected */ @Test public void testDecimalTryDivide() { @@ -60,4 +67,54 @@ public void testRecurringTryDivide() { assertEquals(expected, out); } + @Test + public void testSplit() throws SQLException { + String colName = "cur_lac"; + + BigDecimal minVal = new BigDecimal(6591); + BigDecimal maxVal = new BigDecimal(24996); + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + BigDecimal numSplits = new BigDecimal(2000); + + if (minVal == null && maxVal == null) { + // Range is null to null. Return a null split accordingly. + List splits = new ArrayList(); + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return; + } + + if (minVal == null || maxVal == null) { + // Don't know what is a reasonable min/max value for interpolation. Fail. + System.out.println("Cannot find a range for NUMERIC or DECIMAL " + + "fields with one end NULL."); + return; + } + + // Get all the split points together. + List splitPoints = bigDecimalSplitter.split(numSplits, minVal, maxVal); + List splits = new ArrayList(); + + // Turn the split points into a set of intervals. + BigDecimal start = splitPoints.get(0); + for (int i = 1; i < splitPoints.size(); i++) { + BigDecimal end = splitPoints.get(i); + + if (i == splitPoints.size() - 1) { + // This is the last one; use a closed interval. + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + start.toString(), + colName + " <= " + end.toString())); + } else { + // Normal open-interval case. 
+ splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + start.toString(), + highClausePrefix + end.toString())); + } + start = end; + } + } } diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestBooleanSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestBooleanSplitter.java new file mode 100644 index 000000000..74d429cf4 --- /dev/null +++ b/src/test/org/apache/sqoop/mapreduce/db/TestBooleanSplitter.java @@ -0,0 +1,32 @@ +package org.apache.sqoop.mapreduce.db; + +import org.apache.hadoop.mapreduce.InputSplit; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +public class TestBooleanSplitter { + + @Test + public void split() { + List splits = new ArrayList<>(); + String colName = "isCheck"; + + boolean minVal = false; + boolean maxVal = true; + + // Use one or two splits. + if (!minVal) { + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " = FALSE", colName + " = FALSE")); + } + + if (maxVal) { + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " = TRUE", colName + " = TRUE")); + } + + System.out.println(splits); + } +} \ No newline at end of file diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java index efd0b952f..d3f663f4b 100644 --- a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java +++ b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java @@ -18,9 +18,13 @@ package org.apache.sqoop.mapreduce.db; import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; import com.cloudera.sqoop.mapreduce.db.IntegerSplitter; +import org.apache.hadoop.mapreduce.InputSplit; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -143,6 +147,7 @@ public void testEvenSplitsWithLimit() throws SQLException { @Test public void testOddSplitsWithLimit() throws SQLException { List splits = new IntegerSplitter().split(5, 10, 0, 95); + System.out.println(splits); long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95}; assertLongArrayEquals(expected, toLongArray(splits)); } @@ -150,6 +155,7 @@ public void testOddSplitsWithLimit() throws SQLException { @Test public void testSplitWithBiggerLimit() throws SQLException { List splits = new IntegerSplitter().split(10, 15, 0, 100); + System.out.println(splits); long [] expected = {0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; assertLongArrayEquals(expected, toLongArray(splits)); } @@ -157,7 +163,56 @@ public void testSplitWithBiggerLimit() throws SQLException { @Test public void testFractionalSplitWithLimit() throws SQLException { List splits = new IntegerSplitter().split(5, 1, 1, 10); + System.out.println(splits); long [] expected = {1,2, 3, 4, 5, 6, 7, 8, 9, 10, 10}; assertLongArrayEquals(expected, toLongArray(splits)); } + + @Test + public void testSplit() throws Exception { + org.apache.sqoop.mapreduce.db.IntegerSplitter integerSplitter = new org.apache.sqoop.mapreduce.db.IntegerSplitter(); + String colName = "cnt"; + long minVal = 1; + long maxVal = 100; + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + int numSplits = 3; + if (numSplits < 1) { + numSplits = 1; + } + + long splitLimit = -1; + + // Get all the split points together. 
+ List splitPoints = integerSplitter.split(numSplits, splitLimit, minVal, maxVal); + System.out.println(String.format("Splits: [%,28d to %,28d] into %d parts", + minVal, maxVal, numSplits)); + for (int i = 0; i < splitPoints.size(); i++) { + System.out.println(String.format("%,28d", splitPoints.get(i))); + } + List splits = new ArrayList(); + + // Turn the split points into a set of intervals. + long start = splitPoints.get(0); + for (int i = 1; i < splitPoints.size(); i++) { + long end = splitPoints.get(i); + + if (i == splitPoints.size() - 1) { + // This is the last one; use a closed interval. + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + Long.toString(start), + colName + " <= " + Long.toString(end))); + } else { + // Normal open-interval case. + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + Long.toString(start), + highClausePrefix + Long.toString(end))); + } + + start = end; + } + System.out.println(splits); + } } diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java index 911749f41..b27563927 100644 --- a/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java +++ b/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java @@ -19,9 +19,13 @@ import java.math.BigDecimal; import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; import com.cloudera.sqoop.mapreduce.db.TextSplitter; +import org.apache.hadoop.mapreduce.InputSplit; import org.apache.sqoop.validation.ValidationException; import org.junit.Test; @@ -173,4 +177,93 @@ public void testNChar() throws SQLException { assertEquals(false, splitter2.isUseNCharStrings()); } + @Test + public void testSplit() throws Exception { + System.out.println("Generating splits for a textual index column."); + System.out.println("If your database sorts in a case-insensitive order, " + + "this may result in a partial import or duplicate records."); + System.out.println("You are strongly encouraged to choose an integral split column."); + + org.apache.sqoop.mapreduce.db.TextSplitter textSplitter = new org.apache.sqoop.mapreduce.db.TextSplitter(); + boolean useNCharStrings = false; + String colName = "produce_name"; + String minString = "1231"; + String maxString = "12324"; + + boolean minIsNull = false; + + // If the min value is null, switch it to an empty string instead for + // purposes of interpolation. Then add [null, null] as a special case + // split. + if (null == minString) { + minString = ""; + minIsNull = true; + } + + if (null == maxString) { + // If the max string is null, then the min string has to be null too. + // Just return a special split for this case. + List splits = new ArrayList(); + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return; + } + + // Use this as a hint. May need an extra task if the size doesn't + // divide cleanly. + // 本地是1 + // 远程默认是2 + int numSplits = 3; + + String lowClausePrefix = colName + " >= " + (useNCharStrings ? "N'" : "'"); + String highClausePrefix = colName + " < " + (useNCharStrings ? "N'" : "'"); + + // If there is a common prefix between minString and maxString, establish + // it and pull it out of minString and maxString. 
+ int maxPrefixLen = Math.min(minString.length(), maxString.length()); + int sharedLen; + for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) { + char c1 = minString.charAt(sharedLen); + char c2 = maxString.charAt(sharedLen); + if (c1 != c2) { + break; + } + } + + // The common prefix has length 'sharedLen'. Extract it from both. + String commonPrefix = minString.substring(0, sharedLen); + minString = minString.substring(sharedLen); + maxString = maxString.substring(sharedLen); + + List splitStrings = textSplitter.split(numSplits, minString, maxString, + commonPrefix); + List splits = new ArrayList(); + + // Convert the list of split point strings into an actual set of + // InputSplits. + String start = splitStrings.get(0); + for (int i = 1; i < splitStrings.size(); i++) { + String end = splitStrings.get(i); + + if (i == splitStrings.size() - 1) { + // This is the last one; use a closed interval. + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + start + "'", colName + + " <= " + (useNCharStrings ? "N'" : "'") + end + "'")); + } else { + // Normal open-interval case. + splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + start + "'", highClausePrefix + end + "'")); + } + + start = end; + } + + if (minIsNull) { + // Add the special null split at the end. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + } + System.out.println(splits); + } }
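Editor's note: the sketch below is a minimal illustration of how the new export encoding option introduced by this patch is expected to flow end to end. It assumes the names the diff adds (BaseSqoopTool.ENCODE = "sqoop.mapreduce.export.encode", populated by ExportTool when --fileencoding is supplied) plus the standard Hadoop Configuration and Text types; the class and main() are illustrative only and are not part of the patch.

import java.io.UnsupportedEncodingException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

public class FileEncodingSketch {

  // Key written by ExportTool when --fileencoding is passed (see BaseSqoopTool.ENCODE).
  static final String ENCODE = "sqoop.mapreduce.export.encode";

  // Mirrors the conversion added to TextExportMapper.map(): re-decode the raw bytes only
  // when an explicit encoding is configured, because the extra String copy has a
  // per-record cost on the export path.
  static String decode(Configuration conf, Text val) throws UnsupportedEncodingException {
    String encoding = conf.get(ENCODE);
    if (encoding == null) {
      return val.toString();  // default UTF-8 path, no extra copy
    }
    return new String(val.getBytes(), 0, val.getLength(), encoding);
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(ENCODE, "GBK");  // e.g. sqoop export ... --fileencoding GBK
    Text record = new Text("sample|row|data");
    System.out.println(decode(conf, record));
  }
}

As the (translated) comment in TextExportMapper notes, the transcode is reportedly costly, so the extra String allocation is only taken when --fileencoding is explicitly supplied; otherwise the record is parsed directly from the Text value as before.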