hive> create table user_visit_action(
> `date` string,
> user_id bigint,
> session_id string,
> page_id bigint,
> action_time string,
> search_keyword string,
> click_category_id bigint,
> click_product_id bigint,
> order_category_ids string,
> order_product_ids string,
> pay_category_ids string,
> pay_product_ids string
> );
OK
Time taken: 1.027 seconds
hive> show tables;
OK
user_visit_action
Time taken: 0.036 seconds, Fetched: 1 row(s)
user_visit_action 数据格式
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 2 | 2018-06-22 11:56:14 | 新辣道鱼火锅 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 9 | 2018-06-22 11:03:23 | 太古商场 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 3 | 2018-06-22 11:04:02 | 重庆辣子鸡 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 2 | 2018-06-22 11:25:14 | NULL | 0 | 0 | NULL | NULL | 49 | 1 |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 5 | 2018-06-22 11:00:39 | NULL | 7 | 7 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 7 | 2018-06-22 11:28:27 | 重庆辣子鸡 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 2 | 2018-06-22 11:31:07 | 日本料理 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | e0a59846b97545469f9bfdb75e6a9130 | 5 | 2018-06-22 0:09:15 | NULL | 0 | 0 | 28 | 71 | NULL | NULL |
| 2018-06-22 | 61 | e0a59846b97545469f9bfdb75e6a9130 | 9 | 2018-06-22 0:39:26 | 日本料理 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | e0a59846b97545469f9bfdb75e6a9130 | 8 | 2018-06-22 0:42:52 | 太古商场 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | e0a59846b97545469f9bfdb75e6a9130 | 0 | 2018-06-22 0:27:19 | 呷哺呷哺 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | e0a59846b97545469f9bfdb75e6a9130 | 5 | 2018-06-22 0:17:29 | 呷哺呷哺 | 0 | 0 | NULL | NULL
执行命令将数据导入 user_visit_action 的 hive 表中
hive> load data local inpath '/usr/local/user_visit_action.txt' into table user_visit_action;
Loading data to table default.user_visit_action
OK
Time taken: 2.998 seconds
查询前 10 条数据
hive> select * from user_visit_action limit 5;
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 2 | 2018-06-22 11:56:14 | 新辣道鱼火锅 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 9 | 2018-06-22 11:03:23 | 太古商场 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 3 | 2018-06-22 11:04:02 | 重庆辣子鸡 | 0 | 0 | NULL | NULL | NULL | NULL |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 2 | 2018-06-22 11:25:14 | NULL | 0 | 0 | NULL | NULL | 49 | 1 |
| 2018-06-22 | 61 | 137fef3066b34d2eba26364c9c6a7e86 | 5 | 2018-06-22 11:00:39 | NULL | 7 | 7 | NULL | NULL | NULL | NULL |
hive> create table user_info(
> user_id bigint,
> username string,
> name string,
> age bigint,
> professional string,
> city string,
> sex string
> );
user_info 数据格式
mysql> select * from user_info limit 5;
| 0 | user0 | name0 | 26 | professional365 | city864 | male |
| 1 | user1 | name1 | 54 | professional300 | city957 | female |
| 2 | user2 | name2 | 28 | professional795 | city617 | female |
| 3 | user3 | name3 | 3 | professional900 | city991 | male |
| 4 | user4 | name4 | 49 | professional518 | city254 | female |
执行命令将数据导入 user_info 的 hive 表中
hive> load data local inpath '/usr/local/user_info.txt' into table user_info;
Loading data to table default.user_info
OK
Time taken: 0.998 seconds
查询前5条数据
hive> select * from user_info limit 5;
| 0 | user0 | name0 | 26 | professional365 | city864 | male |
| 1 | user1 | name1 | 54 | professional300 | city957 | female |
| 2 | user2 | name2 | 28 | professional795 | city617 | female |
| 3 | user3 | name3 | 3 | professional900 | city991 | male |
| 4 | user4 | name4 | 49 | professional518 | city254 | female |
给远程访问设置权限
#其中 518834jzb 是用于连接的密码
grant all privileges on *.* to 'hive'@'%' identified by '518834jzb' with grant option;
flush privileges;
#设置mysql密码
set password =password('518834jzb');
flush privileges;
列出MySQL数据有哪些数据库
sqoop list-databases --connect jdbc:mysql://localhost:3306/ --username root --password 518834jzb
列出MySQL中的某个数据库有哪些数据表
sqoop list-tables --connect jdbc:mysql://localhost:3306/spark_project --username root --password 518834jzb
创建与mysql一致的hive表
sqoop create-hive-table --connect jdbc:mysql://localhost:3306/spark_project --username root --password 518834jzb --table user_visit_action --hive-table user_visit_action
#本地环境
jdbc.url=jdbc:mysql://localhost:3306/spark_project?allowPublicKeyRetrieval=true
jdbc.user=root
jdbc.password=518834jzb
#生产环境 服务器的 mysql
jdbc.url.product=jdbc:mysql://39.105.94.133:3306/spark_project?allowPublicKeyRetrieval=true
jdbc.user.product=root
jdbc.password.product=518834jzb
private JDBCHelper() {
int datasourcesize = ConfManager.getInteger(Constants.JDBC_DATASORCE_SIZE);
for (int i = 0; i < datasourcesize; i++) {
boolean local = ConfManager.getBoolean(Constants.SPARK_LOCAL);
String jdbc_url = null;
String jdbc_user = null;
String jdbc_pwd = null;
if(local) {
jdbc_url = ConfManager.getProperty(Constants.JDBC_URL);
jdbc_user = ConfManager.getProperty(Constants.JDBC_USER);
jdbc_pwd = ConfManager.getProperty(Constants.JDBC_PWD);
} else {
jdbc_url = ConfManager.getProperty(Constants.JDBC_URL_PROD);
jdbc_user = ConfManager.getProperty(Constants.JDBC_USER_PROD);
jdbc_pwd = ConfManager.getProperty(Constants.JDBC_PWD_PROD);
}
try {
Connection conn = DriverManager.getConnection(jdbc_url, jdbc_user, jdbc_pwd);
datasource.push(conn);
} catch (SQLException e) {
e.printStackTrace();
}
}
}
/usr/local/spark/bin/spark-submit\
--class cn.spark.analysis.spark.page.PageOneStepConvertRate\
--num-executors 1\
--driver-memory 100m\
--executor-memory 100m\
--executor-cores 1\
--files /usr/local/hive/conf/hive-site.xml\
--driver-class-path /usr/local/hive/lib/mysql-connector-java.jar\
/usr/local/spark-project-0.0.1-SNAPSHOT-jar-withdependencies.jar\
${3}
查看服务器中 mysql 的数据
MariaDB [spark_project]> select * from page_split_convert_rate;
+---------+-------------------------------------------------------------------------+
| task_id | convert_rate |
+---------+-------------------------------------------------------------------------+
| 3 | 7_8=1.06|8_9=0.95|1_2=0.09|2_3=1.03|3_4=0.99|4_5=0.93|5_6=1.08|6_7=0.94 |
+---------+-------------------------------------------------------------------------+
1 row in set (0.00 sec)