7.5 Page Split Conversion Rate - Production Environment Test (Hive Tables)

1. Create the Hive Tables

1.1. user_visit_action

hive> create table user_visit_action(
    > `date` string,
    > user_id bigint,
    > session_id string,
    > page_id bigint,
    > action_time string,
    > search_keyword string,
    > click_category_id bigint,
    > click_product_id bigint,
    > order_category_ids string,
    > order_product_ids string,
    > pay_category_ids string,
    > pay_product_ids string
    > );
OK
Time taken: 1.027 seconds

hive> show tables;
OK
user_visit_action
Time taken: 0.036 seconds, Fetched: 1 row(s)
Import the data

user_visit_action data format

| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       2 | 2018-06-22 11:56:14 | 新辣道鱼火锅       |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       9 | 2018-06-22 11:03:23 | 太古商场           |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       3 | 2018-06-22 11:04:02 | 重庆辣子鸡         |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       2 | 2018-06-22 11:25:14 | NULL               |                 0 |                0 | NULL               | NULL              | 49               | 1               |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       5 | 2018-06-22 11:00:39 | NULL               |                 7 |                7 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       7 | 2018-06-22 11:28:27 | 重庆辣子鸡         |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       2 | 2018-06-22 11:31:07 | 日本料理           |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | e0a59846b97545469f9bfdb75e6a9130 |       5 | 2018-06-22 0:09:15  | NULL               |                 0 |                0 | 28                 | 71                | NULL             | NULL            |
| 2018-06-22 |      61 | e0a59846b97545469f9bfdb75e6a9130 |       9 | 2018-06-22 0:39:26  | 日本料理           |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | e0a59846b97545469f9bfdb75e6a9130 |       8 | 2018-06-22 0:42:52  | 太古商场           |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | e0a59846b97545469f9bfdb75e6a9130 |       0 | 2018-06-22 0:27:19  | 呷哺呷哺           |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |

Run the following command to load the data into the user_visit_action Hive table. Note that the table above was created without a ROW FORMAT clause, so Hive assumes its default '\001' field delimiter; if user_visit_action.txt uses a different separator, the table should instead be created with a matching ROW FORMAT DELIMITED FIELDS TERMINATED BY clause.

hive> load data local inpath '/usr/local/user_visit_action.txt' into table user_visit_action;
Loading data to table default.user_visit_action
OK
Time taken: 2.998 seconds

Query the first 5 rows

hive> select * from user_visit_action limit 5;
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       2 | 2018-06-22 11:56:14 | 新辣道鱼火锅       |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       9 | 2018-06-22 11:03:23 | 太古商场           |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       3 | 2018-06-22 11:04:02 | 重庆辣子鸡         |                 0 |                0 | NULL               | NULL              | NULL             | NULL            |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       2 | 2018-06-22 11:25:14 | NULL               |                 0 |                0 | NULL               | NULL              | 49               | 1               |
| 2018-06-22 |      61 | 137fef3066b34d2eba26364c9c6a7e86 |       5 | 2018-06-22 11:00:39 | NULL               |                 7 |                7 | NULL               | NULL              | NULL             | NULL            |

1.2. user_info

hive> create table user_info(
    > user_id bigint,
    > username string,
    > name string,
    > age bigint,
    > professional string,
    > city string,
    > sex string
    > );
Import the data

user_info data format (from the source MySQL table)

mysql> select * from user_info limit 5;
|       0 | user0     | name0 |   26 | professional365 | city864 | male   |
|       1 | user1     | name1 |   54 | professional300 | city957 | female |
|       2 | user2     | name2 |   28 | professional795 | city617 | female |
|       3 | user3     | name3 |    3 | professional900 | city991 | male   |
|       4 | user4     | name4 |   49 | professional518 | city254 | female |

Run the following command to load the data into the user_info Hive table

hive> load data local inpath '/usr/local/user_info.txt' into table user_info;
Loading data to table default.user_info
OK
Time taken: 0.998 seconds

Query the first 5 rows

hive> select * from user_info limit 5;
|       0 | user0     | name0 |   26 | professional365 | city864 | male   |
|       1 | user1     | name1 |   54 | professional300 | city957 | female |
|       2 | user2     | name2 |   28 | professional795 | city617 | female |
|       3 | user3     | name3 |    3 | professional900 | city991 | male   |
|       4 | user4     | name4 |   49 | professional518 | city254 | female |

Execute the SQL file directly on the Linux server

Grant privileges for remote access

# 518834jzb is the password used for the connection
grant all privileges on *.* to 'hive'@'%' identified by '518834jzb' with grant option;
flush privileges;

# set the MySQL password
set password = password('518834jzb');
flush privileges;
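Before repointing the project, the grant can be sanity-checked from the application side with a minimal JDBC connection test. This is only a sketch, not part of the project: it reuses the server address and credentials configured in section 2 below, and assumes mysql-connector-java is on the classpath.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Standalone sketch: verify the remote MySQL grant works before
// switching the project's datasource to production.
public class RemoteMysqlCheck {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:mysql://39.105.94.133:3306/spark_project?allowPublicKeyRetrieval=true";
        try (Connection conn = DriverManager.getConnection(url, "root", "518834jzb");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT 1")) {
            rs.next();
            System.out.println("Remote MySQL reachable, SELECT 1 returned " + rs.getInt(1));
        }
    }
}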

Sqoop Learning Path (Part 1)

List the databases in MySQL

sqoop list-databases --connect jdbc:mysql://localhost:3306/ --username root --password 518834jzb

List the tables in a given MySQL database

sqoop list-tables --connect jdbc:mysql://localhost:3306/spark_project --username root --password 518834jzb

Create a Hive table with the same schema as the MySQL table

sqoop create-hive-table --connect jdbc:mysql://localhost:3306/spark_project --username root --password 518834jzb --table user_visit_action --hive-table user_visit_action

2. Switch the MySQL Runtime Environment

Update the configuration
# local environment
jdbc.url=jdbc:mysql://localhost:3306/spark_project?allowPublicKeyRetrieval=true
jdbc.user=root
jdbc.password=518834jzb

# production environment: MySQL on the server
jdbc.url.product=jdbc:mysql://39.105.94.133:3306/spark_project?allowPublicKeyRetrieval=true
jdbc.user.product=root
jdbc.password.product=518834jzb
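The JDBCHelper below reads these properties through a Constants class whose values are not shown in this document. A plausible sketch, assuming the constants map one-to-one to the property names above (the real project may define them differently):

// Assumed Constants mapping; names spelled as referenced in JDBCHelper.
public interface Constants {
    String SPARK_LOCAL = "spark.local";                   // assumed local/production switch
    String JDBC_DATASORCE_SIZE = "jdbc.datasource.size";  // assumed pool-size key
    String JDBC_URL = "jdbc.url";
    String JDBC_USER = "jdbc.user";
    String JDBC_PWD = "jdbc.password";
    String JDBC_URL_PROD = "jdbc.url.product";
    String JDBC_USER_PROD = "jdbc.user.product";
    String JDBC_PWD_PROD = "jdbc.password.product";
}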
JDBCHelper
private JDBCHelper() {
    // Size of the connection pool, read from configuration.
    int datasourcesize = ConfManager.getInteger(Constants.JDBC_DATASORCE_SIZE);
    for (int i = 0; i < datasourcesize; i++) {
        // spark.local decides whether to connect to the local or the production MySQL.
        boolean local = ConfManager.getBoolean(Constants.SPARK_LOCAL);
        String jdbc_url;
        String jdbc_user;
        String jdbc_pwd;
        if (local) {
            jdbc_url = ConfManager.getProperty(Constants.JDBC_URL);
            jdbc_user = ConfManager.getProperty(Constants.JDBC_USER);
            jdbc_pwd = ConfManager.getProperty(Constants.JDBC_PWD);
        } else {
            jdbc_url = ConfManager.getProperty(Constants.JDBC_URL_PROD);
            jdbc_user = ConfManager.getProperty(Constants.JDBC_USER_PROD);
            jdbc_pwd = ConfManager.getProperty(Constants.JDBC_PWD_PROD);
        }

        try {
            // Open a connection and push it into the pool for later reuse.
            Connection conn = DriverManager.getConnection(jdbc_url, jdbc_user, jdbc_pwd);
            datasource.push(conn);
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
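A sketch of how a pooled connection might be borrowed and returned; getInstance(), getConnection(), and returnConnection() are assumed accessor names here, not necessarily the project's actual API:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

// Illustrative pool usage; helper method names are assumptions.
public class PoolUsageSketch {
    public static void printConvertRate(long taskId) throws SQLException {
        Connection conn = JDBCHelper.getInstance().getConnection(); // borrow from the pool
        try (PreparedStatement ps = conn.prepareStatement(
                "SELECT convert_rate FROM page_split_convert_rate WHERE task_id = ?")) {
            ps.setLong(1, taskId);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    System.out.println(rs.getString("convert_rate"));
                }
            }
        } finally {
            JDBCHelper.getInstance().returnConnection(conn); // hand it back for reuse
        }
    }
}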

3. Package the Project and Run the Script

/usr/local/spark/bin/spark-submit \
--class cn.spark.analysis.spark.page.PageOneStepConvertRate \
--num-executors 1 \
--driver-memory 100m \
--executor-memory 100m \
--executor-cores 1 \
--files /usr/local/hive/conf/hive-site.xml \
--driver-class-path /usr/local/hive/lib/mysql-connector-java.jar \
/usr/local/spark-project-0.0.1-SNAPSHOT-jar-withdependencies.jar \
${3}
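Shipping hive-site.xml via --files is what lets the job locate the Hive metastore, and the MySQL connector on the driver classpath serves the result write-back. Inside the job, reading the Hive table amounts to something like the following sketch (Spark 2.x SparkSession API shown; an older project would use HiveContext instead, and the real PageOneStepConvertRate does considerably more):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

// Minimal sketch of querying the Hive table from the submitted job.
public class HiveReadSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("PageOneStepConvertRate")
                .enableHiveSupport() // needs hive-site.xml, shipped via --files above
                .getOrCreate();

        Dataset<Row> actions = spark.sql(
                "SELECT session_id, page_id, action_time FROM user_visit_action");
        actions.show(5);

        spark.stop();
    }
}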
Run results

Check the data written to MySQL on the server

MariaDB [spark_project]> select * from page_split_convert_rate;
+---------+-------------------------------------------------------------------------+
| task_id | convert_rate                                                            |
+---------+-------------------------------------------------------------------------+
|       3 | 7_8=1.06|8_9=0.95|1_2=0.09|2_3=1.03|3_4=0.99|4_5=0.93|5_6=1.08|6_7=0.94 |
+---------+-------------------------------------------------------------------------+
1 row in set (0.00 sec)
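Each page pair's rate is packed into the single convert_rate column, '|'-separated. A small sketch of how a consumer could unpack the value shown above (illustrative, not project code):

import java.util.LinkedHashMap;
import java.util.Map;

// Parse "7_8=1.06|8_9=0.95|..." into an ordered pageSplit -> rate map.
public class ConvertRateParser {
    public static Map<String, Double> parse(String convertRate) {
        Map<String, Double> rates = new LinkedHashMap<>();
        for (String pair : convertRate.split("\\|")) {
            String[] kv = pair.split("=");
            rates.put(kv[0], Double.parseDouble(kv[1]));
        }
        return rates;
    }

    public static void main(String[] args) {
        String row = "7_8=1.06|8_9=0.95|1_2=0.09|2_3=1.03|3_4=0.99|4_5=0.93|5_6=1.08|6_7=0.94";
        System.out.println(parse(row));
        // {7_8=1.06, 8_9=0.95, 1_2=0.09, ...}
    }
}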