- 是否要针对项目的基础数据开发 Hive ETL,对数据进行进一步处理和转换,从而能更方便和快速计算和执行spark作业。
- 设计spark作业要保存结果数据的业务表结构,从而让J2EE平台可以使用业务表中的数据来为使用者展示使用结果。(只会实现第 2 个)
DROP TABLE IF EXISTS `session_aggr_stat`;
CREATE TABLE `session_aggr_stat` (
`task_id` INTEGER(11) NULL DEFAULT NULL,
`session_count` INTEGER(11) NULL DEFAULT NULL,
`1s_3s` DOUBLE NULL DEFAULT NULL,
`4s_6s` DOUBLE NULL DEFAULT NULL,
`7s_9s` DOUBLE NULL DEFAULT NULL,
`10s_30s` DOUBLE NULL DEFAULT NULL,
`30s_60s` DOUBLE NULL DEFAULT NULL,
`1m_3m` DOUBLE NULL DEFAULT NULL,
`3m_10m` DOUBLE NULL DEFAULT NULL,
`10m_30m` DOUBLE NULL DEFAULT NULL,
`30m` DOUBLE NULL DEFAULT NULL,
`1_3` DOUBLE NULL DEFAULT NULL,
`4_6` DOUBLE NULL DEFAULT NULL,
`7_9` DOUBLE NULL DEFAULT NULL,
`10_30` DOUBLE NULL DEFAULT NULL,
`30_60` DOUBLE NULL DEFAULT NULL,
`60` DOUBLE NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `session_aggr_stat` ADD INDEX index_task_id (`task_id`);
SHOW INDEX FROM `session_aggr_stat`;
mysql> SHOW INDEX FROM `session_aggr_stat`;
+-------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+-------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| session_aggr_stat | 1 | index_task_id | 1 | task_id | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
+-------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.13 sec)
DROP TABLE IF EXISTS `session_random_extract`;
CREATE TABLE `session_random_extract` (
`task_id` INTEGER(11) NULL DEFAULT NULL,
`session_id` VARCHAR(255) NULL DEFAULT NULL,
`start_time` VARCHAR(50) NULL DEFAULT NULL,
`search_keywords` VARCHAR(255) NULL DEFAULT NULL,
`click_category_ids` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `session_random_extract` ADD INDEX index_task_id (`task_id`);
SHOW INDEX FROM `session_random_extract`;
mysql> SHOW INDEX FROM `session_random_extract`;
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| session_random_extract | 1 | index_task_id | 1 | task_id | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.04 sec)
DROP TABLE IF EXISTS `top10_category`;
CREATE TABLE `top10_category` (
`task_id` INTEGER(11) NULL DEFAULT NULL,
`category_id` INTEGER(11) NULL DEFAULT NULL,
`click_count` INTEGER(11) NULL DEFAULT NULL,
`order_count` INTEGER(11) NULL DEFAULT NULL,
`pay_count` INTEGER(11) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `top10_category` ADD INDEX index_task_id (`task_id`);
SHOW INDEX FROM `top10_category`;
mysql> SHOW INDEX FROM `top10_category`;
+----------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+----------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| top10_category | 1 | index_task_id | 1 | task_id | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
+----------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.08 sec)
DROP TABLE IF EXISTS `top10_category_session`;
CREATE TABLE `top10_category_session` (
`task_id` INTEGER(11) NULL DEFAULT NULL,
`category_id` INTEGER(11) NULL DEFAULT NULL,
`session_id` VARCHAR(255) NULL DEFAULT NULL,
`click_count` INTEGER(11) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `top10_category_session` ADD INDEX index_task_id (`task_id`);
SHOW INDEX FROM `top10_category_session`;
mysql> SHOW INDEX FROM `top10_category_session`;
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| top10_category_session | 1 | index_task_id | 1 | task_id | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.07 sec)
DROP TABLE IF EXISTS `session_detail`;
CREATE TABLE `session_detail` (
`task_id` INTEGER(11) NULL DEFAULT NULL,
`user_id` INTEGER(11) NULL DEFAULT NULL,
`session_id` VARCHAR(255) NULL DEFAULT NULL,
`page_id` INTEGER(11) NULL DEFAULT NULL,
`action_time` VARCHAR(255) NULL DEFAULT NULL,
`search_keyword` VARCHAR(255) NULL DEFAULT NULL,
`click_category_id` INTEGER(11) NULL DEFAULT NULL,
`click_product_id` INTEGER(11) NULL DEFAULT NULL,
`order_category_ids` VARCHAR(255) NULL DEFAULT NULL,
`order_products_ids` VARCHAR(255) NULL DEFAULT NULL,
`pay_category_ids` INTEGER NULL DEFAULT NULL,
`pay_product_ids` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `session_detail` ADD INDEX index_task_id (`task_id`);
ALTER TABLE `session_detail` ADD INDEX index_session_id (`session_id`);
SHOW INDEX FROM `session_detail`;
mysql> SHOW INDEX FROM `session_detail`;
+----------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+----------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| session_detail | 1 | index_task_id | 1 | task_id | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
| session_detail | 1 | index_session_id | 1 | session_id | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
+----------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
2 rows in set (0.07 sec)
DROP TABLE IF EXISTS `task`;
CREATE TABLE `task` (
`id` INTEGER(11) NOT NULL AUTO_INCREMENT,
`name` VARCHAR(255) NULL DEFAULT NULL,
`create_time` VARCHAR(255) NULL DEFAULT NULL,
`start_time` VARCHAR(255) NULL DEFAULT NULL,
`finish_time` VARCHAR(255) NULL DEFAULT NULL,
`task_type` VARCHAR(255) NULL DEFAULT NULL,
`task_status` VARCHAR(255) NULL DEFAULT NULL,
`task_param` MEDIUMTEXT NULL DEFAULT NULL,
PRIMARY KEY (`id`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
DROP TABLE IF EXISTS `user_visit_action`;
CREATE TABLE `user_visit_action` (
`date` VARCHAR(255) NULL DEFAULT NULL,
`user_id` INTEGER(11) NULL DEFAULT NULL,
`session_id` VARCHAR(255) NULL DEFAULT NULL,
`page_id` INTEGER(11) NULL DEFAULT NULL,
`action_time` VARCHAR(255) NULL DEFAULT NULL,
`search_keyword` VARCHAR(255) NULL DEFAULT NULL,
`click_category_id` INTEGER(11) NULL DEFAULT NULL,
`click_product_id` INTEGER(11) NULL DEFAULT NULL,
`order_category_ids` VARCHAR(255) NULL DEFAULT NULL,
`order_product_ids` VARCHAR(255) NULL DEFAULT NULL,
`pay_category_ids` VARCHAR(255) NULL DEFAULT NULL,
`pay_product_ids` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `user_visit_action` ADD INDEX index_date (`date`);
SHOW INDEX FROM `user_visit_action`;
mysql> SHOW INDEX FROM `user_visit_action`;
+-------------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+-------------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| user_visit_action | 1 | index_date | 1 | date | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
+-------------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.08 sec)
DROP TABLE IF EXISTS `user_info`;
CREATE TABLE `user_info` (
`user_id` INTEGER(11) NULL DEFAULT NULL,
`user_name` VARCHAR(255) NULL DEFAULT NULL,
`name` VARCHAR(255) NULL DEFAULT NULL,
`age` INTEGER(11) NULL DEFAULT NULL,
`professional` VARCHAR(255) NULL DEFAULT NULL,
`city` VARCHAR(255) NULL DEFAULT NULL,
`sex` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `user_info` ADD INDEX index_task_id (`user_id`);
SHOW INDEX FROM `user_info`;
mysql> SHOW INDEX FROM `user_info`;
+-----------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+-----------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| user_info | 1 | index_task_id | 1 | user_id | A | 0 | NULL | NULL | YES | BTREE | | | YES | NULL |
+-----------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.04 sec)
运行以上所有SQL语句,创建数据库表
mysql> CREATE DATABASE spark_project;
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| spark_project |
| test |
+--------------------+
6 rows in set (0.01 sec)
mysql> show tables;
+-------------------------+
| Tables_in_spark_project |
+-------------------------+
| session_aggr_stat |
| session_detail |
| session_random_extract |
| task |
| top10_category |
| top10_category_session |
| user_info |
| user_visit_action |
+-------------------------+
8 rows in set (0.00 sec)