Skip to content

Latest commit

 

History

History
256 lines (213 loc) · 13.7 KB

3.1 数据库表设计与创建.md

File metadata and controls

256 lines (213 loc) · 13.7 KB

3.1 数据库表设计与创建

  1. 是否要针对项目的基础数据开发 Hive ETL,对数据进行进一步处理和转换,从而能更方便和快速计算和执行spark作业。
  2. 设计spark作业要保存结果数据的业务表结构,从而让J2EE平台可以使用业务表中的数据来为使用者展示使用结果。(只会实现第 2 个)

session_aggr_stat:存储session聚合统计结果

DROP TABLE IF EXISTS `session_aggr_stat`;
		
CREATE TABLE `session_aggr_stat` (
  `task_id` INTEGER(11) NULL DEFAULT NULL,
  `session_count` INTEGER(11) NULL DEFAULT NULL,
  `1s_3s` DOUBLE NULL DEFAULT NULL,
  `4s_6s` DOUBLE NULL DEFAULT NULL,
  `7s_9s` DOUBLE NULL DEFAULT NULL,
  `10s_30s` DOUBLE NULL DEFAULT NULL,
  `30s_60s` DOUBLE NULL DEFAULT NULL,
  `1m_3m` DOUBLE NULL DEFAULT NULL,
  `3m_10m` DOUBLE NULL DEFAULT NULL,
  `10m_30m` DOUBLE NULL DEFAULT NULL,
  `30m` DOUBLE NULL DEFAULT NULL,
  `1_3` DOUBLE NULL DEFAULT NULL,
  `4_6` DOUBLE NULL DEFAULT NULL,
  `7_9` DOUBLE NULL DEFAULT NULL,
  `10_30` DOUBLE NULL DEFAULT NULL,
  `30_60` DOUBLE NULL DEFAULT NULL,
  `60` DOUBLE NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

ALTER TABLE `session_aggr_stat` ADD INDEX index_task_id (`task_id`);

SHOW INDEX FROM `session_aggr_stat`;

mysql> SHOW INDEX FROM `session_aggr_stat`;
+-------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table             | Non_unique | Key_name      | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+-------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| session_aggr_stat |          1 | index_task_id |            1 | task_id     | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
+-------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.13 sec)

session_random_extract:存储按时间比例随机抽取出来的1000个session。

DROP TABLE IF EXISTS `session_random_extract`;

CREATE TABLE `session_random_extract` (
  `task_id` INTEGER(11) NULL DEFAULT NULL,
  `session_id` VARCHAR(255) NULL DEFAULT NULL,
  `start_time` VARCHAR(50) NULL DEFAULT NULL,
  `search_keywords` VARCHAR(255) NULL DEFAULT NULL,
  `click_category_ids` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

ALTER TABLE `session_random_extract` ADD INDEX index_task_id (`task_id`);

SHOW INDEX FROM `session_random_extract`;

mysql> SHOW INDEX FROM `session_random_extract`;
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table                  | Non_unique | Key_name      | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| session_random_extract |          1 | index_task_id |            1 | task_id     | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.04 sec)

top10_category:存储点击,下单,支付排名前10的品类数据

DROP TABLE IF EXISTS `top10_category`;
		
CREATE TABLE `top10_category` (
  `task_id` INTEGER(11) NULL DEFAULT NULL,
  `category_id` INTEGER(11) NULL DEFAULT NULL,
  `click_count` INTEGER(11) NULL DEFAULT NULL,
  `order_count` INTEGER(11) NULL DEFAULT NULL,
  `pay_count` INTEGER(11) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

ALTER TABLE `top10_category` ADD INDEX index_task_id (`task_id`);

SHOW INDEX FROM `top10_category`;

mysql> SHOW INDEX FROM `top10_category`;
+----------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table          | Non_unique | Key_name      | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+----------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| top10_category |          1 | index_task_id |            1 | task_id     | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
+----------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.08 sec)

top10_category_session:存储 top10 每个品类的点击的 top10 的 session

DROP TABLE IF EXISTS `top10_category_session`;
		
CREATE TABLE `top10_category_session` (
  `task_id` INTEGER(11) NULL DEFAULT NULL,
  `category_id` INTEGER(11) NULL DEFAULT NULL,
  `session_id` VARCHAR(255) NULL DEFAULT NULL,
  `click_count` INTEGER(11) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

ALTER TABLE `top10_category_session` ADD INDEX index_task_id (`task_id`);

SHOW INDEX FROM `top10_category_session`;

mysql> SHOW INDEX FROM `top10_category_session`;
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table                  | Non_unique | Key_name      | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| top10_category_session |          1 | index_task_id |            1 | task_id     | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
+------------------------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.07 sec)

session_detail,存储随机抽取出来的session明细数据,top10品类的session的明细数据

DROP TABLE IF EXISTS `session_detail`;
		
CREATE TABLE `session_detail` (
  `task_id` INTEGER(11) NULL DEFAULT NULL,
  `user_id` INTEGER(11) NULL DEFAULT NULL,
  `session_id` VARCHAR(255) NULL DEFAULT NULL,
  `page_id` INTEGER(11) NULL DEFAULT NULL,
  `action_time` VARCHAR(255) NULL DEFAULT NULL,
  `search_keyword` VARCHAR(255) NULL DEFAULT NULL,
  `click_category_id` INTEGER(11) NULL DEFAULT NULL,
  `click_product_id` INTEGER(11) NULL DEFAULT NULL,
  `order_category_ids` VARCHAR(255) NULL DEFAULT NULL,
  `order_products_ids` VARCHAR(255) NULL DEFAULT NULL,
  `pay_category_ids` INTEGER NULL DEFAULT NULL,
  `pay_product_ids` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

ALTER TABLE `session_detail` ADD INDEX index_task_id (`task_id`);
ALTER TABLE `session_detail` ADD INDEX index_session_id (`session_id`);

SHOW INDEX FROM `session_detail`;

mysql> SHOW INDEX FROM `session_detail`;
+----------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table          | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+----------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| session_detail |          1 | index_task_id    |            1 | task_id     | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
| session_detail |          1 | index_session_id |            1 | session_id  | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
+----------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
2 rows in set (0.07 sec)

task,存储J2EE平台插入其中的任务信息

DROP TABLE IF EXISTS `task`;
		
CREATE TABLE `task` (
  `id` INTEGER(11) NOT NULL AUTO_INCREMENT,
  `name` VARCHAR(255) NULL DEFAULT NULL,
  `create_time` VARCHAR(255) NULL DEFAULT NULL,
  `start_time` VARCHAR(255) NULL DEFAULT NULL,
  `finish_time` VARCHAR(255) NULL DEFAULT NULL,
  `task_type` VARCHAR(255) NULL DEFAULT NULL,
  `task_status` VARCHAR(255) NULL DEFAULT NULL,
  `task_param` MEDIUMTEXT NULL DEFAULT NULL,
  PRIMARY KEY (`id`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

user_visit_action:用户访问行为

DROP TABLE IF EXISTS `user_visit_action`;
		
CREATE TABLE `user_visit_action` (
  `date` VARCHAR(255) NULL DEFAULT NULL,
  `user_id` INTEGER(11) NULL DEFAULT NULL,
  `session_id` VARCHAR(255) NULL DEFAULT NULL,
  `page_id` INTEGER(11) NULL DEFAULT NULL,
  `action_time` VARCHAR(255) NULL DEFAULT NULL,
  `search_keyword` VARCHAR(255) NULL DEFAULT NULL,
  `click_category_id` INTEGER(11) NULL DEFAULT NULL,
  `click_product_id` INTEGER(11) NULL DEFAULT NULL,
  `order_category_ids` VARCHAR(255) NULL DEFAULT NULL,
  `order_product_ids` VARCHAR(255) NULL DEFAULT NULL,
  `pay_category_ids` VARCHAR(255) NULL DEFAULT NULL,
  `pay_product_ids` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

ALTER TABLE `user_visit_action` ADD INDEX index_date (`date`);

SHOW INDEX FROM `user_visit_action`;

mysql> SHOW INDEX FROM `user_visit_action`;
+-------------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table             | Non_unique | Key_name   | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+-------------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| user_visit_action |          1 | index_date |            1 | date        | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
+-------------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.08 sec)

user_info:用户表

DROP TABLE IF EXISTS `user_info`;
		
CREATE TABLE `user_info` (
  `user_id` INTEGER(11) NULL DEFAULT NULL,
  `user_name` VARCHAR(255) NULL DEFAULT NULL,
  `name` VARCHAR(255) NULL DEFAULT NULL,
  `age` INTEGER(11) NULL DEFAULT NULL,
  `professional` VARCHAR(255) NULL DEFAULT NULL,
  `city` VARCHAR(255) NULL DEFAULT NULL,
  `sex` VARCHAR(255) NULL DEFAULT NULL
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

ALTER TABLE `user_info` ADD INDEX index_task_id (`user_id`);

SHOW INDEX FROM `user_info`;

mysql> SHOW INDEX FROM `user_info`;
+-----------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| Table     | Non_unique | Key_name      | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression |
+-----------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
| user_info |          1 | index_task_id |            1 | user_id     | A         |           0 |     NULL |   NULL | YES  | BTREE      |         |               | YES     | NULL       |
+-----------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+
1 row in set (0.04 sec)

运行以上所有SQL语句,创建数据库表

mysql> CREATE DATABASE spark_project;
mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| spark_project      |
| test               |
+--------------------+
6 rows in set (0.01 sec)

mysql> show tables;
+-------------------------+
| Tables_in_spark_project |
+-------------------------+
| session_aggr_stat       |
| session_detail          |
| session_random_extract  |
| task                    |
| top10_category          |
| top10_category_session  |
| user_info               |
| user_visit_action       |
+-------------------------+
8 rows in set (0.00 sec)