diff --git a/docs/account-entity-design-analysis.md b/docs/account-entity-design-analysis.md new file mode 100644 index 000000000..ef4f23b85 --- /dev/null +++ b/docs/account-entity-design-analysis.md @@ -0,0 +1,1031 @@ +# Account 实体多渠道整合设计方案详解 + +## 目录 +- [设计背景](#设计背景) +- [方案对比](#方案对比) +- [推荐方案详解](#推荐方案详解) +- [实现示例](#实现示例) + +--- + +## 设计背景 + +### 多渠道场景特点 + +在B2B CDP场景下,一个企业账户(Account)的数据来源于多个渠道: + +```mermaid +graph TB + A[企业账户Account
统一社会信用代码91330000MA27XYZ123
阿里巴巴网络技术有限公司] + + A --> B1[官网渠道
注册时间2023-01-15
企业ID: WEB_A001] + A --> B2[企业微信渠道
认证时间2023-02-20
CorpID: wx123456] + A --> B3[线下展会渠道
获取时间2023-03-10
展会编号: EXH_001] + A --> B4[合作伙伴渠道
推荐时间2023-04-05
伙伴ID: PT_A001] + + A --> C1[25个联系人Contact] + A --> C2[8个商机Opportunity] + A --> C3[45个线索Lead] + A --> C4[156个触点Touchpoint] + A --> C5[500万累计收入] + A --> C6[85分健康度评分] +``` + +### 核心设计问题 + +**问题1:基础字段 vs 汇总字段** +- 基础字段:`account_name`、`industry_id`、`province`、`city` 等基本信息 +- 汇总字段:`total_contacts`、`total_opportunities`、`total_revenue`、`health_score` 等统计数据 + +**问题2:实时性 vs 性能** +- 实时计算:每次查询都JOIN多张表计算,准确但慢 +- 预先汇总:定期计算存储,快速但有延迟 + +**问题3:数据一致性** +- 如何保证汇总数据与明细数据一致 +- 如何处理并发更新问题 + +--- + +## 方案对比 + +### 方案一:所有字段都在Account表中(不推荐) + +```mermaid +erDiagram + Account { + varchar account_id PK "主键" + varchar account_name "企业名称" + varchar unified_social_credit_code UK "统一信用代码" + varchar account_type "客户类型" + varchar account_status "账户状态" + varchar industry_id FK "行业ID" + varchar province "省份" + varchar city "城市" + datetime created_at "创建时间" + datetime updated_at "更新时间" + int total_contacts "联系人总数-汇总" + int total_opportunities "商机总数-汇总" + int total_leads "线索总数-汇总" + decimal total_revenue "累计收入-汇总" + decimal lifetime_value "生命周期价值-汇总" + int won_opportunities "赢单数-汇总" + int lost_opportunities "输单数-汇总" + decimal win_rate "赢单率-汇总" + int total_touchpoints "触点总数-汇总" + int health_score "健康度评分-汇总" + datetime last_activity_at "最后活跃时间-汇总" + datetime last_purchase_at "最后购买时间-汇总" + int days_since_last_contact "距上次联系天数-汇总" + } +``` + +**优点:** +- 表结构简单,只有一张表 +- 查询简单,不需要JOIN + +**缺点:** +- 字段过多,单表臃肿(20+个字段) +- 基础字段和汇总字段混在一起,职责不清晰 +- 汇总字段更新频繁,影响主表性能 +- 无法记录历史汇总数据 +- 扩展性差,增加新的汇总维度需要修改表结构 + +**性能问题:** +```sql +-- 每次更新汇总数据都需要锁定Account表 +UPDATE Account +SET total_contacts = (SELECT COUNT(*) FROM Contact WHERE primary_account_id = 'ACC_001'), + total_opportunities = (SELECT COUNT(*) FROM Opportunity WHERE account_id = 'ACC_001'), + total_revenue = (SELECT SUM(amount) FROM Opportunity WHERE account_id = 'ACC_001' AND is_won = true), + health_score = ... -- 复杂计算 +WHERE account_id = 'ACC_001'; + +-- 问题: +-- 1. 多个子查询,性能差 +-- 2. 频繁更新导致行锁竞争 +-- 3. 
基础信息查询也会被汇总字段更新阻塞 +``` + +--- + +### 方案二:基础字段和汇总字段分离(推荐) + +```mermaid +erDiagram + Account ||--|| AccountSummary : aggregates + Account ||--o{ AccountChannelIdentity : has + + Account { + varchar account_id PK "主键" + varchar account_name "企业名称" + varchar unified_social_credit_code UK "统一信用代码" + varchar account_type "客户类型" + varchar account_status "账户状态" + varchar account_level "客户等级" + decimal annual_revenue "年营收-企业自身" + int employee_count "员工数-企业自身" + varchar industry_id FK "行业ID" + varchar province "省份" + varchar city "城市" + varchar district "区县" + varchar company_website "企业官网" + varchar account_source "来源渠道" + varchar primary_channel_id FK "主要渠道" + varchar owner_user_id FK "负责人" + datetime created_at "创建时间" + datetime updated_at "更新时间" + varchar lifecycle_stage "生命周期阶段" + } + + AccountSummary { + varchar summary_id PK "主键" + varchar account_id FK "账户ID-唯一" + int total_contacts "联系人总数" + int total_opportunities "商机总数" + int total_leads "线索总数" + decimal total_revenue "累计收入" + decimal lifetime_value "生命周期价值" + int won_opportunities "赢单数量" + int lost_opportunities "输单数量" + decimal win_rate "赢单率" + int total_touchpoints "触点总数" + int active_campaigns "活跃营销活动数" + int health_score "健康度评分" + datetime last_activity_at "最后活跃时间" + datetime last_purchase_at "最后购买时间" + date first_purchase_date "首次购买日期" + date latest_opportunity_date "最新商机日期" + int days_since_last_contact "距上次联系天数" + datetime calculated_at "计算时间" + datetime updated_at "更新时间" + } + + AccountChannelIdentity { + varchar identity_id PK "主键" + varchar account_id FK "账户ID" + varchar channel_id FK "渠道ID" + varchar channel_account_id "渠道内账户ID" + varchar identity_type "身份类型" + boolean is_verified "是否已验证" + datetime first_seen_at "首次发现时间" + datetime last_seen_at "最后发现时间" + json additional_info "附加信息" + } +``` + +**优点:** +1. **职责清晰** + - Account表:存储企业基础信息(相对稳定) + - AccountSummary表:存储汇总统计数据(频繁变化) + - AccountChannelIdentity表:存储多渠道身份映射 + +2. **性能优化** + - 基础信息查询不受汇总数据更新影响 + - 汇总数据可以异步计算更新 + - 可以针对不同表设置不同的缓存策略 + +3. 
**扩展性强** + - 增加新的汇总维度只需修改AccountSummary表 + - 可以按时间维度存储历史汇总数据 + - 支持多版本汇总模型 + +4. **数据一致性** + - 通过`calculated_at`字段明确知道汇总数据的计算时间 + - 可以通过对比`calculated_at`和明细数据的`updated_at`判断是否需要重新计算 + +**缺点:** +- 需要JOIN查询才能获取完整数据(但可以通过缓存优化) +- 需要维护汇总计算逻辑 + +--- + +## 推荐方案详解 + +### 1. 表结构设计 + +#### Account 基础表 + +**设计原则:** +- 只存储企业的基础属性信息 +- 这些信息相对稳定,变化频率低 +- 直接来源于企业自身,不是统计计算得出 + +```sql +CREATE TABLE Account ( + -- 主键和唯一标识 + account_id VARCHAR(64) PRIMARY KEY COMMENT '账户唯一标识', + account_name VARCHAR(200) NOT NULL COMMENT '企业名称', + unified_social_credit_code VARCHAR(18) UNIQUE COMMENT '统一社会信用代码', + + -- 分类和状态 + account_type VARCHAR(50) NOT NULL COMMENT '客户类型:CUSTOMER/PARTNER/PROSPECT/COMPETITOR', + account_status VARCHAR(50) NOT NULL COMMENT '账户状态:ACTIVE/DORMANT/CHURNED/BLACKLIST', + account_level VARCHAR(50) COMMENT '客户等级:STRATEGIC/IMPORTANT/NORMAL', + lifecycle_stage VARCHAR(50) COMMENT '生命周期阶段:AWARENESS/CONSIDERATION/DECISION/RETENTION/EXPANSION', + + -- 企业基本信息(来自企业自身,非统计) + annual_revenue DECIMAL(18,2) COMMENT '企业年营收(万元)-来自企业工商信息', + employee_count INT COMMENT '企业员工数-来自企业工商信息', + industry_id VARCHAR(64) COMMENT '行业分类ID', + + -- 地址信息 + province VARCHAR(50) COMMENT '省份', + city VARCHAR(50) COMMENT '城市', + district VARCHAR(50) COMMENT '区县', + company_address VARCHAR(500) COMMENT '详细地址', + company_website VARCHAR(500) COMMENT '企业官网', + + -- 来源和归属 + account_source VARCHAR(100) COMMENT '来源:WEBSITE/EXHIBITION/PARTNER/COLD_CALL', + primary_channel_id VARCHAR(64) COMMENT '主要渠道ID', + owner_user_id VARCHAR(64) COMMENT '负责销售人员ID', + + -- 时间戳 + created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + + -- 扩展字段 + custom_fields JSON COMMENT '自定义扩展字段', + + -- 索引 + INDEX idx_account_name (account_name), + INDEX idx_account_status (account_status), + INDEX idx_account_type_status (account_type, account_status), + INDEX idx_account_owner (owner_user_id), + INDEX idx_account_industry 
(industry_id), + INDEX idx_account_city (province, city), + INDEX idx_account_created (created_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='企业账户基础信息表'; +``` + +#### AccountSummary 汇总表 + +**设计原则:** +- 存储所有统计计算的汇总数据 +- 数据从其他表聚合计算得出 +- 可以定期重新计算保证准确性 + +```sql +CREATE TABLE AccountSummary ( + -- 主键 + summary_id VARCHAR(64) PRIMARY KEY COMMENT '汇总记录ID', + account_id VARCHAR(64) NOT NULL UNIQUE COMMENT '账户ID-唯一', + + -- 关联数据统计 + total_contacts INT DEFAULT 0 COMMENT '关联联系人总数', + total_opportunities INT DEFAULT 0 COMMENT '商机总数', + total_leads INT DEFAULT 0 COMMENT '线索总数', + + -- 收入统计 + total_revenue DECIMAL(18,2) DEFAULT 0 COMMENT '累计成交收入(元)', + lifetime_value DECIMAL(18,2) DEFAULT 0 COMMENT '客户生命周期价值LTV(元)', + current_year_revenue DECIMAL(18,2) DEFAULT 0 COMMENT '本年度收入(元)', + last_year_revenue DECIMAL(18,2) DEFAULT 0 COMMENT '去年收入(元)', + + -- 商机统计 + won_opportunities INT DEFAULT 0 COMMENT '赢单商机数', + lost_opportunities INT DEFAULT 0 COMMENT '输单商机数', + open_opportunities INT DEFAULT 0 COMMENT '进行中商机数', + win_rate DECIMAL(5,2) DEFAULT 0 COMMENT '赢单率(%)', + average_deal_size DECIMAL(18,2) DEFAULT 0 COMMENT '平均成交金额(元)', + + -- 互动统计 + total_touchpoints INT DEFAULT 0 COMMENT '总触点数', + total_events INT DEFAULT 0 COMMENT '总事件数', + active_campaigns INT DEFAULT 0 COMMENT '参与的活跃营销活动数', + + -- 健康度和活跃度 + health_score INT DEFAULT 0 COMMENT '健康度评分(0-100)', + engagement_score INT DEFAULT 0 COMMENT '参与度评分(0-100)', + churn_risk_score INT DEFAULT 0 COMMENT '流失风险评分(0-100,越高风险越大)', + + -- 时间维度统计 + last_activity_at DATETIME COMMENT '最后活跃时间(任何互动)', + last_touchpoint_at DATETIME COMMENT '最后触点时间', + last_purchase_at DATETIME COMMENT '最后购买时间', + last_contact_at DATETIME COMMENT '最后主动联系时间', + first_purchase_date DATE COMMENT '首次购买日期', + latest_opportunity_date DATE COMMENT '最新商机日期', + + -- 天数统计 + days_since_last_activity INT DEFAULT 0 COMMENT '距上次活跃天数', + days_since_last_contact INT DEFAULT 0 COMMENT '距上次联系天数', + days_since_last_purchase INT DEFAULT 0 COMMENT '距上次购买天数', + 
customer_tenure_days INT DEFAULT 0 COMMENT '成为客户的天数', + + -- 元数据 + calculated_at DATETIME NOT NULL COMMENT '汇总计算时间', + updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + + -- 外键和索引 + FOREIGN KEY (account_id) REFERENCES Account(account_id) ON DELETE CASCADE, + INDEX idx_summary_health_score (health_score DESC), + INDEX idx_summary_last_activity (last_activity_at DESC), + INDEX idx_summary_total_revenue (total_revenue DESC), + INDEX idx_summary_win_rate (win_rate DESC), + INDEX idx_summary_churn_risk (churn_risk_score DESC), + INDEX idx_summary_calculated (calculated_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='企业账户汇总统计表'; +``` + +#### AccountChannelIdentity 多渠道身份映射表 + +**设计原则:** +- 记录同一个企业在不同渠道的身份标识 +- 支持身份识别和合并 +- 记录首次和最后发现时间 + +```sql +CREATE TABLE AccountChannelIdentity ( + -- 主键 + identity_id VARCHAR(64) PRIMARY KEY COMMENT '身份记录ID', + + -- 关联 + account_id VARCHAR(64) NOT NULL COMMENT '统一账户ID', + channel_id VARCHAR(64) NOT NULL COMMENT '渠道ID', + + -- 渠道内身份 + channel_account_id VARCHAR(200) NOT NULL COMMENT '在该渠道内的账户ID', + identity_type VARCHAR(50) COMMENT '身份类型:CORP_ID/WEBSITE_ID/PARTNER_ID', + + -- 验证状态 + is_verified BOOLEAN DEFAULT FALSE COMMENT '是否已验证', + verification_method VARCHAR(50) COMMENT '验证方式:MANUAL/AUTO/CREDIT_CODE', + + -- 时间记录 + first_seen_at DATETIME NOT NULL COMMENT '首次在该渠道发现时间', + last_seen_at DATETIME NOT NULL COMMENT '最后在该渠道活跃时间', + + -- 附加信息 + additional_info JSON COMMENT '渠道特有的附加信息', + + -- 索引 + UNIQUE KEY uk_account_channel (account_id, channel_id), + INDEX idx_channel_account (channel_id, channel_account_id), + INDEX idx_account (account_id), + INDEX idx_first_seen (first_seen_at), + + FOREIGN KEY (account_id) REFERENCES Account(account_id) ON DELETE CASCADE, + FOREIGN KEY (channel_id) REFERENCES Channel(channel_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='企业账户多渠道身份映射表'; +``` + +--- + +### 2. 数据关系示意图 + +```mermaid +graph TB + subgraph 基础数据层 + A[Account基础表
account_id: ACC_001
account_name: 阿里巴巴
unified_social_credit_code: 9133...123] + end + + subgraph 渠道身份层 + CI1[AccountChannelIdentity
channel: 官网
channel_account_id: WEB_A001
first_seen: 2023-01-15] + CI2[AccountChannelIdentity
channel: 企业微信
channel_account_id: wx123456
first_seen: 2023-02-20] + CI3[AccountChannelIdentity
channel: 线下展会
channel_account_id: EXH_001
first_seen: 2023-03-10] + end + + subgraph 关联明细数据层 + C[Contact表
25条联系人记录] + O[Opportunity表
8条商机记录
5条赢单,2条输单] + L[Lead表
45条线索记录] + T[Touchpoint表
156条触点记录] + end + + subgraph 汇总数据层 + AS[AccountSummary汇总表
summary_id: ACCS_ACC_001
account_id: ACC_001
total_contacts: 25
total_opportunities: 8
won_opportunities: 5
total_revenue: 5000000
win_rate: 71.43
health_score: 85
calculated_at: 2023-11-05 16:00] + end + + A --> CI1 + A --> CI2 + A --> CI3 + + A --> C + A --> O + A --> L + A --> T + + C --> AS + O --> AS + L --> AS + T --> AS + + A -.1对1关系.-> AS +``` + +--- + +### 3. 汇总计算策略 + +#### 计算触发时机 + +```mermaid +flowchart TD + Start[触发汇总计算] --> Type{触发类型} + + Type -->|定时任务| Schedule[定时任务触发
每小时执行一次] + Type -->|实时事件| Event[关键业务事件触发] + Type -->|手动刷新| Manual[管理员手动触发] + + Schedule --> SelectAccounts[选择需要更新的Account
条件:距上次计算超过1小时] + Event --> SelectAccount[选择特定Account
如:商机赢单触发] + Manual --> SelectManual[选择指定Account] + + SelectAccounts --> Calculate[执行汇总计算] + SelectAccount --> Calculate + SelectManual --> Calculate + + Calculate --> Query1[查询Contact数量] + Calculate --> Query2[查询Opportunity统计] + Calculate --> Query3[查询Lead数量] + Calculate --> Query4[查询Touchpoint统计] + Calculate --> Query5[计算健康度评分] + + Query1 --> Aggregate[聚合所有统计数据] + Query2 --> Aggregate + Query3 --> Aggregate + Query4 --> Aggregate + Query5 --> Aggregate + + Aggregate --> Upsert[写入AccountSummary表
INSERT或UPDATE] + Upsert --> UpdateTime[更新calculated_at时间戳] + UpdateTime --> Cache[更新Redis缓存] + Cache --> End[完成] +``` + +#### 计算SQL示例 + +```sql +-- 完整的AccountSummary计算和更新 +INSERT INTO AccountSummary ( + summary_id, + account_id, + total_contacts, + total_opportunities, + total_leads, + total_revenue, + won_opportunities, + lost_opportunities, + open_opportunities, + win_rate, + total_touchpoints, + health_score, + last_activity_at, + last_purchase_at, + days_since_last_contact, + calculated_at +) +SELECT + CONCAT('ACCS_', a.account_id) as summary_id, + a.account_id, + + -- 联系人统计 + COALESCE(c.contact_count, 0) as total_contacts, + + -- 商机统计 + COALESCE(o.opportunity_count, 0) as total_opportunities, + + -- 线索统计 + COALESCE(l.lead_count, 0) as total_leads, + + -- 收入统计 + COALESCE(o.total_revenue, 0) as total_revenue, + COALESCE(o.won_count, 0) as won_opportunities, + COALESCE(o.lost_count, 0) as lost_opportunities, + COALESCE(o.open_count, 0) as open_opportunities, + + -- 赢单率 + CASE + WHEN COALESCE(o.won_count, 0) + COALESCE(o.lost_count, 0) > 0 + THEN ROUND(COALESCE(o.won_count, 0) * 100.0 / (COALESCE(o.won_count, 0) + COALESCE(o.lost_count, 0)), 2) + ELSE 0 + END as win_rate, + + -- 触点统计 + COALESCE(t.touchpoint_count, 0) as total_touchpoints, + + -- 健康度评分(简化版) + CASE + WHEN DATEDIFF(NOW(), COALESCE(t.last_touchpoint, a.created_at)) <= 7 THEN 90 + WHEN DATEDIFF(NOW(), COALESCE(t.last_touchpoint, a.created_at)) <= 30 THEN 70 + WHEN DATEDIFF(NOW(), COALESCE(t.last_touchpoint, a.created_at)) <= 90 THEN 50 + ELSE 30 + END as health_score, + + -- 时间统计 + COALESCE(t.last_touchpoint, a.created_at) as last_activity_at, + o.last_purchase as last_purchase_at, + DATEDIFF(NOW(), COALESCE(t.last_touchpoint, a.created_at)) as days_since_last_contact, + + NOW() as calculated_at + +FROM Account a + +-- 联系人统计 +LEFT JOIN ( + SELECT + primary_account_id, + COUNT(*) as contact_count + FROM Contact + WHERE contact_status = 'ACTIVE' + GROUP BY primary_account_id +) c ON a.account_id = 
c.primary_account_id + +-- 商机统计 +LEFT JOIN ( + SELECT + account_id, + COUNT(*) as opportunity_count, + SUM(CASE WHEN is_won = TRUE THEN amount ELSE 0 END) as total_revenue, + SUM(CASE WHEN is_won = TRUE THEN 1 ELSE 0 END) as won_count, + SUM(CASE WHEN is_lost = TRUE THEN 1 ELSE 0 END) as lost_count, + SUM(CASE WHEN is_won = FALSE AND is_lost = FALSE THEN 1 ELSE 0 END) as open_count, + MAX(CASE WHEN is_won = TRUE THEN actual_close_date END) as last_purchase + FROM Opportunity + GROUP BY account_id +) o ON a.account_id = o.account_id + +-- 线索统计 +LEFT JOIN ( + SELECT + converted_account_id, + COUNT(*) as lead_count + FROM Lead + WHERE converted_account_id IS NOT NULL + GROUP BY converted_account_id +) l ON a.account_id = l.converted_account_id + +-- 触点统计 +LEFT JOIN ( + SELECT + account_id, + COUNT(*) as touchpoint_count, + MAX(touchpoint_time) as last_touchpoint + FROM Touchpoint + GROUP BY account_id +) t ON a.account_id = t.account_id + +WHERE a.account_id = 'ACC_001' -- 可以批量处理多个账户 + +ON DUPLICATE KEY UPDATE + total_contacts = VALUES(total_contacts), + total_opportunities = VALUES(total_opportunities), + total_leads = VALUES(total_leads), + total_revenue = VALUES(total_revenue), + won_opportunities = VALUES(won_opportunities), + lost_opportunities = VALUES(lost_opportunities), + open_opportunities = VALUES(open_opportunities), + win_rate = VALUES(win_rate), + total_touchpoints = VALUES(total_touchpoints), + health_score = VALUES(health_score), + last_activity_at = VALUES(last_activity_at), + last_purchase_at = VALUES(last_purchase_at), + days_since_last_contact = VALUES(days_since_last_contact), + calculated_at = VALUES(calculated_at), + updated_at = NOW(); +``` + +--- + +### 4. 
查询使用场景 + +#### 场景1:查询Account基础信息(高频,快速) + +```sql +-- 只需要基础信息,不需要汇总数据 +-- 查询速度快,不受汇总数据影响 +SELECT + account_id, + account_name, + account_type, + account_status, + industry_id, + province, + city, + owner_user_id +FROM Account +WHERE account_id = 'ACC_001'; +``` + +#### 场景2:查询Account完整信息(包含汇总) + +```sql +-- 需要基础信息 + 汇总数据 +-- 使用LEFT JOIN,即使汇总数据不存在也能查到基础信息 +SELECT + a.account_id, + a.account_name, + a.account_type, + a.account_status, + a.industry_id, + a.province, + a.city, + a.owner_user_id, + -- 汇总数据 + COALESCE(s.total_contacts, 0) as total_contacts, + COALESCE(s.total_opportunities, 0) as total_opportunities, + COALESCE(s.total_revenue, 0) as total_revenue, + COALESCE(s.health_score, 0) as health_score, + COALESCE(s.win_rate, 0) as win_rate, + s.last_activity_at, + s.calculated_at +FROM Account a +LEFT JOIN AccountSummary s ON a.account_id = s.account_id +WHERE a.account_id = 'ACC_001'; +``` + +#### 场景3:客户列表查询(按汇总指标排序和筛选) + +```sql +-- 查询健康度评分低于60分的客户,按收入排序 +SELECT + a.account_id, + a.account_name, + a.account_status, + a.owner_user_id, + s.health_score, + s.total_revenue, + s.win_rate, + s.last_activity_at, + s.days_since_last_contact +FROM Account a +INNER JOIN AccountSummary s ON a.account_id = s.account_id +WHERE a.account_status = 'ACTIVE' + AND s.health_score < 60 +ORDER BY s.total_revenue DESC +LIMIT 50; +``` + +#### 场景4:客户360度视图(完整数据) + +```sql +-- 查询某个客户的完整360度视图 +-- 包括基础信息、汇总数据、多渠道身份 +SELECT + -- 基础信息 + a.account_id, + a.account_name, + a.account_type, + a.account_status, + a.account_level, + a.industry_id, + a.province, + a.city, + a.owner_user_id, + a.lifecycle_stage, + + -- 汇总统计 + s.total_contacts, + s.total_opportunities, + s.total_revenue, + s.lifetime_value, + s.won_opportunities, + s.win_rate, + s.health_score, + s.last_activity_at, + s.days_since_last_contact, + s.calculated_at, + + -- 多渠道身份 + GROUP_CONCAT( + CONCAT(ch.channel_name, ':', ci.channel_account_id) + ORDER BY ci.first_seen_at + SEPARATOR '; ' + ) as channel_identities + +FROM Account 
a +LEFT JOIN AccountSummary s ON a.account_id = s.account_id +LEFT JOIN AccountChannelIdentity ci ON a.account_id = ci.account_id +LEFT JOIN Channel ch ON ci.channel_id = ch.channel_id +WHERE a.account_id = 'ACC_001' +GROUP BY a.account_id; +``` + +--- + +### 5. 缓存策略 + +#### Redis缓存结构 + +```python +# 缓存键设计 +# 基础信息缓存(较长TTL,1小时) +account:basic:{account_id} -> JSON(Account基础字段) + +# 汇总数据缓存(较短TTL,5分钟) +account:summary:{account_id} -> JSON(AccountSummary所有字段) + +# 完整数据缓存(综合TTL,15分钟) +account:full:{account_id} -> JSON(Account + AccountSummary) + +# 示例数据 +account:basic:ACC_001 = { + "account_id": "ACC_001", + "account_name": "阿里巴巴网络技术有限公司", + "account_type": "CUSTOMER", + "account_status": "ACTIVE", + "industry_id": "IND_001", + "province": "浙江省", + "city": "杭州市", + "owner_user_id": "USER_001" +} + +account:summary:ACC_001 = { + "summary_id": "ACCS_ACC_001", + "account_id": "ACC_001", + "total_contacts": 25, + "total_opportunities": 8, + "total_revenue": 5000000.00, + "won_opportunities": 5, + "win_rate": 71.43, + "health_score": 85, + "last_activity_at": "2023-11-05 14:30:00", + "calculated_at": "2023-11-05 16:00:00" +} +``` + +--- + +## 实现示例 + +### 完整代码示例(Python + SQLAlchemy) + +```python +from sqlalchemy import Column, String, Integer, Numeric, DateTime, Boolean, JSON, ForeignKey, case +from sqlalchemy.orm import relationship +from datetime import datetime + +# Account基础表模型 +class Account(Base): + __tablename__ = 'account' + + account_id = Column(String(64), primary_key=True) + account_name = Column(String(200), nullable=False) + unified_social_credit_code = Column(String(18), unique=True) + account_type = Column(String(50), nullable=False) + account_status = Column(String(50), nullable=False) + account_level = Column(String(50)) + industry_id = Column(String(64)) + province = Column(String(50)) + city = Column(String(50)) + owner_user_id = Column(String(64)) + created_at = Column(DateTime, default=datetime.now) + updated_at = Column(DateTime, default=datetime.now, 
onupdate=datetime.now) + lifecycle_stage = Column(String(50)) + + # 关系 + summary = relationship("AccountSummary", uselist=False, back_populates="account") + channel_identities = relationship("AccountChannelIdentity", back_populates="account") + contacts = relationship("Contact", back_populates="account") + opportunities = relationship("Opportunity", back_populates="account") + +# AccountSummary汇总表模型 +class AccountSummary(Base): + __tablename__ = 'account_summary' + + summary_id = Column(String(64), primary_key=True) + account_id = Column(String(64), ForeignKey('account.account_id'), unique=True, nullable=False) + + # 统计字段 + total_contacts = Column(Integer, default=0) + total_opportunities = Column(Integer, default=0) + total_leads = Column(Integer, default=0) + total_revenue = Column(Numeric(18, 2), default=0) + lifetime_value = Column(Numeric(18, 2), default=0) + won_opportunities = Column(Integer, default=0) + lost_opportunities = Column(Integer, default=0) + win_rate = Column(Numeric(5, 2), default=0) + total_touchpoints = Column(Integer, default=0) + health_score = Column(Integer, default=0) + + # 时间字段 + last_activity_at = Column(DateTime) + last_purchase_at = Column(DateTime) + days_since_last_contact = Column(Integer, default=0) + + # 元数据 + calculated_at = Column(DateTime, nullable=False) + updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now) + + # 关系 + account = relationship("Account", back_populates="summary") + +# AccountChannelIdentity渠道身份表模型 +class AccountChannelIdentity(Base): + __tablename__ = 'account_channel_identity' + + identity_id = Column(String(64), primary_key=True) + account_id = Column(String(64), ForeignKey('account.account_id'), nullable=False) + channel_id = Column(String(64), nullable=False) + channel_account_id = Column(String(200), nullable=False) + identity_type = Column(String(50)) + is_verified = Column(Boolean, default=False) + first_seen_at = Column(DateTime, nullable=False) + last_seen_at = Column(DateTime, 
nullable=False) + additional_info = Column(JSON) + + # 关系 + account = relationship("Account", back_populates="channel_identities") + + +# 业务服务类 +class AccountService: + def __init__(self, db_session, redis_client): + self.db = db_session + self.redis = redis_client + + def get_account_full(self, account_id: str) -> dict: + """获取Account完整信息(包含汇总数据)""" + # 1. 尝试从缓存获取 + cache_key = f"account:full:{account_id}" + cached = self.redis.get(cache_key) + if cached: + return json.loads(cached) + + # 2. 从数据库查询 + account = self.db.query(Account).filter_by(account_id=account_id).first() + if not account: + return None + + # 3. 构建返回数据 + result = { + # 基础信息 + "account_id": account.account_id, + "account_name": account.account_name, + "account_type": account.account_type, + "account_status": account.account_status, + "industry_id": account.industry_id, + "province": account.province, + "city": account.city, + "owner_user_id": account.owner_user_id, + + # 汇总数据 + "summary": { + "total_contacts": account.summary.total_contacts if account.summary else 0, + "total_opportunities": account.summary.total_opportunities if account.summary else 0, + "total_revenue": float(account.summary.total_revenue) if account.summary else 0, + "health_score": account.summary.health_score if account.summary else 0, + "win_rate": float(account.summary.win_rate) if account.summary else 0, + "last_activity_at": account.summary.last_activity_at.isoformat() if account.summary and account.summary.last_activity_at else None, + "calculated_at": account.summary.calculated_at.isoformat() if account.summary and account.summary.calculated_at else None, + }, + + # 多渠道身份 + "channel_identities": [ + { + "channel_id": ci.channel_id, + "channel_account_id": ci.channel_account_id, + "is_verified": ci.is_verified, + "first_seen_at": ci.first_seen_at.isoformat() + } + for ci in account.channel_identities + ] + } + + # 4. 
写入缓存(15分钟) + self.redis.setex(cache_key, 900, json.dumps(result)) + + return result + + def calculate_account_summary(self, account_id: str): + """计算并更新Account汇总数据""" + from sqlalchemy import func + + # 1. 统计联系人数量 + total_contacts = self.db.query(func.count(Contact.contact_id))\ + .filter(Contact.primary_account_id == account_id)\ + .filter(Contact.contact_status == 'ACTIVE')\ + .scalar() or 0 + + # 2. 统计商机数据 + opp_stats = self.db.query( + func.count(Opportunity.opportunity_id).label('total'), + func.sum(case((Opportunity.is_won == True, Opportunity.amount), else_=0)).label('revenue'), + func.sum(case((Opportunity.is_won == True, 1), else_=0)).label('won'), + func.sum(case((Opportunity.is_lost == True, 1), else_=0)).label('lost'), + ).filter(Opportunity.account_id == account_id).first() + + total_opportunities = opp_stats.total or 0 + total_revenue = opp_stats.revenue or 0 + won_opportunities = opp_stats.won or 0 + lost_opportunities = opp_stats.lost or 0 + + # 3. 计算赢单率 + win_rate = 0 + if won_opportunities + lost_opportunities > 0: + win_rate = round(won_opportunities * 100.0 / (won_opportunities + lost_opportunities), 2) + + # 4. 统计触点数量 + total_touchpoints = self.db.query(func.count(Touchpoint.touchpoint_id))\ + .filter(Touchpoint.account_id == account_id)\ + .scalar() or 0 + + # 5. 计算健康度评分(简化版) + last_touchpoint = self.db.query(func.max(Touchpoint.touchpoint_time))\ + .filter(Touchpoint.account_id == account_id)\ + .scalar() + + if last_touchpoint: + days_since_last = (datetime.now() - last_touchpoint).days + if days_since_last <= 7: + health_score = 90 + elif days_since_last <= 30: + health_score = 70 + elif days_since_last <= 90: + health_score = 50 + else: + health_score = 30 + else: + health_score = 0 + + # 6. 
更新或插入AccountSummary + summary = self.db.query(AccountSummary).filter_by(account_id=account_id).first() + + if summary: + # 更新 + summary.total_contacts = total_contacts + summary.total_opportunities = total_opportunities + summary.total_revenue = total_revenue + summary.won_opportunities = won_opportunities + summary.lost_opportunities = lost_opportunities + summary.win_rate = win_rate + summary.total_touchpoints = total_touchpoints + summary.health_score = health_score + summary.last_activity_at = last_touchpoint + summary.calculated_at = datetime.now() + else: + # 插入 + summary = AccountSummary( + summary_id=f"ACCS_{account_id}", + account_id=account_id, + total_contacts=total_contacts, + total_opportunities=total_opportunities, + total_revenue=total_revenue, + won_opportunities=won_opportunities, + lost_opportunities=lost_opportunities, + win_rate=win_rate, + total_touchpoints=total_touchpoints, + health_score=health_score, + last_activity_at=last_touchpoint, + calculated_at=datetime.now() + ) + self.db.add(summary) + + self.db.commit() + + # 7. 清除缓存 + self.redis.delete(f"account:full:{account_id}") + self.redis.delete(f"account:summary:{account_id}") +``` + +--- + +## 总结建议 + +### 强烈推荐:基础字段和汇总字段分离 + +**核心理由:** + +1. **职责分离原则** + - Account表:企业基础属性(相对稳定,来自企业自身) + - AccountSummary表:业务统计数据(频繁变化,从其他表计算得出) + - 符合单一职责原则 + +2. **性能优化** + - 基础信息查询不受汇总计算影响 + - 可以针对不同表设置不同的缓存策略 + - 汇总计算可以异步执行,不阻塞业务流程 + +3. **扩展性强** + - 新增汇总维度只需修改AccountSummary表 + - 可以支持多版本汇总(历史汇总表) + - 便于进行A/B测试不同的汇总算法 + +4. **数据一致性** + - 通过`calculated_at`字段明确知道数据新鲜度 + - 可以通过定时任务保证最终一致性 + - 支持重新计算修正数据 + +5. **多渠道支持** + - AccountChannelIdentity独立表管理渠道身份 + - 清晰的身份映射关系 + - 便于渠道数据追溯和审计 + +**实施建议:** + +1. **初期阶段** + - 先实现Account和AccountSummary两张表 + - 使用定时任务(每小时)计算汇总数据 + - 在应用层做数据聚合展示 + +2. **优化阶段** + - 引入Redis缓存,缓存热点Account数据 + - 实现关键事件的实时触发更新 + - 优化汇总计算SQL性能 + +3. 
**进阶阶段** + - 实现汇总计算的增量更新 + - 引入消息队列异步处理 + - 实现历史汇总数据归档 diff --git a/docs/b2b-cdp-core-architecture.md b/docs/b2b-cdp-core-architecture.md new file mode 100644 index 000000000..98a17bf62 --- /dev/null +++ b/docs/b2b-cdp-core-architecture.md @@ -0,0 +1,1043 @@ +# B2B CDP 核心架构设计方案 + +## 目录 +- [系统架构总览](#系统架构总览) +- [核心实体设计](#核心实体设计) +- [关键业务流程](#关键业务流程) +- [技术实现方案](#技术实现方案) + +--- + +## 系统架构总览 + +### 整体架构图 + +```mermaid +graph TB + subgraph 数据源层 + CRM[CRM系统] + WEWORK[企业微信] + WECHAT[微信公众号] + INTERNAL[内部系统] + end + + subgraph 数据接入层 + GATEWAY[API网关] + ADAPTER1[CRM适配器] + ADAPTER2[企微适配器] + ADAPTER3[公众号适配器] + end + + subgraph 消息队列 + KAFKA[Kafka消息队列] + end + + subgraph 数据处理层 + CLEAN[数据清洗服务] + MAPPING[身份映射服务] + MERGE[数据合并服务] + TAG[标签引擎] + SEGMENT[分群引擎] + end + + subgraph 数据存储层 + PG[(PostgreSQL)] + REDIS[(Redis缓存)] + ES[(Elasticsearch)] + end + + subgraph 应用层 + API[业务API] + QUERY[查询服务] + end + + CRM --> ADAPTER1 + WEWORK --> ADAPTER2 + WECHAT --> ADAPTER3 + INTERNAL --> GATEWAY + + ADAPTER1 --> GATEWAY + ADAPTER2 --> GATEWAY + ADAPTER3 --> GATEWAY + + GATEWAY --> KAFKA + KAFKA --> CLEAN + CLEAN --> MAPPING + MAPPING --> MERGE + MERGE --> PG + + PG --> TAG + PG --> SEGMENT + TAG --> PG + SEGMENT --> PG + + PG --> REDIS + PG --> ES + + API --> QUERY + QUERY --> REDIS + QUERY --> PG + QUERY --> ES +``` + +### 核心流程说明 + +**六大核心流程:** +1. **数据采集**:多渠道数据统一接入 +2. **数据清洗**:格式标准化、数据验证 +3. **身份映射**:识别不同渠道的同一主体 +4. **全渠道整合**:合并为统一的实体 +5. **标签打标**:根据规则和行为打标签 +6. **客户圈人**:基于条件创建客户分群 + +--- + +## 核心实体设计 + +### 1. 
Account账户实体模型 + +```mermaid +erDiagram + Account { + varchar account_id PK + varchar account_name + varchar unified_credit_code UK + varchar account_status + varchar province + varchar city + datetime created_at + datetime updated_at + } + + AccountSummary { + varchar summary_id PK + varchar account_id FK + int total_contacts + int total_opportunities + decimal total_revenue + int health_score + datetime calculated_at + } + + AccountChannelIdentity { + varchar identity_id PK + varchar account_id FK + varchar channel_id FK + varchar channel_account_id + boolean is_verified + datetime first_seen_at + } + + Account ||--|| AccountSummary : 汇总 + Account ||--o{ AccountChannelIdentity : 多渠道身份 +``` + +**表关系说明:** +- Account(1) : AccountSummary(1)- 一对一汇总关系 +- Account(1) : AccountChannelIdentity(N)- 一个企业在多个渠道有多个身份 + +--- + +### 2. Contact联系人实体模型 + +```mermaid +erDiagram + Contact { + varchar contact_id PK + varchar contact_name + varchar mobile_phone UK + varchar email UK + varchar primary_account_id FK + varchar contact_status + datetime created_at + } + + ContactSummary { + varchar summary_id PK + varchar contact_id FK + int total_interactions + int engagement_score + datetime last_activity_at + datetime calculated_at + } + + ContactChannelIdentity { + varchar identity_id PK + varchar contact_id FK + varchar channel_id FK + varchar channel_user_id + boolean is_verified + datetime first_seen_at + } + + AccountContactRelation { + varchar relation_id PK + varchar account_id FK + varchar contact_id FK + varchar role_in_account + boolean is_primary + } + + Contact ||--|| ContactSummary : 汇总 + Contact ||--o{ ContactChannelIdentity : 多渠道身份 + Contact ||--o{ AccountContactRelation : 企业关系 +``` + +**表关系说明:** +- Contact(1) : ContactSummary(1)- 一对一汇总关系 +- Contact(1) : ContactChannelIdentity(N)- 一个联系人在多个渠道有多个身份 +- Contact(N) : Account(M)- 通过AccountContactRelation关联 + +--- + +### 3. 
Lead线索实体模型 + +```mermaid +erDiagram + Lead { + varchar lead_id PK + varchar lead_name + varchar company_name + varchar mobile_phone + varchar email + varchar channel_id FK + varchar lead_status + int lead_score + datetime created_at + datetime converted_at + varchar converted_contact_id FK + } + + LeadSummary { + varchar summary_id PK + varchar lead_id FK + int form_submissions + int days_in_pipeline + datetime last_activity_at + datetime calculated_at + } + + LeadChannelIdentity { + varchar identity_id PK + varchar lead_id FK + varchar channel_id FK + varchar channel_user_id + datetime captured_at + } + + Lead ||--|| LeadSummary : 汇总 + Lead ||--o{ LeadChannelIdentity : 多渠道身份 +``` + +**表关系说明:** +- Lead(1) : LeadSummary(1)- 一对一汇总关系 +- Lead(1) : LeadChannelIdentity(N)- 一个线索可能来自多个渠道 +- Lead在转化时通过converted_contact_id关联到Contact + +--- + +### 4. 标签系统实体模型 + +```mermaid +erDiagram + Tag { + varchar tag_id PK + varchar tag_name UK + varchar tag_category + varchar tag_type + varchar description + datetime created_at + } + + TagRelation { + varchar relation_id PK + varchar tag_id FK + varchar entity_type + varchar entity_id FK + datetime tagged_at + boolean is_auto + } + + TagRule { + varchar rule_id PK + varchar tag_id FK + varchar rule_type + text rule_definition + boolean is_active + datetime created_at + } + + Tag ||--o{ TagRelation : 应用于 + Tag ||--o{ TagRule : 规则 +``` + +**表关系说明:** +- Tag(1) : TagRelation(N)- 一个标签可以打给多个实体 +- Tag(1) : TagRule(N)- 一个标签可以有多个自动打标规则 +- TagRelation通过entity_type和entity_id关联到Account/Contact/Lead + +--- + +### 5. 
分群系统实体模型 + +```mermaid +erDiagram + Segment { + varchar segment_id PK + varchar segment_name + varchar target_entity_type + text segment_rules + int member_count + boolean is_dynamic + datetime last_calculated_at + datetime created_at + } + + SegmentMember { + varchar member_id PK + varchar segment_id FK + varchar entity_type + varchar entity_id FK + datetime joined_at + boolean is_active + } + + Segment ||--o{ SegmentMember : 包含 +``` + +**表关系说明:** +- Segment(1) : SegmentMember(N)- 一个分群包含多个成员 +- SegmentMember通过entity_type和entity_id关联到Account/Contact/Lead + +--- + +### 6. 渠道实体模型 + +```mermaid +erDiagram + Channel { + varchar channel_id PK + varchar channel_name + varchar channel_type + varchar channel_status + datetime created_at + } + + Campaign { + varchar campaign_id PK + varchar campaign_name + varchar channel_id FK + date start_date + date end_date + datetime created_at + } + + Channel ||--o{ Campaign : 发起 +``` + +**表关系说明:** +- Channel(1) : Campaign(N)- 一个渠道可以发起多个营销活动 + +--- + +## 关键业务流程 + +### 1. 数据采集与清洗流程 + +```mermaid +sequenceDiagram + participant 数据源 as 数据源
CRM/企微/公众号 + participant 适配器 as 渠道适配器 + participant 网关 as API网关 + participant Kafka as Kafka队列 + participant 清洗 as 数据清洗服务 + participant 数据库 as PostgreSQL + + 数据源->>适配器: 1.推送原始数据 + Note over 适配器: 数据格式转换
统一字段映射 + + 适配器->>网关: 2.发送标准格式数据 + Note over 网关: 数据验证
签名校验
限流控制 + + 网关->>Kafka: 3.写入消息队列
快速响应 + 网关-->>数据源: 4.返回接收成功 + + Kafka->>清洗: 5.消费消息 + Note over 清洗: 数据清洗处理 + + 清洗->>清洗: 6.格式标准化 + Note over 清洗: - 手机号格式统一
- 企业名称清洗
- 地址标准化
- 必填字段验证 + + 清洗->>清洗: 7.数据去重 + Note over 清洗: - 同批次去重
- 24小时内去重 + + 清洗->>清洗: 8.数据验证 + Note over 清洗: - 手机号格式
- 邮箱格式
- 身份证格式
- 企业信用代码 + + alt 验证通过 + 清洗->>数据库: 9a.写入原始数据表
保留完整记录 + 清洗->>Kafka: 9b.发送到身份映射队列 + Note over 清洗: 继续下一步处理 + else 验证失败 + 清洗->>数据库: 9c.写入异常数据表
人工处理 + Note over 清洗: 记录失败原因 + end +``` + +**关键点:** +- 适配器层:各渠道数据统一格式 +- 网关层:快速响应,写入队列后立即返回 +- 清洗层:格式标准化、去重、验证 +- 异步处理:不阻塞数据采集主流程 + +--- + +### 2. 身份映射与全渠道整合流程 + +```mermaid +sequenceDiagram + participant Kafka as Kafka队列 + participant 映射 as 身份映射服务 + participant 数据库 as PostgreSQL + participant Redis as Redis缓存 + participant 合并 as 数据合并服务 + + Kafka->>映射: 1.消费清洗后数据 + + 映射->>映射: 2.提取身份标识符 + Note over 映射: - 手机号
- 邮箱
- 企业信用代码
- 企业名称 + + 映射->>Redis: 3.查询缓存
是否已有映射 + + alt 缓存命中 + Redis-->>映射: 4a.返回已有实体ID + 映射->>合并: 4b.直接进入合并流程 + else 缓存未命中 + Redis-->>映射: 4c.缓存MISS + + 映射->>数据库: 5.按优先级查询匹配 + Note over 数据库: Contact查询优先级:
1. 手机号精确匹配
2. 邮箱精确匹配
3. 企微UserID匹配 + + 数据库-->>映射: 6.返回查询结果 + + alt 找到唯一匹配 + 映射->>映射: 7a.确认为同一实体 + 映射->>Redis: 7b.写入映射缓存 + 映射->>合并: 7c.进入合并流程 + else 找到多个匹配 + 映射->>数据库: 7d.写入待审核队列 + 映射->>映射: 7e.触发人工审核流程 + Note over 映射: 疑似重复
需人工确认 + else 未找到匹配 + 映射->>数据库: 7f.创建新实体 + 映射->>数据库: 7g.创建ChannelIdentity + 映射->>Redis: 7h.写入映射缓存 + Note over 映射: 新客户首次进入 + end + end + + 合并->>数据库: 8.读取现有数据 + 合并->>合并: 9.数据合并策略 + Note over 合并: - 非空字段优先保留
- 最新数据优先
- 重要字段不覆盖 + + 合并->>数据库: 10.更新实体数据 + 合并->>数据库: 11.新增或更新ChannelIdentity记录 + Note over 数据库: 记录本次渠道来源 + + 合并->>Redis: 12.更新实体缓存 + 合并->>Kafka: 13.发送更新事件
触发标签和汇总 +``` + +**关键点:** +- 缓存优先:减少数据库查询压力 +- 匹配优先级:手机号 > 邮箱 > 企微ID +- 疑似重复:人工审核避免误合并 +- 数据合并策略:保护重要字段不被覆盖 + +--- + +### 3. 标签打标流程 + +```mermaid +sequenceDiagram + participant Kafka as Kafka队列 + participant 标签引擎 as 标签引擎 + participant 规则库 as 规则库 + participant 数据库 as PostgreSQL + participant Redis as Redis缓存 + + Kafka->>标签引擎: 1.消费实体更新事件 + Note over Kafka: 触发条件:
- 实体创建
- 实体更新
- 定时批量 + + 标签引擎->>规则库: 2.加载标签规则 + 规则库-->>标签引擎: 3.返回活跃规则列表 + + loop 遍历每个标签规则 + 标签引擎->>标签引擎: 4.评估规则条件 + Note over 标签引擎: 规则类型:
- 属性规则
- 行为规则
- 统计规则
- 时间规则 + + alt 规则条件满足 + 标签引擎->>数据库: 5a.查询是否已有该标签 + + alt 标签不存在 + 标签引擎->>数据库: 6a.新增标签关系 + Note over 数据库: INSERT TagRelation + else 标签已存在 + 标签引擎->>标签引擎: 6b.跳过(幂等性) + end + else 规则条件不满足 + 标签引擎->>数据库: 5b.查询是否已有该标签 + + alt 标签存在且is_auto=true + 标签引擎->>数据库: 6c.删除标签关系 + Note over 数据库: DELETE TagRelation
标签自动移除 + end + end + end + + 标签引擎->>Redis: 7.更新标签缓存 + Note over Redis: 缓存实体的所有标签
用于快速查询 + + 标签引擎->>数据库: 8.记录打标日志 + Note over 数据库: 审计追溯 +``` + +**标签规则示例:** + +``` +属性规则: +- 行业 = "互联网" → 打标签"互联网行业" +- 年营收 > 1亿 → 打标签"大型企业" +- 省份 = "浙江" → 打标签"浙江客户" + +行为规则: +- 30天内访问次数 > 10 → 打标签"高活跃" +- 下载过白皮书 → 打标签"内容营销线索" +- 参加过线下活动 → 打标签"线下活动客户" + +统计规则: +- 商机总数 > 5 → 打标签"重点跟进客户" +- 成交金额 > 100万 → 打标签"高价值客户" +- 流失天数 > 180 → 打标签"流失预警" + +时间规则: +- 注册距今 < 7天 → 打标签"新客户" +- 距最后活跃 < 30天 → 打标签"活跃客户" +``` + +--- + +### 4. 客户圈人(分群)流程 + +```mermaid +sequenceDiagram + participant 用户 as 营销人员 + participant 前端 as 分群界面 + participant 分群引擎 as 分群引擎 + participant 数据库 as PostgreSQL + participant Redis as Redis缓存 + participant Kafka as Kafka队列 + + 用户->>前端: 1.创建分群
设置筛选条件 + Note over 前端: 条件示例:
- 行业=互联网
- 省份=浙江
- 有标签"高活跃"
- 商机数>3 + + 前端->>分群引擎: 2.提交分群规则 + + 分群引擎->>分群引擎: 3.解析规则
生成SQL查询 + Note over 分群引擎: 规则解析器
转换为SQL WHERE条件 + + 分群引擎->>数据库: 4.执行分群查询 + Note over 数据库: 复杂查询示例:
SELECT a.account_id
FROM Account a
JOIN AccountSummary s ON s.account_id = a.account_id
JOIN TagRelation t ON t.entity_type = 'ACCOUNT' AND t.entity_id = a.account_id
WHERE 条件... + + 数据库-->>分群引擎: 5.返回符合条件的实体ID列表 + + 分群引擎->>分群引擎: 6.统计分群人数 + + alt 动态分群 + 分群引擎->>数据库: 7a.创建分群记录
is_dynamic=true + Note over 数据库: 不保存成员
每次实时计算 + else 静态分群 + 分群引擎->>数据库: 7b.创建分群记录
is_dynamic=false + + loop 批量插入成员 + 分群引擎->>数据库: 7c.插入SegmentMember
批量1000条/次 + end + + Note over 数据库: 保存成员快照
不随条件变化 + end + + 分群引擎->>Redis: 8.缓存分群结果 + Note over Redis: 缓存成员ID列表
提升查询性能 + + 分群引擎-->>前端: 9.返回分群结果 + 前端-->>用户: 10.展示分群统计 + Note over 用户: - 分群人数
- 分群规则
- 最后更新时间 + + opt 营销活动使用 + 用户->>前端: 11.选择分群
执行营销动作 + 前端->>Kafka: 12.发送营销任务 + Note over Kafka: - 群发邮件
- 推送消息
- 分配线索 + end +``` + +**动态分群 vs 静态分群:** + +| 对比项 | 动态分群 | 静态分群 | +|--------|---------|---------| +| 成员存储 | 不存储,实时计算 | 存储快照 | +| 成员变化 | 自动更新 | 固定不变 | +| 查询性能 | 较慢(每次计算) | 快速(直接查表) | +| 存储占用 | 小 | 大 | +| 适用场景 | 持续性营销 | 一次性活动 | +| 示例 | "活跃客户"群 | "双11参与客户"群 | + +--- + +### 5. 汇总计算流程 + +```mermaid +sequenceDiagram + participant Kafka as Kafka队列 + participant 汇总服务 as 汇总计算服务 + participant 分布式锁 as Redis分布式锁 + participant 数据库 as PostgreSQL + participant Redis as Redis缓存 + + Kafka->>汇总服务: 1.消费汇总触发事件 + Note over Kafka: 触发场景:
- 新增Contact
- 商机状态变化
- 定时全量刷新 + + 汇总服务->>分布式锁: 2.尝试获取锁
key: lock:summary:ACC001 + + alt 获取锁失败 + 分布式锁-->>汇总服务: 3a.锁被占用 + 汇总服务->>汇总服务: 3b.跳过本次计算 + Note over 汇总服务: 防止重复计算 + else 获取锁成功 + 分布式锁-->>汇总服务: 3c.锁获取成功
超时30秒 + + par 并行查询统计数据 + 汇总服务->>数据库: 4a.统计Contact数 + 汇总服务->>数据库: 4b.统计Opportunity数据 + 汇总服务->>数据库: 4c.统计Lead数 + 汇总服务->>数据库: 4d.查询最后活跃时间 + end + + 数据库-->>汇总服务: 5.返回所有统计结果 + + 汇总服务->>汇总服务: 6.计算汇总指标 + Note over 汇总服务: 计算:
- total_contacts
- total_revenue
- win_rate
- health_score + + 汇总服务->>数据库: 7.更新Summary表
记录calculated_at + + 汇总服务->>Redis: 8.更新汇总缓存 + Note over Redis: 缓存TTL: 300秒 + + 汇总服务->>分布式锁: 9.释放锁 + end +``` + +**汇总计算优化策略:** + +``` +增量计算: +- 只计算变化部分 +- 减少全表扫描 +- 提升计算速度 + +并行查询: +- 统计数据并行执行 +- 减少总耗时 +- 提升吞吐量 + +分布式锁: +- 防止重复计算 +- 保证数据一致性 +- 避免资源浪费 + +定时 + 实时: +- 定时任务:每小时全量刷新 +- 实时触发:关键事件立即计算 +- 保证数据新鲜度 +``` + +--- + +## 技术实现方案 + +### 1. 数据库表结构设计 + +#### Account核心表 + +```sql +-- Account基础表 +CREATE TABLE account ( + account_id VARCHAR(64) PRIMARY KEY COMMENT '账户ID', + account_name VARCHAR(200) NOT NULL COMMENT '企业名称', + unified_social_credit_code VARCHAR(18) UNIQUE COMMENT '统一信用代码', + account_type VARCHAR(50) NOT NULL COMMENT '客户类型', + account_status VARCHAR(50) NOT NULL COMMENT '账户状态', + industry_id VARCHAR(64) COMMENT '行业ID', + province VARCHAR(50) COMMENT '省份', + city VARCHAR(50) COMMENT '城市', + account_source VARCHAR(100) COMMENT '来源', + owner_user_id VARCHAR(64) COMMENT '负责人', + created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + INDEX idx_name (account_name), + INDEX idx_status (account_status), + INDEX idx_city (province, city) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='企业账户表'; + +-- AccountSummary汇总表 +CREATE TABLE account_summary ( + summary_id VARCHAR(64) PRIMARY KEY COMMENT '汇总ID', + account_id VARCHAR(64) NOT NULL UNIQUE COMMENT '账户ID', + total_contacts INT DEFAULT 0 COMMENT '联系人总数', + total_opportunities INT DEFAULT 0 COMMENT '商机总数', + total_leads INT DEFAULT 0 COMMENT '线索总数', + total_revenue DECIMAL(18,2) DEFAULT 0 COMMENT '累计收入', + won_opportunities INT DEFAULT 0 COMMENT '赢单数', + lost_opportunities INT DEFAULT 0 COMMENT '输单数', + win_rate DECIMAL(5,2) DEFAULT 0 COMMENT '赢单率', + health_score INT DEFAULT 0 COMMENT '健康度评分', + last_activity_at DATETIME COMMENT '最后活跃时间', + calculated_at DATETIME NOT NULL COMMENT '计算时间', + updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + FOREIGN KEY (account_id) REFERENCES 
account(account_id), + INDEX idx_health_score (health_score DESC), + INDEX idx_revenue (total_revenue DESC) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='企业账户汇总表'; + +-- AccountChannelIdentity渠道身份映射表 +CREATE TABLE account_channel_identity ( + identity_id VARCHAR(64) PRIMARY KEY COMMENT '身份ID', + account_id VARCHAR(64) NOT NULL COMMENT '账户ID', + channel_id VARCHAR(64) NOT NULL COMMENT '渠道ID', + channel_account_id VARCHAR(200) NOT NULL COMMENT '渠道内账户ID', + identity_type VARCHAR(50) COMMENT '身份类型', + is_verified BOOLEAN DEFAULT FALSE COMMENT '是否已验证', + first_seen_at DATETIME NOT NULL COMMENT '首次发现时间', + last_seen_at DATETIME NOT NULL COMMENT '最后活跃时间', + UNIQUE KEY uk_account_channel (account_id, channel_id), + INDEX idx_channel_account (channel_id, channel_account_id), + FOREIGN KEY (account_id) REFERENCES account(account_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='企业渠道身份映射表'; +``` + +#### Contact核心表 + +```sql +-- Contact基础表 +CREATE TABLE contact ( + contact_id VARCHAR(64) PRIMARY KEY COMMENT '联系人ID', + contact_name VARCHAR(100) NOT NULL COMMENT '姓名', + mobile_phone VARCHAR(20) UNIQUE COMMENT '手机号', + email VARCHAR(200) UNIQUE COMMENT '邮箱', + wechat_id VARCHAR(100) COMMENT '微信ID', + job_title VARCHAR(100) COMMENT '职位', + contact_status VARCHAR(50) NOT NULL COMMENT '状态', + primary_account_id VARCHAR(64) COMMENT '主要企业ID', + contact_source VARCHAR(100) COMMENT '来源', + owner_user_id VARCHAR(64) COMMENT '负责人', + created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + INDEX idx_phone (mobile_phone), + INDEX idx_email (email), + INDEX idx_account (primary_account_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='联系人表'; + +-- ContactSummary汇总表 +CREATE TABLE contact_summary ( + summary_id VARCHAR(64) PRIMARY KEY COMMENT '汇总ID', + contact_id VARCHAR(64) NOT NULL UNIQUE COMMENT '联系人ID', + total_interactions INT DEFAULT 0 COMMENT '互动总数', + email_opens INT DEFAULT 0 COMMENT 
'邮件打开数', + email_clicks INT DEFAULT 0 COMMENT '邮件点击数', + engagement_score INT DEFAULT 0 COMMENT '参与度评分', + last_activity_at DATETIME COMMENT '最后活跃时间', + calculated_at DATETIME NOT NULL COMMENT '计算时间', + updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + FOREIGN KEY (contact_id) REFERENCES contact(contact_id), + INDEX idx_engagement (engagement_score DESC) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='联系人汇总表'; + +-- ContactChannelIdentity渠道身份映射表 +CREATE TABLE contact_channel_identity ( + identity_id VARCHAR(64) PRIMARY KEY COMMENT '身份ID', + contact_id VARCHAR(64) NOT NULL COMMENT '联系人ID', + channel_id VARCHAR(64) NOT NULL COMMENT '渠道ID', + channel_user_id VARCHAR(200) NOT NULL COMMENT '渠道用户ID', + identity_type VARCHAR(50) COMMENT '身份类型', + is_verified BOOLEAN DEFAULT FALSE COMMENT '是否已验证', + first_seen_at DATETIME NOT NULL COMMENT '首次发现时间', + last_seen_at DATETIME NOT NULL COMMENT '最后活跃时间', + UNIQUE KEY uk_contact_channel (contact_id, channel_id), + INDEX idx_channel_user (channel_id, channel_user_id), + FOREIGN KEY (contact_id) REFERENCES contact(contact_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='联系人渠道身份映射表'; +``` + +#### 标签系统表 + +```sql +-- Tag标签表 +CREATE TABLE tag ( + tag_id VARCHAR(64) PRIMARY KEY COMMENT '标签ID', + tag_name VARCHAR(100) NOT NULL UNIQUE COMMENT '标签名称', + tag_category VARCHAR(50) COMMENT '标签分类', + tag_type VARCHAR(50) COMMENT '标签类型', + description TEXT COMMENT '描述', + created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + INDEX idx_category (tag_category) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='标签表'; + +-- TagRelation标签关系表 +CREATE TABLE tag_relation ( + relation_id VARCHAR(64) PRIMARY KEY COMMENT '关系ID', + tag_id VARCHAR(64) NOT NULL COMMENT '标签ID', + entity_type VARCHAR(50) NOT NULL COMMENT '实体类型', + entity_id VARCHAR(64) NOT NULL COMMENT '实体ID', + tagged_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '打标时间', + is_auto BOOLEAN DEFAULT FALSE COMMENT '是否自动打标', + UNIQUE KEY 
uk_tag_entity (tag_id, entity_type, entity_id), + INDEX idx_entity (entity_type, entity_id), + FOREIGN KEY (tag_id) REFERENCES tag(tag_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='标签关系表'; + +-- TagRule标签规则表 +CREATE TABLE tag_rule ( + rule_id VARCHAR(64) PRIMARY KEY COMMENT '规则ID', + tag_id VARCHAR(64) NOT NULL COMMENT '标签ID', + rule_type VARCHAR(50) NOT NULL COMMENT '规则类型', + rule_definition TEXT NOT NULL COMMENT '规则定义', + is_active BOOLEAN DEFAULT TRUE COMMENT '是否启用', + created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (tag_id) REFERENCES tag(tag_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='标签规则表'; +``` + +#### 分群系统表 + +```sql +-- Segment分群表 +CREATE TABLE segment ( + segment_id VARCHAR(64) PRIMARY KEY COMMENT '分群ID', + segment_name VARCHAR(200) NOT NULL COMMENT '分群名称', + target_entity_type VARCHAR(50) NOT NULL COMMENT '目标实体类型', + segment_rules TEXT NOT NULL COMMENT '分群规则', + member_count INT DEFAULT 0 COMMENT '成员数量', + is_dynamic BOOLEAN DEFAULT FALSE COMMENT '是否动态分群', + last_calculated_at DATETIME COMMENT '最后计算时间', + created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + INDEX idx_entity_type (target_entity_type) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='分群表'; + +-- SegmentMember分群成员表(仅静态分群使用) +CREATE TABLE segment_member ( + member_id VARCHAR(64) PRIMARY KEY COMMENT '成员ID', + segment_id VARCHAR(64) NOT NULL COMMENT '分群ID', + entity_type VARCHAR(50) NOT NULL COMMENT '实体类型', + entity_id VARCHAR(64) NOT NULL COMMENT '实体ID', + joined_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '加入时间', + is_active BOOLEAN DEFAULT TRUE COMMENT '是否活跃', + UNIQUE KEY uk_segment_entity (segment_id, entity_type, entity_id), + INDEX idx_entity (entity_type, entity_id), + FOREIGN KEY (segment_id) REFERENCES segment(segment_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='分群成员表'; +``` + +--- + +### 2. Kafka Topic设计 + +```yaml +Topic设计: + +1. 
data-ingestion (数据采集) + Partition: 8 + Replication: 3 + Retention: 7天 + 用途: 接收多渠道原始数据 + +2. data-cleaning (数据清洗) + Partition: 8 + Replication: 3 + Retention: 3天 + 用途: 清洗后的标准数据 + +3. identity-mapping (身份映射) + Partition: 8 + Replication: 3 + Retention: 3天 + 用途: 身份识别和映射任务 + +4. entity-merge (实体合并) + Partition: 8 + Replication: 3 + Retention: 3天 + 用途: 实体数据合并任务 + +5. tag-calculation (标签计算) + Partition: 8 + Replication: 3 + Retention: 3天 + 用途: 标签打标任务 + +6. summary-calculation (汇总计算) + Partition: 8 + Replication: 3 + Retention: 3天 + 用途: 汇总统计任务 +``` + +--- + +### 3. 缓存策略设计 + +```yaml +Redis缓存设计: + +1. 实体基础数据缓存 + Key: entity:{type}:{id} + TTL: 300秒 + 示例: entity:account:ACC001 + 用途: 缓存Account/Contact基础信息 + +2. 汇总数据缓存 + Key: summary:{type}:{id} + TTL: 300秒 + 示例: summary:account:ACC001 + 用途: 缓存Summary汇总数据 + +3. 标签缓存 + Key: tags:{type}:{id} + TTL: 600秒 + 示例: tags:account:ACC001 + Value: ["高价值客户", "互联网行业", "浙江"] + 用途: 缓存实体的所有标签 + +4. 分群成员缓存 + Key: segment:members:{segment_id} + TTL: 1800秒 + Value: Set类型,存储entity_id + 用途: 缓存分群成员列表 + +5. 身份映射缓存 + Key: identity:{channel}:{channel_user_id} + TTL: 3600秒 + 示例: identity:wechat:openid_xxx + Value: contact_id + 用途: 快速查询渠道身份对应的实体ID + +6. 分布式锁 + Key: lock:summary:{entity_id} + TTL: 30秒 + 用途: 防止汇总计算重复执行 +``` + +--- + +### 4. 核心服务接口设计 + +```yaml +数据采集服务: + POST /api/ingestion/crm + - 接收CRM数据 + POST /api/ingestion/wework + - 接收企业微信数据 + POST /api/ingestion/wechat + - 接收公众号数据 + +查询服务: + GET /api/account/{id} + - 查询Account详情 + GET /api/account/{id}/360 + - 查询Account 360度视图 + GET /api/contact/{id} + - 查询Contact详情 + GET /api/contact/search + - 搜索Contact + +标签服务: + GET /api/tag/list + - 获取标签列表 + POST /api/tag/create + - 创建标签 + POST /api/tag/apply + - 手动打标签 + GET /api/tag/entity/{type}/{id} + - 查询实体的所有标签 + +分群服务: + POST /api/segment/create + - 创建分群 + GET /api/segment/{id}/members + - 获取分群成员 + POST /api/segment/calculate + - 重新计算分群 + GET /api/segment/list + - 获取分群列表 +``` + +--- + +## 总结 + +### 核心能力 + +**1. 
多渠道数据采集** +- 统一接入层,支持CRM、企微、公众号等多渠道 +- 快速响应,异步处理 +- 数据清洗和标准化 + +**2. 全渠道身份整合** +- 基于手机号、邮箱、企业信用代码等多维度匹配 +- ChannelIdentity记录每个渠道身份 +- 疑似重复人工审核 + +**3. 智能标签系统** +- 支持自动打标和手动打标 +- 规则引擎驱动 +- 标签分类管理 + +**4. 灵活分群能力** +- 动态分群:实时计算 +- 静态分群:快照固化 +- 支持复杂条件组合 + +**5. 实时汇总统计** +- 异步计算,不阻塞主流程 +- 分布式锁防重 +- 缓存优化查询性能 + +### 技术特点 + +``` +简洁清晰: +- 专注核心业务流程 +- 去除复杂行为数据 +- 架构易于理解 + +高性能: +- 消息队列异步解耦 +- 多级缓存策略 +- 数据库索引优化 + +高可用: +- PostgreSQL主从复制 +- Redis Cluster +- Kafka集群部署 + +可扩展: +- 微服务架构 +- 水平扩展能力 +- 插件化设计 +``` + +### 适用场景 + +- 企业客户数:100万-1000万 +- 联系人数:500万-5000万 +- 线索数:200万-2000万/年 +- 数据采集QPS:1000-10000 +- 查询QPS:5000-50000 diff --git a/docs/b2b-cdp-entity-design.md b/docs/b2b-cdp-entity-design.md new file mode 100644 index 000000000..aae6b98dc --- /dev/null +++ b/docs/b2b-cdp-entity-design.md @@ -0,0 +1,1327 @@ +# B2B CDP 实体设计详细方案 + +## 目录 +- [整体架构](#整体架构) +- [核心实体设计](#核心实体设计) +- [实体关系图](#实体关系图) +- [业务流程](#业务流程) +- [数据模型详细设计](#数据模型详细设计) + +--- + +## 整体架构 + +### 实体分层架构图 + +```mermaid +graph TB + DS1[官方网站] --> CH[渠道层] + DS2[微信生态] --> CH + DS3[社交媒体] --> CH + DS4[线下活动] --> CH + DS5[电话邮件] --> CH + DS6[CRM系统] --> CH + DS7[第三方平台] --> CH + + CH --> TP[触点层Touchpoint] + CH --> EV[事件层Event] + CM[营销活动Campaign] --> TP + + TP --> CT[联系人Contact] + TP --> LD[线索Lead] + EV --> CT + EV --> LD + + CT --> AC[企业账户Account] + LD --> CT + + CT --> CTS[ContactSummary] + LD --> LDS[LeadSummary] + AC --> ACS[AccountSummary] + + LD --> OP[商机Opportunity] + AC --> OP + OP --> PR[产品Product] + + CT --> SG[客户分群Segment] + AC --> SG + CT --> TG[标签Tag] + AC --> TG + CT --> SC[评分模型Score] + AC --> SC + LD --> SC + CT --> JN[客户旅程Journey] + CM --> AT[归因Attribution] + OP --> AT +``` + +--- + +### 技术架构图 + +```mermaid +graph LR + API[API网关] --> PG[(PostgreSQL
主数据库)] + API --> RD[(Redis
缓存层)] + API --> ES[(Elasticsearch
搜索引擎)] + + WK[后台任务] --> PG + WK --> CH[(ClickHouse
行为数据库)] + WK --> RD + + PG --> DW[(数据仓库
BI分析)] + CH --> DW +``` + +--- + +## 核心实体设计 + +### 1. Account 企业账户实体 + +```mermaid +erDiagram + Account ||--o{ AccountChannelIdentity : has + Account ||--|| AccountSummary : aggregates + Account ||--o{ AccountRelation : parent + + Account { + varchar account_id PK + varchar account_name + varchar unified_social_credit_code UK + varchar account_type + varchar account_status + varchar account_level + decimal annual_revenue + int employee_count + varchar industry_id FK + varchar owner_user_id FK + datetime created_at + datetime updated_at + } + + AccountChannelIdentity { + varchar identity_id PK + varchar account_id FK + varchar channel_id FK + varchar channel_account_id + boolean is_verified + datetime first_seen_at + } + + AccountSummary { + varchar summary_id PK + varchar account_id FK + int total_contacts + int total_opportunities + decimal total_revenue + decimal lifetime_value + int won_opportunities + decimal win_rate + int health_score + datetime last_activity_at + datetime calculated_at + } +``` + +**核心字段说明:** +- `account_id` - 账户唯一标识 +- `unified_social_credit_code` - 统一社会信用代码(企业唯一标识) +- `account_type` - 客户类型(CUSTOMER客户、PARTNER合作伙伴、COMPETITOR竞争对手、PROSPECT潜在客户) +- `account_status` - 账户状态(ACTIVE活跃、DORMANT休眠、CHURNED流失、BLACKLIST黑名单) +- `account_level` - 客户等级(STRATEGIC战略级、IMPORTANT重要级、NORMAL普通级) + +**AccountSummary 汇总指标:** +- `total_contacts` - 关联联系人总数 +- `total_revenue` - 累计收入金额 +- `lifetime_value` - 客户生命周期价值 +- `win_rate` - 赢单率百分比 +- `health_score` - 健康度评分(0-100) + +--- + +### 2. 
Contact 联系人实体 + +```mermaid +erDiagram + Contact ||--o{ ContactChannelIdentity : has + Contact ||--o{ AccountContactRelation : belongs_to + Contact ||--|| ContactSummary : aggregates + + Contact { + varchar contact_id PK + varchar contact_name + varchar mobile_phone UK + varchar email UK + varchar wechat_id + varchar job_title + varchar department + varchar contact_status + varchar primary_account_id FK + varchar owner_user_id FK + boolean is_decision_maker + datetime created_at + } + + ContactChannelIdentity { + varchar identity_id PK + varchar contact_id FK + varchar channel_id FK + varchar channel_user_id + boolean is_verified + datetime first_seen_at + } + + ContactSummary { + varchar summary_id PK + varchar contact_id FK + int total_touchpoints + int total_events + int email_opens + int email_clicks + int engagement_score + datetime last_activity_at + datetime calculated_at + } + + AccountContactRelation { + varchar relation_id PK + varchar account_id FK + varchar contact_id FK + varchar role_in_account + varchar decision_level + boolean is_primary_contact + } +``` + +**核心字段说明:** +- `decision_level` - 决策层级(DECISION_MAKER决策者、INFLUENCER影响者、USER使用者、GATEKEEPER把关者) +- `lifecycle_stage` - 生命周期阶段(SUBSCRIBER订阅者、LEAD线索、MQL市场合格线索、SQL销售合格线索、OPPORTUNITY商机、CUSTOMER客户) + +**ContactSummary 汇总指标:** +- `engagement_score` - 参与度评分(0-100) +- `email_opens` - 邮件打开次数 +- `days_since_last_activity` - 距上次活跃天数 + +--- + +### 3. 
Lead 线索实体 + +```mermaid +erDiagram + Lead ||--o{ LeadChannelIdentity : has + Lead ||--|| LeadSummary : aggregates + + Lead { + varchar lead_id PK + varchar lead_name + varchar company_name + varchar mobile_phone + varchar email + varchar lead_source + varchar channel_id FK + varchar campaign_id FK + varchar lead_status + int lead_score + varchar lead_grade + varchar owner_user_id FK + datetime created_at + datetime converted_at + varchar converted_contact_id FK + varchar converted_account_id FK + } + + LeadChannelIdentity { + varchar identity_id PK + varchar lead_id FK + varchar channel_id FK + varchar channel_user_id + datetime captured_at + } + + LeadSummary { + varchar summary_id PK + varchar lead_id FK + int total_touchpoints + int total_events + int form_submissions + int content_downloads + int days_in_pipeline + datetime last_activity_at + datetime calculated_at + } +``` + +**核心字段说明:** +- `lead_status` - 线索状态(NEW新建、CONTACTED已联系、QUALIFIED已限定、CONVERTED已转化、DISQUALIFIED无效) +- `lead_score` - 线索评分(0-100) +- `lead_grade` - 线索等级(A、B、C、D) +- `is_qualified` - 是否为合格线索(MQL市场合格线索/SQL销售合格线索) + +--- + +### 4. 
Opportunity 商机实体 + +```mermaid +erDiagram + Opportunity ||--o{ OpportunityStageHistory : tracks + Opportunity ||--o{ OpportunityProduct : contains + + Opportunity { + varchar opportunity_id PK + varchar opportunity_name + varchar account_id FK + varchar primary_contact_id FK + varchar lead_id FK + varchar opportunity_type + decimal amount + varchar stage + int probability + date expected_close_date + date actual_close_date + varchar owner_user_id FK + datetime created_at + boolean is_won + boolean is_lost + } + + OpportunityStageHistory { + varchar history_id PK + varchar opportunity_id FK + varchar from_stage + varchar to_stage + datetime changed_at + int duration_days + } + + OpportunityProduct { + varchar opp_product_id PK + varchar opportunity_id FK + varchar product_id FK + int quantity + decimal unit_price + decimal total_price + } +``` + +**核心字段说明:** +- `stage` - 商机阶段(LEAD线索、QUALIFICATION资格确认、NEEDS_ANALYSIS需求分析、PROPOSAL方案提议、NEGOTIATION商务谈判、CONTRACT合同签订、CLOSED_WON赢单、CLOSED_LOST输单) +- `probability` - 赢单概率(0-100) +- `opportunity_type` - 商机类型(NEW_BUSINESS新客户、UPSELL追加销售、RENEWAL续约、CROSS_SELL交叉销售) + +--- + +### 5. Channel 渠道实体 + +```mermaid +erDiagram + Channel ||--o{ ChannelPerformance : tracks + + Channel { + varchar channel_id PK + varchar channel_name + varchar channel_type + varchar channel_category + varchar parent_channel_id FK + varchar channel_status + decimal cost + datetime created_at + } + + ChannelPerformance { + varchar performance_id PK + varchar channel_id FK + date stat_date + int lead_count + int contact_count + int opportunity_count + decimal revenue + decimal roi + decimal conversion_rate + } +``` + +**渠道类型:** +- WEBSITE官方网站、SEO搜索引擎优化、SEM搜索引擎营销 +- WECHAT微信、ENTERPRISE_WECHAT企业微信、DOUYIN抖音 +- EMAIL邮件营销、PHONE电话 +- OFFLINE_EVENT线下活动、EXHIBITION展会、PARTNER合作伙伴 + +--- + +### 6. 
Campaign 营销活动实体 + +```mermaid +erDiagram + Campaign ||--o{ CampaignPerformance : tracks + Campaign ||--o{ CampaignMember : includes + + Campaign { + varchar campaign_id PK + varchar campaign_name + varchar campaign_type + varchar campaign_status + date start_date + date end_date + decimal budget + decimal actual_cost + varchar owner_user_id FK + datetime created_at + } + + CampaignPerformance { + varchar performance_id PK + varchar campaign_id FK + date stat_date + int impressions + int clicks + int leads_generated + decimal revenue + decimal roi + decimal cpl + decimal cpa + } + + CampaignMember { + varchar member_id PK + varchar campaign_id FK + varchar member_type + varchar member_ref_id FK + varchar member_status + datetime joined_at + } +``` + +**活动类型:** +- WEBINAR网络研讨会 +- CONFERENCE线下会议 +- EXHIBITION展会 +- EMAIL_MARKETING邮件营销 +- CONTENT_MARKETING内容营销 +- PRODUCT_TRIAL产品试用 + +--- + +### 7. Touchpoint 触点实体 + +```mermaid +erDiagram + Touchpoint ||--o{ TouchpointAttachment : has + + Touchpoint { + varchar touchpoint_id PK + varchar touchpoint_type + varchar channel_id FK + varchar campaign_id FK + varchar contact_id FK + varchar lead_id FK + varchar account_id FK + datetime touchpoint_time + varchar touchpoint_direction + varchar content_type + int duration_seconds + datetime created_at + } + + TouchpointAttachment { + varchar attachment_id PK + varchar touchpoint_id FK + varchar file_name + varchar file_url + varchar file_type + int file_size + } +``` + +**触点类型:** +- PAGE_VIEW页面浏览、FORM_SUBMIT表单提交、DOWNLOAD下载 +- EMAIL_OPEN邮件打开、EMAIL_CLICK邮件点击 +- CALL电话、MEETING会议、CHAT聊天 + +--- + +### 8. 
Event 行为事件实体 + +```mermaid +erDiagram + Event { + varchar event_id PK + varchar event_name + varchar event_type + varchar channel_id FK + varchar contact_id FK + varchar lead_id FK + varchar account_id FK + datetime event_time + varchar session_id + varchar device_type + varchar browser + varchar page_url + varchar referrer_url + datetime created_at + } +``` + +**事件类型:** +- PAGE_VIEW页面浏览、BUTTON_CLICK按钮点击 +- FORM_START表单开始、FORM_SUBMIT表单提交 +- FILE_DOWNLOAD文件下载、VIDEO_PLAY视频播放 +- PRODUCT_TRIAL产品试用、SEARCH搜索 + +--- + +### 9. Product 产品实体 + +```mermaid +erDiagram + Product }o--|| ProductCategory : belongs_to + + Product { + varchar product_id PK + varchar product_name + varchar product_code UK + varchar product_category_id FK + varchar product_status + decimal list_price + varchar currency + datetime created_at + } + + ProductCategory { + varchar category_id PK + varchar category_name + varchar parent_category_id FK + int level + int sort_order + } +``` + +--- + +### 10. Tag 标签实体 + +```mermaid +erDiagram + Tag ||--o{ TagRelation : applies_to + + Tag { + varchar tag_id PK + varchar tag_name UK + varchar tag_category + varchar tag_type + varchar description + datetime created_at + } + + TagRelation { + varchar relation_id PK + varchar tag_id FK + varchar entity_type + varchar entity_id FK + datetime tagged_at + boolean is_auto_tagged + } +``` + +**标签类型:** +- BEHAVIOR行为标签 +- PROFILE画像标签 +- BUSINESS业务标签 +- INTEREST兴趣标签 + +--- + +### 11. Segment 客户分群实体 + +```mermaid +erDiagram + Segment ||--o{ SegmentMember : contains + + Segment { + varchar segment_id PK + varchar segment_name + varchar segment_type + varchar target_entity_type + int member_count + boolean is_dynamic + datetime last_calculated_at + datetime created_at + } + + SegmentMember { + varchar member_id PK + varchar segment_id FK + varchar entity_type + varchar entity_id FK + datetime joined_at + boolean is_active + } +``` + +--- + +### 12. 
Score 评分实体 + +```mermaid +erDiagram + ScoreModel ||--o{ ScoreRecord : generates + ScoreRecord ||--o{ ScoreHistory : tracks + + ScoreModel { + varchar model_id PK + varchar model_name + varchar model_type + varchar target_entity_type + int max_score + varchar status + datetime created_at + } + + ScoreRecord { + varchar record_id PK + varchar model_id FK + varchar entity_type + varchar entity_id FK + int score + varchar grade + datetime calculated_at + } + + ScoreHistory { + varchar history_id PK + varchar entity_type + varchar entity_id FK + varchar model_id FK + int score + datetime recorded_at + } +``` + +**评分类型:** +- LEAD_SCORING线索评分 +- ACCOUNT_HEALTH企业健康度评分 +- CONTACT_ENGAGEMENT联系人参与度评分 + +--- + +### 13. Industry 行业实体 + +```mermaid +erDiagram + Industry ||--o{ Industry : parent_of + + Industry { + varchar industry_id PK + varchar industry_name + varchar industry_code UK + varchar parent_industry_id FK + int level + int sort_order + } +``` + +--- + +### 14. Attribution 归因实体 + +```mermaid +erDiagram + Attribution ||--o{ TouchpointAttribution : analyzes + + Attribution { + varchar attribution_id PK + varchar entity_type + varchar entity_id FK + varchar attribution_model + datetime created_at + } + + TouchpointAttribution { + varchar ta_id PK + varchar attribution_id FK + varchar touchpoint_id FK + varchar campaign_id FK + varchar channel_id FK + decimal attribution_weight + int position_in_journey + } +``` + +**归因模型:** +- FIRST_TOUCH首次触点归因 +- LAST_TOUCH末次触点归因 +- LINEAR线性归因 +- TIME_DECAY时间衰减归因 +- U_SHAPED U型归因 +- W_SHAPED W型归因 + +--- + +### 15. 
CustomerJourney 客户旅程实体 + +```mermaid +erDiagram + CustomerJourney ||--o{ JourneyStage : follows + + CustomerJourney { + varchar journey_id PK + varchar journey_name + varchar entity_type + varchar entity_id FK + varchar journey_stage + datetime journey_start_at + datetime journey_end_at + int total_touchpoints + datetime created_at + } + + JourneyStage { + varchar stage_id PK + varchar stage_name + int stage_order + varchar stage_category + } +``` + +--- + +## 实体关系图 + +### 核心实体关系总览 + +```mermaid +erDiagram + Account ||--o{ AccountContactRelation : has + Account ||--|| AccountSummary : aggregates + Account ||--o{ Opportunity : owns + Account ||--o{ Touchpoint : receives + + Contact ||--o{ AccountContactRelation : belongs + Contact ||--|| ContactSummary : aggregates + Contact ||--o{ Opportunity : participates + Contact ||--o{ Touchpoint : receives + Contact ||--o{ Event : generates + + Lead ||--o| Contact : converts_to + Lead ||--o| Account : converts_to + Lead ||--o| Opportunity : converts_to + Lead ||--|| LeadSummary : aggregates + Lead ||--o{ Touchpoint : receives + + Opportunity ||--o{ OpportunityStageHistory : tracks + Opportunity ||--o{ OpportunityProduct : contains + + Product ||--o{ OpportunityProduct : included_in + Product }o--|| ProductCategory : categorized_by + + Channel ||--o{ Touchpoint : generates + Channel ||--o{ Event : tracks + Channel ||--o{ ChannelPerformance : measured_by + + Campaign ||--o{ Touchpoint : drives + Campaign ||--o{ Lead : generates + Campaign ||--o{ CampaignMember : includes + + Tag ||--o{ TagRelation : tags + Segment ||--o{ SegmentMember : groups + + ScoreModel ||--o{ ScoreRecord : scores + + Industry ||--o{ Account : classifies + Industry ||--o{ Lead : classifies + + Attribution ||--o{ TouchpointAttribution : attributes +``` + +--- + +### 全渠道身份统一架构 + +```mermaid +graph TB + S1[微信公众号] --> IR[身份识别引擎] + S2[企业微信] --> IR + S3[官方网站] --> IR + S4[抖音] --> IR + S5[邮件系统] --> IR + S6[电话系统] --> IR + S7[线下活动] --> IR + + IR --> IM[身份匹配] + IM 
--> UC[统一联系人Contact] + IM --> UA[统一企业账户Account] + + UC --> CI1[ContactChannelIdentity
微信OpenID] + UC --> CI2[ContactChannelIdentity
企业微信UserID] + UC --> CI3[ContactChannelIdentity
网站CookieID] + + UA --> AI1[AccountChannelIdentity
企业微信CorpID] + UA --> AI2[AccountChannelIdentity
网站企业ID] +``` + +--- + +## 业务流程 + +### 线索到商机转化流程 + +```mermaid +stateDiagram-v2 + [*] --> 新线索 + + 新线索 --> 已联系: 首次联系 + 新线索 --> 无效线索: 标记无效 + + 已联系 --> 已限定: 资格验证 + 已联系 --> 无效线索: 标记无效 + + 已限定 --> 已转化: 转化 + + state 已转化 { + [*] --> 创建联系人 + [*] --> 创建企业账户 + [*] --> 创建商机 + } + + 已转化 --> [*] + 无效线索 --> [*] +``` + +--- + +### 商机阶段流转流程 + +```mermaid +stateDiagram-v2 + [*] --> 线索阶段 + + 线索阶段 --> 需求确认: 需求分析 + 线索阶段 --> 输单: 丢失 + + 需求确认 --> 方案设计: 方案输出 + 需求确认 --> 输单: 丢失 + + 方案设计 --> 商务谈判: 开始谈判 + 方案设计 --> 输单: 丢失 + + 商务谈判 --> 合同签订: 签订合同 + 商务谈判 --> 输单: 丢失 + + 合同签订 --> 赢单: 成功签约 + 合同签订 --> 输单: 失败 + + 赢单 --> [*] + 输单 --> [*] +``` + +--- + +### 客户生命周期管理 + +```mermaid +stateDiagram-v2 + [*] --> 认知阶段 + + 认知阶段 --> 考虑阶段: 产生兴趣 + 考虑阶段 --> 决策阶段: 深度评估 + 决策阶段 --> 留存阶段: 成交购买 + 留存阶段 --> 扩展阶段: 追加购买 + + 留存阶段 --> 流失: 停止使用 + 扩展阶段 --> 流失: 停止使用 + + 流失 --> 召回: 召回营销 + 召回 --> 留存阶段: 成功召回 + 召回 --> [*]: 永久流失 + + 扩展阶段 --> [*]: 持续合作 +``` + +--- + +### 全渠道数据流转流程 + +```mermaid +sequenceDiagram + participant 用户 + participant 渠道 + participant 事件系统 + participant 身份识别 + participant 线索管理 + participant 联系人管理 + participant 企业账户管理 + participant 汇总计算 + + 用户->>渠道: 1.访问互动 + 渠道->>事件系统: 2.记录事件 + 事件系统->>身份识别: 3.身份识别 + + alt 新用户 + 身份识别->>线索管理: 4a.创建Lead + 线索管理->>汇总计算: 5a.更新LeadSummary + else 已识别用户 + 身份识别->>联系人管理: 4b.关联Contact + 联系人管理->>企业账户管理: 5b.关联Account + 企业账户管理->>汇总计算: 6b.更新Summary + end + + 汇总计算->>渠道: 7.触发规则引擎 + 渠道->>用户: 8.个性化响应 +``` + +--- + +### 身份识别匹配流程 + +```mermaid +flowchart TD + 开始[接收多渠道数据] --> 提取[提取身份标识符] + + 提取 --> 手机{有手机号?} + 提取 --> 邮箱{有邮箱?} + 提取 --> 微信{有微信ID?} + + 手机 -->|是| 手机匹配[按手机号匹配
优先级1] + 邮箱 -->|是| 邮箱匹配[按邮箱匹配
优先级2] + 微信 -->|是| 微信匹配[按微信ID匹配
优先级3] + + 手机匹配 --> 找到{找到匹配?} + 邮箱匹配 --> 找到 + 微信匹配 --> 找到 + + 找到 -->|是| 合并[合并到现有Contact] + 找到 -->|否| 创建[创建新Contact] + + 合并 --> 添加身份[添加更新
ContactChannelIdentity] + 创建 --> 添加身份 + + 添加身份 --> 更新汇总[更新ContactSummary] + 更新汇总 --> 完成[完成] +``` + +--- + +### 汇总数据更新策略 + +```mermaid +flowchart LR + 定时任务[定时任务
每小时执行] --> 计算引擎[计算引擎] + 实时触发[实时触发
关键事件] --> 计算引擎 + 手动刷新[手动刷新
按需执行] --> 计算引擎 + + 计算引擎 --> 企业汇总[AccountSummary] + 计算引擎 --> 联系人汇总[ContactSummary] + 计算引擎 --> 线索汇总[LeadSummary] +``` + +--- + +## 数据模型详细设计 + +### Account 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| account_id | VARCHAR | 64 | PK, NOT NULL | 账户唯一标识 | ACC_20231105001 | +| account_name | VARCHAR | 200 | NOT NULL | 企业名称 | 阿里巴巴网络技术有限公司 | +| unified_social_credit_code | VARCHAR | 18 | UNIQUE | 统一社会信用代码 | 91330000MA27XYZ123 | +| account_type | VARCHAR | 50 | NOT NULL | 客户类型 | CUSTOMER、PARTNER、PROSPECT | +| industry_id | VARCHAR | 64 | FK | 行业分类外键 | IND_001 | +| account_status | VARCHAR | 50 | NOT NULL | 账户状态 | ACTIVE、DORMANT、CHURNED | +| account_level | VARCHAR | 50 | | 客户等级 | STRATEGIC、IMPORTANT、NORMAL | +| annual_revenue | DECIMAL | 18,2 | | 年营收(万元) | 50000.00 | +| employee_count | INT | | | 员工人数 | 5000 | +| company_website | VARCHAR | 500 | | 公司网站 | https://www.alibaba.com | +| province | VARCHAR | 50 | | 省份 | 浙江省 | +| city | VARCHAR | 50 | | 城市 | 杭州市 | +| account_source | VARCHAR | 100 | | 来源渠道 | WEBSITE、EXHIBITION、PARTNER | +| primary_channel_id | VARCHAR | 64 | FK | 主渠道ID | CH_001 | +| owner_user_id | VARCHAR | 64 | FK | 负责人ID | USER_001 | +| created_at | DATETIME | | NOT NULL | 创建时间 | 2023-11-05 10:30:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 15:20:00 | +| lifecycle_stage | VARCHAR | 50 | | 生命周期阶段 | AWARENESS、RETENTION、EXPANSION | + +--- + +### AccountSummary 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| summary_id | VARCHAR | 64 | PK, NOT NULL | 汇总记录唯一标识 | ACCS_20231105001 | +| account_id | VARCHAR | 64 | FK, UNIQUE | 账户ID | ACC_20231105001 | +| total_contacts | INT | | DEFAULT 0 | 关联联系人总数 | 25 | +| total_opportunities | INT | | DEFAULT 0 | 商机总数 | 8 | +| total_leads | INT | | DEFAULT 0 | 线索总数 | 45 | +| total_revenue | DECIMAL | 18,2 | DEFAULT 0 | 累计收入(元) | 5000000.00 | +| lifetime_value | DECIMAL | 18,2 | DEFAULT 0 | 生命周期价值(元) | 8000000.00 | +| 
won_opportunities | INT | | DEFAULT 0 | 赢单数量 | 5 | +| lost_opportunities | INT | | DEFAULT 0 | 输单数量 | 2 | +| win_rate | DECIMAL | 5,2 | DEFAULT 0 | 赢单率(%) | 71.43 | +| total_touchpoints | INT | | DEFAULT 0 | 总触点数 | 156 | +| health_score | INT | | DEFAULT 0 | 健康度评分(0-100) | 85 | +| last_activity_at | DATETIME | | | 最后活跃时间 | 2023-11-05 14:30:00 | +| calculated_at | DATETIME | | NOT NULL | 计算时间 | 2023-11-05 16:00:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 16:00:00 | + +--- + +### Contact 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| contact_id | VARCHAR | 64 | PK, NOT NULL | 联系人唯一标识 | CNT_20231105001 | +| contact_name | VARCHAR | 100 | NOT NULL | 联系人姓名 | 张伟 | +| mobile_phone | VARCHAR | 20 | UNIQUE | 手机号 | 13800138000 | +| email | VARCHAR | 200 | UNIQUE | 邮箱地址 | zhangwei@company.com | +| wechat_id | VARCHAR | 100 | | 微信ID | wx_zhangwei | +| job_title | VARCHAR | 100 | | 职位 | 首席技术官 | +| department | VARCHAR | 100 | | 部门 | 技术部 | +| contact_status | VARCHAR | 50 | NOT NULL | 联系人状态 | ACTIVE、INACTIVE、UNSUBSCRIBED | +| primary_account_id | VARCHAR | 64 | FK | 主要关联企业ID | ACC_20231105001 | +| contact_source | VARCHAR | 100 | | 来源 | WEBSITE、FORM、IMPORT | +| primary_channel_id | VARCHAR | 64 | FK | 主渠道ID | CH_001 | +| owner_user_id | VARCHAR | 64 | FK | 负责人ID | USER_001 | +| created_at | DATETIME | | NOT NULL | 创建时间 | 2023-11-05 10:30:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 15:20:00 | +| lifecycle_stage | VARCHAR | 50 | | 生命周期阶段 | LEAD、MQL、SQL、CUSTOMER | +| is_decision_maker | BOOLEAN | | DEFAULT FALSE | 是否决策者 | true | +| is_verified | BOOLEAN | | DEFAULT FALSE | 是否已验证 | true | + +--- + +### ContactSummary 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| summary_id | VARCHAR | 64 | PK, NOT NULL | 汇总记录唯一标识 | CNTS_20231105001 | +| contact_id | VARCHAR | 64 | FK, UNIQUE | 联系人ID | CNT_20231105001 | +| total_touchpoints | INT | | DEFAULT 0 | 
总触点数 | 87 | +| total_events | INT | | DEFAULT 0 | 总事件数 | 234 | +| email_opens | INT | | DEFAULT 0 | 邮件打开次数 | 45 | +| email_clicks | INT | | DEFAULT 0 | 邮件点击次数 | 23 | +| form_submissions | INT | | DEFAULT 0 | 表单提交次数 | 12 | +| content_downloads | INT | | DEFAULT 0 | 内容下载次数 | 8 | +| engagement_score | INT | | DEFAULT 0 | 参与度评分(0-100) | 78 | +| last_activity_at | DATETIME | | | 最后活跃时间 | 2023-11-05 14:30:00 | +| days_since_last_activity | INT | | DEFAULT 0 | 距上次活跃天数 | 1 | +| calculated_at | DATETIME | | NOT NULL | 计算时间 | 2023-11-05 16:00:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 16:00:00 | + +--- + +### Lead 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| lead_id | VARCHAR | 64 | PK, NOT NULL | 线索唯一标识 | LEAD_20231105001 | +| lead_name | VARCHAR | 100 | NOT NULL | 线索姓名 | 李明 | +| company_name | VARCHAR | 200 | | 公司名称 | 腾讯科技有限公司 | +| mobile_phone | VARCHAR | 20 | | 手机号 | 13900139000 | +| email | VARCHAR | 200 | | 邮箱地址 | liming@company.com | +| wechat_id | VARCHAR | 100 | | 微信ID | wx_liming | +| job_title | VARCHAR | 100 | | 职位 | 产品经理 | +| lead_source | VARCHAR | 100 | NOT NULL | 线索来源 | WEBSITE、FORM、CAMPAIGN | +| channel_id | VARCHAR | 64 | FK | 渠道ID | CH_001 | +| campaign_id | VARCHAR | 64 | FK | 营销活动ID | CMP_001 | +| lead_status | VARCHAR | 50 | NOT NULL | 线索状态 | NEW、CONTACTED、QUALIFIED | +| lead_score | INT | | DEFAULT 0 | 线索评分(0-100) | 80 | +| lead_grade | VARCHAR | 10 | | 线索等级 | A、B、C、D | +| industry_id | VARCHAR | 64 | FK | 行业ID | IND_001 | +| province | VARCHAR | 50 | | 省份 | 广东省 | +| city | VARCHAR | 50 | | 城市 | 深圳市 | +| owner_user_id | VARCHAR | 64 | FK | 负责人ID | USER_001 | +| created_at | DATETIME | | NOT NULL | 创建时间 | 2023-11-05 10:30:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 15:20:00 | +| last_contacted_at | DATETIME | | | 最后联系时间 | 2023-11-05 14:00:00 | +| converted_at | DATETIME | | | 转化时间 | 2023-11-10 09:00:00 | +| converted_contact_id | VARCHAR | 64 | FK | 转化后联系人ID | 
CNT_20231110001 | +| converted_account_id | VARCHAR | 64 | FK | 转化后企业ID | ACC_20231110001 | +| converted_opportunity_id | VARCHAR | 64 | FK | 转化后商机ID | OPP_20231110001 | +| is_qualified | BOOLEAN | | DEFAULT FALSE | 是否为合格线索 | true | + +--- + +### LeadSummary 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| summary_id | VARCHAR | 64 | PK, NOT NULL | 汇总记录唯一标识 | LEADS_20231105001 | +| lead_id | VARCHAR | 64 | FK, UNIQUE | 线索ID | LEAD_20231105001 | +| total_touchpoints | INT | | DEFAULT 0 | 总触点数 | 12 | +| total_events | INT | | DEFAULT 0 | 总事件数 | 45 | +| form_submissions | INT | | DEFAULT 0 | 表单提交次数 | 3 | +| content_downloads | INT | | DEFAULT 0 | 内容下载次数 | 2 | +| page_views | INT | | DEFAULT 0 | 页面浏览次数 | 28 | +| days_in_pipeline | INT | | DEFAULT 0 | 在管道中天数 | 7 | +| contact_attempts | INT | | DEFAULT 0 | 联系尝试次数 | 4 | +| last_activity_at | DATETIME | | | 最后活跃时间 | 2023-11-05 14:30:00 | +| last_contact_attempt_at | DATETIME | | | 最后联系尝试时间 | 2023-11-05 11:00:00 | +| calculated_at | DATETIME | | NOT NULL | 计算时间 | 2023-11-05 16:00:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 16:00:00 | + +--- + +### Opportunity 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| opportunity_id | VARCHAR | 64 | PK, NOT NULL | 商机唯一标识 | OPP_20231105001 | +| opportunity_name | VARCHAR | 200 | NOT NULL | 商机名称 | 腾讯科技AI平台采购项目 | +| account_id | VARCHAR | 64 | FK, NOT NULL | 关联企业ID | ACC_20231105001 | +| primary_contact_id | VARCHAR | 64 | FK | 主要联系人ID | CNT_20231105001 | +| lead_id | VARCHAR | 64 | FK | 来源线索ID | LEAD_20231105001 | +| opportunity_type | VARCHAR | 50 | | 商机类型 | NEW_BUSINESS、UPSELL、RENEWAL | +| opportunity_source | VARCHAR | 100 | | 商机来源 | LEAD_CONVERSION、DIRECT_SALES | +| amount | DECIMAL | 18,2 | | 预计金额(元) | 1000000.00 | +| currency | VARCHAR | 10 | | 货币单位 | CNY、USD、EUR | +| stage | VARCHAR | 50 | NOT NULL | 当前阶段 | QUALIFICATION、PROPOSAL、CONTRACT | +| probability | INT | | 
DEFAULT 0 | 赢单概率(0-100) | 60 | +| expected_close_date | DATE | | | 预计成交日期 | 2023-12-31 | +| actual_close_date | DATE | | | 实际成交日期 | 2023-12-25 | +| close_reason | VARCHAR | 200 | | 关闭原因 | 价格、竞争对手、预算、成功 | +| owner_user_id | VARCHAR | 64 | FK | 负责人ID | USER_001 | +| campaign_id | VARCHAR | 64 | FK | 来源活动ID | CMP_001 | +| created_at | DATETIME | | NOT NULL | 创建时间 | 2023-11-05 10:30:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 15:20:00 | +| days_in_stage | INT | | DEFAULT 0 | 当前阶段停留天数 | 15 | +| is_won | BOOLEAN | | DEFAULT FALSE | 是否赢单 | false | +| is_lost | BOOLEAN | | DEFAULT FALSE | 是否输单 | false | + +--- + +### Channel 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| channel_id | VARCHAR | 64 | PK, NOT NULL | 渠道唯一标识 | CH_001 | +| channel_name | VARCHAR | 100 | NOT NULL | 渠道名称 | 官网产品页 | +| channel_type | VARCHAR | 50 | NOT NULL | 渠道类型 | WEBSITE、WECHAT、EMAIL、PHONE | +| channel_category | VARCHAR | 50 | | 渠道分类 | ONLINE、OFFLINE、SOCIAL | +| parent_channel_id | VARCHAR | 64 | FK | 父渠道ID | CH_PARENT_001 | +| channel_status | VARCHAR | 50 | NOT NULL | 渠道状态 | ACTIVE、INACTIVE、TESTING | +| cost | DECIMAL | 18,2 | | 渠道成本(元/月) | 50000.00 | +| created_at | DATETIME | | NOT NULL | 创建时间 | 2023-11-05 10:30:00 | +| updated_at | DATETIME | | NOT NULL | 更新时间 | 2023-11-05 15:20:00 | + +--- + +### Touchpoint 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| touchpoint_id | VARCHAR | 64 | PK, NOT NULL | 触点唯一标识 | TP_20231105001 | +| touchpoint_type | VARCHAR | 50 | NOT NULL | 触点类型 | PAGE_VIEW、FORM_SUBMIT、EMAIL | +| channel_id | VARCHAR | 64 | FK | 渠道ID | CH_001 | +| campaign_id | VARCHAR | 64 | FK | 营销活动ID | CMP_001 | +| contact_id | VARCHAR | 64 | FK | 联系人ID | CNT_20231105001 | +| lead_id | VARCHAR | 64 | FK | 线索ID | LEAD_20231105001 | +| account_id | VARCHAR | 64 | FK | 企业ID | ACC_20231105001 | +| touchpoint_time | DATETIME | | NOT NULL | 触点发生时间 | 2023-11-05 14:30:00 | +| 
touchpoint_direction | VARCHAR | 20 | | 触点方向 | INBOUND、OUTBOUND | +| touchpoint_status | VARCHAR | 50 | | 触点状态 | COMPLETED、SCHEDULED、CANCELLED | +| content_type | VARCHAR | 50 | | 内容类型 | WHITEPAPER、WEBINAR、DEMO | +| content_id | VARCHAR | 64 | FK | 内容ID | CONTENT_001 | +| subject | VARCHAR | 200 | | 主题 | 产品演示会议 | +| duration_seconds | INT | | | 持续时长(秒) | 3600 | +| owner_user_id | VARCHAR | 64 | FK | 负责人ID | USER_001 | +| created_at | DATETIME | | NOT NULL | 创建时间 | 2023-11-05 14:30:00 | + +--- + +### Event 详细字段设计 + +| 字段名 | 类型 | 长度 | 约束 | 说明 | 示例值 | +|--------|------|------|------|------|--------| +| event_id | VARCHAR | 64 | PK, NOT NULL | 事件唯一标识 | EVT_20231105001 | +| event_name | VARCHAR | 100 | NOT NULL | 事件名称 | page_view | +| event_type | VARCHAR | 50 | NOT NULL | 事件类型 | PAGE_VIEW、CLICK、FORM_SUBMIT | +| channel_id | VARCHAR | 64 | FK | 渠道ID | CH_001 | +| contact_id | VARCHAR | 64 | FK | 联系人ID | CNT_20231105001 | +| lead_id | VARCHAR | 64 | FK | 线索ID | LEAD_20231105001 | +| account_id | VARCHAR | 64 | FK | 企业ID | ACC_20231105001 | +| event_time | DATETIME | | NOT NULL | 事件发生时间 | 2023-11-05 14:35:20 | +| session_id | VARCHAR | 64 | | 会话ID | SESSION_20231105001 | +| device_type | VARCHAR | 50 | | 设备类型 | DESKTOP、MOBILE、TABLET | +| browser | VARCHAR | 50 | | 浏览器 | Chrome、Safari、Firefox | +| os | VARCHAR | 50 | | 操作系统 | Windows 10、iOS 16 | +| ip_address | VARCHAR | 50 | | IP地址 | 192.168.1.1 | +| page_url | VARCHAR | 1000 | | 页面URL | https://example.com/product | +| referrer_url | VARCHAR | 1000 | | 来源URL | https://baidu.com/search | +| created_at | DATETIME | | NOT NULL | 创建时间 | 2023-11-05 14:35:20 | + +--- + +## 实体统计汇总 + +### 实体分类统计 + +| 实体分类 | 实体名称 | 数量 | 说明 | +|---------|---------|------|------| +| 客户主体实体 | Account、Contact、Lead | 3 | 核心客户数据 | +| 汇总数据实体 | AccountSummary、ContactSummary、LeadSummary | 3 | 统计汇总数据 | +| 业务实体 | Opportunity、Product、ProductCategory | 3 | 业务交易数据 | +| 营销实体 | Campaign、Channel | 2 | 营销活动管理 | +| 交互实体 | Touchpoint、Event | 2 | 客户互动数据 | +| 关系实体 | 
AccountContactRelation、AccountRelation、OpportunityProduct、CampaignMember、TagRelation、SegmentMember | 6 | 实体关系映射 | +| 身份实体 | AccountChannelIdentity、ContactChannelIdentity、LeadChannelIdentity | 3 | 全渠道身份映射 | +| 分析实体 | Segment、Tag、ScoreModel、ScoreRecord、ScoreHistory、Attribution、TouchpointAttribution、CustomerJourney、JourneyStage | 9 | 数据分析洞察 | +| 性能实体 | ChannelPerformance、CampaignPerformance | 2 | 绩效统计 | +| 历史实体 | OpportunityStageHistory | 1 | 变更历史追踪 | +| 支撑实体 | Industry、TouchpointAttachment | 2 | 基础支撑数据 | +| 合计 | | 36 | 完整覆盖B2B CDP业务场景 | + +--- + +## 汇总表设计说明 + +### 设计原则 + +**性能优化** +- 避免频繁的多表JOIN和聚合计算 +- 提升客户360度视图的查询速度 +- 降低数据库负载 + +**业务需求** +- 快速展示客户健康度评分 +- 实时显示客户价值指标 +- 支持客户预警和监控 + +**数据一致性** +- 统一的计算口径 +- 定时批量更新保证数据准确性 +- 避免实时计算的结果不一致 + +### 更新机制 + +**更新触发条件:** +- 定时任务:每小时执行一次全量更新 +- 实时触发:关键业务事件发生时立即更新(如商机赢单、联系人新增) +- 手动刷新:管理员按需触发更新 + +**更新策略:** +- 增量更新:仅更新有变化的记录 +- 全量更新:定期执行完整重算 +- 异步更新:使用消息队列避免阻塞主业务流程 + +--- + +## 索引设计建议 + +### Account表索引 + +```sql +-- 主键索引 +PRIMARY KEY (account_id); + +-- 唯一索引 +CREATE UNIQUE INDEX uk_account_credit_code ON Account(unified_social_credit_code) +WHERE unified_social_credit_code IS NOT NULL; + +-- 业务查询索引 +CREATE INDEX idx_account_name ON Account(account_name); +CREATE INDEX idx_account_status ON Account(account_status); +CREATE INDEX idx_account_owner ON Account(owner_user_id); +CREATE INDEX idx_account_created ON Account(created_at DESC); +CREATE INDEX idx_account_industry ON Account(industry_id); + +-- 组合索引 +CREATE INDEX idx_account_type_status ON Account(account_type, account_status); +CREATE INDEX idx_account_location ON Account(province, city); +``` + +### AccountSummary表索引 + +```sql +-- 主键和唯一索引 +PRIMARY KEY (summary_id); +CREATE UNIQUE INDEX uk_summary_account ON AccountSummary(account_id); + +-- 查询索引 +CREATE INDEX idx_summary_health_score ON AccountSummary(health_score DESC); +CREATE INDEX idx_summary_last_activity ON AccountSummary(last_activity_at DESC); +CREATE INDEX idx_summary_total_revenue ON 
AccountSummary(total_revenue DESC); +CREATE INDEX idx_summary_win_rate ON AccountSummary(win_rate DESC); +``` + +### Contact表索引 + +```sql +-- 主键索引 +PRIMARY KEY (contact_id); + +-- 唯一索引 +CREATE UNIQUE INDEX uk_contact_phone ON Contact(mobile_phone) +WHERE mobile_phone IS NOT NULL; +CREATE UNIQUE INDEX uk_contact_email ON Contact(email) +WHERE email IS NOT NULL; + +-- 业务查询索引 +CREATE INDEX idx_contact_name ON Contact(contact_name); +CREATE INDEX idx_contact_account ON Contact(primary_account_id); +CREATE INDEX idx_contact_status ON Contact(contact_status); +CREATE INDEX idx_contact_wechat ON Contact(wechat_id); + +-- 组合索引 +CREATE INDEX idx_contact_phone_email ON Contact(mobile_phone, email); +``` + +### Event表索引(ClickHouse) + +```sql +-- 排序键设计(采样表达式必须包含在排序键/主键中) +ORDER BY (channel_id, event_time, contact_id, event_type, cityHash64(event_id)); + +-- 分区键设计 +PARTITION BY toYYYYMM(event_time); + +-- 采样表达式 +SAMPLE BY cityHash64(event_id); +``` + +### Touchpoint表索引 + +```sql +-- 主键索引 +PRIMARY KEY (touchpoint_id); + +-- 查询索引 +CREATE INDEX idx_touchpoint_contact ON Touchpoint(contact_id, touchpoint_time DESC); +CREATE INDEX idx_touchpoint_lead ON Touchpoint(lead_id, touchpoint_time DESC); +CREATE INDEX idx_touchpoint_account ON Touchpoint(account_id, touchpoint_time DESC); +CREATE INDEX idx_touchpoint_channel ON Touchpoint(channel_id, touchpoint_time DESC); +CREATE INDEX idx_touchpoint_campaign ON Touchpoint(campaign_id, touchpoint_time DESC); +CREATE INDEX idx_touchpoint_time ON Touchpoint(touchpoint_time DESC); +CREATE INDEX idx_touchpoint_type ON Touchpoint(touchpoint_type, touchpoint_time DESC); +``` + +--- + +## 总结 + +本B2B CDP实体设计方案提供: + +**核心能力** +- 36个实体,完整覆盖B2B CDP业务场景 +- 全渠道身份映射方案,支持跨渠道客户识别 +- 汇总数据表设计,优化查询性能 +- 完整的客户生命周期管理 +- 从线索到商机的完整转化流程 + +**关键特性** +- 灵活的标签和分群能力 +- 多维度的归因分析能力 +- 详细的字段设计和数据字典 +- 专业的数据库架构建议 +- 完整的索引优化方案 + +**适用场景** +- 全渠道客户数据整合 +- 客户360度画像分析 +- 精准营销和客户分群 +- 销售线索管理和转化 +- 客户旅程分析 +- 营销归因分析 +- 客户价值评估 +- 客户健康度监控 + +根据实际业务需要,可以选择性实现部分实体,并在后续迭代中逐步完善。 diff --git
a/docs/b2b-cdp-production-architecture.md b/docs/b2b-cdp-production-architecture.md new file mode 100644 index 000000000..0241e4a21 --- /dev/null +++ b/docs/b2b-cdp-production-architecture.md @@ -0,0 +1,1142 @@ +# B2B CDP 生产级系统架构设计 + +## 目录 +- [系统整体架构](#系统整体架构) +- [核心实体设计](#核心实体设计) +- [关键时序流程](#关键时序流程) +- [数据流转架构](#数据流转架构) +- [技术实现方案](#技术实现方案) + +--- + +## 系统整体架构 + +### 整体架构分层图 + +```mermaid +graph TB + subgraph DataSource["数据源层 Data Source"] + CRM[CRM系统
Salesforce/SAP] + WEWORK[企业微信
API/Webhook] + WECHAT[微信公众号
表单提交] + INTERNAL[内部系统
ERP/OA] + end + + subgraph Ingestion["数据接入层 Ingestion Gateway"] + API[API Gateway
统一接入] + ADAPTER1[CRM适配器] + ADAPTER2[企微适配器] + ADAPTER3[公众号适配器] + end + + subgraph MQ["消息队列层 Message Queue"] + KAFKA[Kafka Cluster
3 Broker] + T1[data-ingestion] + T2[identity-resolution] + T3[summary-calculation] + T4[notification] + end + + subgraph Processing["数据处理层 Processing Engine"] + CLEANSING[数据清洗服务] + IDENTITY[身份识别引擎] + SCORING[评分引擎] + TAGGING[标签引擎] + SUMMARY[汇总计算服务] + end + + subgraph Storage["数据存储层 Storage"] + PG[(PostgreSQL集群
主从分库分表)] + CH[(ClickHouse集群
行为数据)] + REDIS[(Redis Cluster
缓存)] + ES[(Elasticsearch
搜索)] + end + + subgraph Application["应用服务层 Application"] + LEAD_SVC[Lead服务] + CONTACT_SVC[Contact服务] + ACCOUNT_SVC[Account服务] + OPP_SVC[Opportunity服务] + QUERY_SVC[查询服务] + end + + subgraph Client["客户端 Client"] + WEB[Web Portal] + MOBILE[Mobile App] + OPENAPI[Open API] + end + + CRM --> API + WEWORK --> API + WECHAT --> API + INTERNAL --> API + + API --> ADAPTER1 + API --> ADAPTER2 + API --> ADAPTER3 + + ADAPTER1 --> KAFKA + ADAPTER2 --> KAFKA + ADAPTER3 --> KAFKA + + KAFKA --> T1 + KAFKA --> T2 + KAFKA --> T3 + KAFKA --> T4 + + T1 --> CLEANSING + CLEANSING --> IDENTITY + T2 --> IDENTITY + IDENTITY --> LEAD_SVC + IDENTITY --> CONTACT_SVC + + T3 --> SUMMARY + T4 --> SUMMARY + + LEAD_SVC --> PG + CONTACT_SVC --> PG + ACCOUNT_SVC --> PG + OPP_SVC --> PG + + PG --> REDIS + PG --> ES + CH --> REDIS + + QUERY_SVC --> REDIS + QUERY_SVC --> PG + QUERY_SVC --> CH + QUERY_SVC --> ES + + WEB --> Application + MOBILE --> Application + OPENAPI --> Application +``` + +--- + +### 技术架构详图 + +```mermaid +graph TB + subgraph HA["高可用架构 High Availability"] + direction TB + LB[负载均衡
Nginx/ALB] + + subgraph AppCluster["应用集群"] + APP1[应用节点1] + APP2[应用节点2] + APP3[应用节点3] + end + + subgraph DBCluster["数据库集群"] + PG_MASTER[PostgreSQL主库] + PG_SLAVE1[PostgreSQL从库1] + PG_SLAVE2[PostgreSQL从库2] + end + + subgraph CacheCluster["缓存集群"] + REDIS_M1[Redis Master1] + REDIS_S1[Redis Slave1] + REDIS_M2[Redis Master2] + REDIS_S2[Redis Slave2] + end + + subgraph CHCluster["ClickHouse集群"] + CH1[CH节点1
Shard1 Replica1] + CH2[CH节点2
Shard1 Replica2] + CH3[CH节点3
Shard2 Replica1] + end + + LB --> APP1 + LB --> APP2 + LB --> APP3 + + APP1 --> PG_MASTER + APP2 --> PG_SLAVE1 + APP3 --> PG_SLAVE2 + + PG_MASTER --> PG_SLAVE1 + PG_MASTER --> PG_SLAVE2 + + APP1 --> REDIS_M1 + APP2 --> REDIS_M2 + + REDIS_M1 --> REDIS_S1 + REDIS_M2 --> REDIS_S2 + end +``` + +--- + +## 核心实体设计 + +### Account实体ER图(完整版) + +```mermaid +erDiagram + Account ||--|| AccountSummary : aggregates + Account ||--o{ AccountChannelIdentity : has + Account ||--o{ Contact : owns + Account ||--o{ Opportunity : has + + Account { + varchar account_id PK "分片键" + varchar account_name "企业名称" + varchar unified_social_credit_code UK "信用代码" + varchar account_type "类型" + varchar account_status "状态" + int shard_id "分片ID" + datetime created_at "创建时间" + datetime updated_at "更新时间" + } + + AccountSummary { + varchar summary_id PK + varchar account_id FK, UK "关联账户" + int total_contacts "联系人数" + int total_opportunities "商机数" + decimal total_revenue "累计收入" + int health_score "健康度" + datetime calculated_at "计算时间" + } + + AccountChannelIdentity { + varchar identity_id PK + varchar account_id FK "关联账户" + varchar channel_id FK "渠道" + varchar channel_account_id "渠道ID" + boolean is_verified "已验证" + datetime first_seen_at "首次发现" + datetime last_seen_at "最后活跃" + } + + Contact { + varchar contact_id PK "分片键" + varchar contact_name "姓名" + varchar mobile_phone UK "手机号" + varchar email UK "邮箱" + varchar primary_account_id FK "主账户" + int shard_id "分片ID" + datetime created_at "创建时间" + } + + Opportunity { + varchar opportunity_id PK + varchar account_id FK "关联账户" + decimal amount "金额" + varchar stage "阶段" + boolean is_won "已赢单" + datetime created_at "创建时间" + } +``` + +--- + +### Contact实体ER图(完整版) + +```mermaid +erDiagram + Contact ||--|| ContactSummary : aggregates + Contact ||--o{ ContactChannelIdentity : has + Contact ||--o{ AccountContactRelation : belongs_to + Contact ||--o{ Touchpoint : receives + + Contact { + varchar contact_id PK "分片键" + varchar contact_name "姓名" + varchar mobile_phone UK
"手机号" + varchar email UK "邮箱" + varchar wechat_id "微信ID" + varchar primary_account_id FK "主账户" + int shard_id "分片ID-hash(mobile_phone)" + datetime created_at "创建时间" + } + + ContactSummary { + varchar summary_id PK + varchar contact_id FK, UK "关联联系人" + int total_touchpoints "触点数" + int email_opens "邮件打开" + int engagement_score "参与度" + datetime last_activity_at "最后活跃" + datetime calculated_at "计算时间" + } + + ContactChannelIdentity { + varchar identity_id PK + varchar contact_id FK "关联联系人" + varchar channel_id FK "渠道" + varchar channel_user_id "渠道用户ID" + varchar identity_type "身份类型" + boolean is_verified "已验证" + datetime first_seen_at "首次发现" + } + + AccountContactRelation { + varchar relation_id PK + varchar account_id FK "企业" + varchar contact_id FK "联系人" + varchar role_in_account "角色" + varchar decision_level "决策级别" + boolean is_primary_contact "主联系人" + } + + Touchpoint { + varchar touchpoint_id PK + varchar contact_id FK "联系人" + varchar channel_id FK "渠道" + datetime touchpoint_time "触点时间" + varchar touchpoint_type "类型" + } +``` + +--- + +### Lead实体ER图(完整版) + +```mermaid +erDiagram + Lead ||--|| LeadSummary : aggregates + Lead ||--o{ LeadChannelIdentity : has + Lead ||--o| Contact : converts_to + Lead ||--o| Opportunity : converts_to + + Lead { + varchar lead_id PK "分片键" + varchar lead_name "姓名" + varchar company_name "公司" + varchar mobile_phone "手机号" + varchar email "邮箱" + varchar channel_id FK "来源渠道" + varchar campaign_id FK "来源活动" + varchar lead_status "状态" + int lead_score "评分" + varchar lead_grade "等级" + int shard_id "分片ID" + datetime created_at "创建时间" + datetime converted_at "转化时间" + varchar converted_contact_id FK "转化联系人" + } + + LeadSummary { + varchar summary_id PK + varchar lead_id FK, UK "关联线索" + int total_touchpoints "触点数" + int total_events "事件数" + int form_submissions "表单提交" + int days_in_pipeline "管道天数" + datetime last_activity_at "最后活跃" + datetime calculated_at "计算时间" + } + + LeadChannelIdentity { + varchar identity_id PK + varchar lead_id FK "关联线索" +
varchar channel_id FK "渠道" + varchar channel_user_id "渠道用户ID" + datetime captured_at "捕获时间" + varchar utm_source "UTM来源" + varchar utm_campaign "UTM活动" + } +``` + +--- + +## 关键时序流程 + +### 1. 多渠道数据采集与身份识别流程 + +```mermaid +sequenceDiagram + participant Source as 数据源
CRM/企微/公众号 + participant Gateway as API Gateway
数据接入网关 + participant Kafka as Kafka
data-ingestion + participant Cleansing as 数据清洗服务 + participant Kafka2 as Kafka
identity-resolution + participant Identity as 身份识别引擎 + participant PG as PostgreSQL
主库 + participant Redis as Redis
缓存 + participant ES as Elasticsearch
搜索引擎 + participant Kafka3 as Kafka
summary-calculation + participant Summary as 汇总计算服务 + + Source->>Gateway: 1.推送数据
Webhook/API + Note over Gateway: 数据格式转换
统一数据模型 + Gateway->>Kafka: 2.写入消息队列
Topic: data-ingestion + Note over Kafka: 数据缓冲
峰值削峰 + + Kafka->>Cleansing: 3.消费消息 + Note over Cleansing: 数据清洗
- 格式规范化
- 必填字段验证
- 数据去重 + + Cleansing->>Kafka2: 4.发送到身份识别队列
Topic: identity-resolution + + Kafka2->>Identity: 5.消费身份识别消息 + + Identity->>PG: 6.查询是否存在
按手机号/邮箱/企业名称 + PG-->>Identity: 7.返回查询结果 + + alt 找到唯一匹配 + Identity->>PG: 8a.更新现有实体
合并数据 + Identity->>PG: 8b.新增ChannelIdentity
记录渠道身份 + Note over Identity: 更新模式
保留历史数据 + else 未找到匹配 + Identity->>PG: 8c.创建新实体
Lead/Contact/Account + Identity->>PG: 8d.创建ChannelIdentity
初始渠道身份 + Note over Identity: 创建模式
生成全局唯一ID + else 找到多个匹配(疑似重复) + Identity->>PG: 8e.写入待审核队列
人工确认 + Note over Identity: 风控模式
防止误合并 + end + + Identity->>Redis: 9.更新缓存
写入最新数据 + Identity->>ES: 10.更新搜索索引
异步同步 + + Identity->>Kafka3: 11.触发汇总计算
Topic: summary-calculation + Note over Identity: 异步触发
不阻塞主流程 + + Kafka3->>Summary: 12.消费汇总消息 + Summary->>PG: 13.查询关联数据
聚合统计 + Summary->>PG: 14.更新Summary表
写入汇总结果 + Summary->>Redis: 15.更新汇总缓存 + + Gateway-->>Source: 16.返回响应
同步快速响应 + Note over Gateway: 响应时间 < 100ms
不等待后续处理 +``` + +**关键设计说明:** + +1. **同步响应快速返回**:API Gateway在写入Kafka后立即返回,响应时间 < 100ms +2. **异步处理解耦**:身份识别、汇总计算都是异步处理,互不影响 +3. **消息队列削峰**:Kafka缓冲高峰流量,保护下游服务 +4. **多级缓存**:Redis缓存热点数据,减少数据库压力 +5. **最终一致性**:汇总数据允许30秒内延迟,保证高可用 + +--- + +### 2. Lead转化为Contact/Opportunity流程 + +```mermaid +sequenceDiagram + participant Sales as 销售人员 + participant App as 应用服务 + participant LeadSvc as Lead服务 + participant ContactSvc as Contact服务 + participant AccountSvc as Account服务 + participant OppSvc as Opportunity服务 + participant PG as PostgreSQL + participant Kafka as Kafka + participant Summary as 汇总计算服务 + participant Notification as 通知服务 + + Sales->>App: 1.确认Lead转化
lead_id + 转化信息 + App->>LeadSvc: 2.调用Lead转化接口 + + LeadSvc->>PG: 3.查询Lead详情
验证状态 + PG-->>LeadSvc: 4.返回Lead数据 + + alt Lead已转化 + LeadSvc-->>App: 5a.返回错误
Lead已转化 + else Lead可转化 + LeadSvc->>PG: 5b.开启事务
BEGIN TRANSACTION + + LeadSvc->>ContactSvc: 6.查询是否存在Contact
按手机号/邮箱 + ContactSvc->>PG: 7.查询Contact表 + PG-->>ContactSvc: 8.返回查询结果 + + alt Contact不存在 + ContactSvc->>PG: 9a.创建新Contact
INSERT Contact + ContactSvc->>PG: 9b.创建ContactChannelIdentity + Note over ContactSvc: 新建Contact
继承Lead的渠道信息 + else Contact已存在 + ContactSvc->>PG: 9c.更新Contact
UPDATE Contact + ContactSvc->>PG: 9d.补充ContactChannelIdentity + Note over ContactSvc: 更新Contact
合并Lead数据 + end + + ContactSvc->>AccountSvc: 10.关联或创建Account + AccountSvc->>PG: 11.查询Account
按企业名称/信用代码 + PG-->>AccountSvc: 12.返回Account + + alt Account不存在 + AccountSvc->>PG: 13a.创建Account + AccountSvc->>PG: 13b.创建AccountChannelIdentity + else Account存在 + AccountSvc->>PG: 13c.关联Contact到Account
AccountContactRelation + end + + LeadSvc->>OppSvc: 14.创建Opportunity
可选步骤 + OppSvc->>PG: 15.创建Opportunity记录 + + LeadSvc->>PG: 16.更新Lead状态
lead_status=CONVERTED + LeadSvc->>PG: 17.记录转化关系
converted_contact_id等 + + LeadSvc->>PG: 18.提交事务
COMMIT + + LeadSvc->>Kafka: 19.发送转化事件
Topic: summary-calculation + Note over Kafka: 触发汇总重算
- LeadSummary
- ContactSummary
- AccountSummary + + Kafka->>Summary: 20.消费转化事件 + Summary->>PG: 21.重新计算汇总数据
批量更新Summary表 + + LeadSvc->>Kafka: 22.发送通知事件
Topic: notification + Kafka->>Notification: 23.消费通知事件 + Notification->>Sales: 24.发送通知
邮件/站内信/企微 + + LeadSvc-->>App: 25.返回转化结果
contact_id, account_id + App-->>Sales: 26.展示转化成功 + end +``` + +**关键设计说明:** + +1. **事务保证一致性**:Lead转化涉及多表操作,使用数据库事务保证原子性 +2. **幂等性设计**:检查Lead状态,防止重复转化 +3. **异步汇总计算**:转化完成后,异步触发汇总重算,不阻塞主流程 +4. **通知解耦**:通过消息队列发送通知,失败可重试 + +--- + +### 3. 实时汇总数据计算流程 + +```mermaid +sequenceDiagram + participant Trigger as 触发源
业务事件/定时任务 + participant Kafka as Kafka
summary-calculation + participant Summary as 汇总计算服务 + participant PG as PostgreSQL + participant Redis as Redis + participant Lock as 分布式锁
Redis + + Trigger->>Kafka: 1.发送汇总任务
account_id/contact_id + Note over Trigger: 触发场景:
- 新增Contact
- 商机赢单
- 新增Touchpoint
- 定时全量刷新 + + Kafka->>Summary: 2.消费汇总消息
并发消费 + + Summary->>Lock: 3.尝试获取分布式锁
key: summary:account:ACC001 + Note over Lock: 防止并发重复计算
锁超时时间: 30秒 + + alt 获取锁失败 + Lock-->>Summary: 4a.锁已被占用 + Summary->>Summary: 4b.跳过本次计算
等待下次触发 + else 获取锁成功 + Lock-->>Summary: 4c.锁获取成功 + + Summary->>PG: 5.查询Account基础信息 + PG-->>Summary: 6.返回Account数据 + + par 并行查询统计数据 + Summary->>PG: 7a.统计Contact数量
SELECT COUNT FROM Contact + Summary->>PG: 7b.统计Opportunity数据
SELECT SUM/COUNT FROM Opportunity + Summary->>PG: 7c.统计Lead数量
SELECT COUNT FROM Lead + Summary->>PG: 7d.统计Touchpoint数据
SELECT COUNT FROM Touchpoint + Summary->>PG: 7e.查询最后活跃时间
SELECT MAX FROM Touchpoint + end + + PG-->>Summary: 8.返回所有统计结果 + + Summary->>Summary: 9.计算汇总指标
- total_contacts
- total_revenue
- win_rate
- health_score等 + Note over Summary: 计算逻辑:
win_rate = won / (won + lost) × 100(won + lost = 0 时取 0)
health_score = f(last_activity) + + Summary->>PG: 10.写入AccountSummary表
INSERT ... ON CONFLICT (account_id) DO UPDATE + Note over Summary: Upsert操作
记录calculated_at时间戳 + + Summary->>Redis: 11.更新缓存
SET account:summary:ACC001 + Note over Redis: 缓存TTL: 300秒
下次查询直接从缓存读取 + + Summary->>Lock: 12.释放分布式锁 + Lock-->>Summary: 13.锁释放成功 + + Summary->>Kafka: 14.发送计算完成事件
可选通知下游 + end +``` + +**关键设计说明:** + +1. **分布式锁防重**:使用Redis分布式锁,防止同一Account并发重复计算 +2. **并行查询优化**:统计数据可以并行查询,提升性能 +3. **Upsert操作**:使用PostgreSQL的`INSERT ... ON CONFLICT (account_id) DO UPDATE`,支持增量更新 +4. **缓存策略**:计算完成后立即更新缓存,保证数据新鲜度 +5. **异步非阻塞**:整个计算过程异步进行,不影响主业务 + +--- + +### 4. 客户360度查询流程(高性能) + +```mermaid +sequenceDiagram + participant User as 用户 + participant App as 应用服务 + participant Cache as Redis
L1缓存 + participant Query as 查询服务 + participant PG as PostgreSQL
主数据 + participant CH as ClickHouse
行为数据 + participant ES as Elasticsearch
搜索引擎 + + User->>App: 1.查询客户360度视图
account_id: ACC001 + App->>Cache: 2.查询L1缓存
GET account:full:ACC001 + + alt 缓存命中 + Cache-->>App: 3a.返回完整数据
响应时间 < 10ms + App-->>User: 4a.返回结果 + Note over App: 缓存命中率 > 90%
热点数据实时响应 + else 缓存未命中 + Cache-->>App: 3b.缓存MISS + + par 并行查询多数据源 + App->>Query: 4b.查询基础信息 + Query->>PG: 5a.查询Account基础表 + Query->>PG: 5b.查询AccountSummary汇总表 + Query->>PG: 5c.查询Contact列表
LIMIT 10 + Query->>PG: 5d.查询Opportunity列表
LIMIT 10 + + App->>Query: 4c.查询行为数据 + Query->>CH: 6a.查询最近30天Event
GROUP BY event_type + Query->>CH: 6b.查询最近Touchpoint
ORDER BY time DESC LIMIT 20 + + App->>Query: 4d.查询多渠道身份 + Query->>PG: 7.查询AccountChannelIdentity + end + + PG-->>Query: 8.返回主数据 + CH-->>Query: 9.返回行为数据 + + Query->>Query: 10.数据聚合
组装360度视图 + Note over Query: 组装数据结构:
- 基础信息
- 汇总数据
- 关联实体
- 行为统计
- 多渠道身份 + + Query-->>App: 11.返回完整数据 + + App->>Cache: 12.写入缓存
SET account:full:ACC001
TTL: 300秒 + Note over Cache: 缓存策略:
- 热点数据长期缓存
- 冷数据短期缓存
- 更新时主动失效 + + App-->>User: 13.返回结果
响应时间 < 200ms + end +``` + +**关键设计说明:** + +1. **多级缓存**:Redis L1缓存 + 应用内存缓存(可选),缓存命中率 > 90% +2. **并行查询**:同时查询PostgreSQL、ClickHouse、ES,减少总响应时间 +3. **查询优化**: + - Contact/Opportunity只查前10条,避免大结果集 + - ClickHouse查询带时间范围,利用分区裁剪 +4. **缓存失效策略**: + - 写操作主动失效缓存(Cache Aside模式) + - 设置合理的TTL(5分钟) +5. **降级策略**: + - Redis宕机 → 直接查数据库 + - ClickHouse超时 → 降级返回部分数据 + +--- + +### 5. Event事件采集流程(高吞吐) + +```mermaid +sequenceDiagram + participant User as 用户行为 + participant SDK as 埋点SDK
Web/Mobile + participant Gateway as API Gateway + participant Kafka as Kafka
event-stream + participant Flink as Flink
实时计算 + participant CH as ClickHouse
行为数据库 + participant Redis as Redis
实时统计 + + User->>SDK: 1.触发行为事件
页面访问/按钮点击 + SDK->>SDK: 2.批量打包
100条/批或1秒/批 + Note over SDK: 客户端批量上报
减少网络请求 + + SDK->>Gateway: 3.批量上报事件
HTTP POST + Note over Gateway: 接收批量事件
QPS: 10000 + + Gateway->>Gateway: 4.快速验证
签名/限流/格式 + + Gateway->>Kafka: 5.写入Kafka
Topic: event-stream
Partition: hash(user_id) + Note over Kafka: 分区策略:
按user_id哈希
保证同一用户有序 + + Gateway-->>SDK: 6.快速响应
响应时间 < 50ms + + Kafka->>Flink: 7.Flink消费事件流
并行度: 16 + + Flink->>Flink: 8.实时计算
- Session会话识别
- 漏斗分析
- 实时统计 + Note over Flink: 窗口计算:
5秒tumbling window + + Flink->>Redis: 9.写入实时指标
PV/UV/转化率等 + Note over Redis: 实时大屏展示
秒级更新 + + Flink->>CH: 10.批量写入ClickHouse
10000条/批 + Note over CH: 批量插入优化
减少写入次数 + + par 异步关联分析 + Flink->>Flink: 11a.用户画像更新 + Flink->>Flink: 11b.行为标签计算 + Flink->>Kafka: 11c.触发下游任务 + end +``` + +**关键设计说明:** + +1. **客户端批量上报**:SDK端批量打包,减少HTTP请求数 +2. **API快速响应**:写入Kafka后立即返回,响应时间 < 50ms +3. **Kafka分区策略**:按user_id哈希分区,保证同一用户事件有序 +4. **Flink实时计算**: + - Session会话识别 + - 滚动窗口统计(5秒tumbling window) + - 实时漏斗分析 +5. **批量写入ClickHouse**:累积10000条或10秒批量插入,优化写入性能 + +--- + +## 数据流转架构 + +### 数据同步与流转总览 + +```mermaid +graph LR + subgraph Sources["数据源"] + CRM[CRM] + WEWORK[企微] + WECHAT[公众号] + end + + subgraph Ingestion["数据接入"] + API[API Gateway] + end + + subgraph MQ["消息队列"] + K1[data-ingestion] + K2[identity-resolution] + K3[summary-calculation] + end + + subgraph Processing["数据处理"] + CLEAN[数据清洗] + IDENTITY[身份识别] + SUMMARY[汇总计算] + end + + subgraph Storage["数据存储"] + PG[(PostgreSQL)] + CH[(ClickHouse)] + REDIS[(Redis)] + ES[(Elasticsearch)] + end + + subgraph Sync["数据同步"] + CANAL[Canal CDC] + SYNC[数据同步] + end + + CRM --> API + WEWORK --> API + WECHAT --> API + + API --> K1 + K1 --> CLEAN + CLEAN --> K2 + K2 --> IDENTITY + IDENTITY --> PG + IDENTITY --> K3 + K3 --> SUMMARY + SUMMARY --> PG + + PG --> CANAL + CANAL --> ES + CANAL --> REDIS + + PG --> SYNC + SYNC --> CH +``` + +--- + +### PostgreSQL分库分表策略 + +```mermaid +graph TB + subgraph App["应用层"] + APP[应用服务] + end + + subgraph Proxy["分库分表中间件 ShardingSphere"] + SHARDING[分片路由
按account_id/contact_id哈希] + end + + subgraph DB0["数据库0 shard_0"] + PG0_M[(PostgreSQL
Master)] + PG0_S1[(PostgreSQL
Slave1)] + PG0_S2[(PostgreSQL
Slave2)] + end + + subgraph DB1["数据库1 shard_1"] + PG1_M[(PostgreSQL
Master)] + PG1_S1[(PostgreSQL
Slave1)] + end + + subgraph DB15["数据库15 shard_15"] + PG15_M[(PostgreSQL
Master)] + PG15_S1[(PostgreSQL
Slave1)] + end + + APP --> SHARDING + + SHARDING -->|写操作| PG0_M + SHARDING -->|写操作| PG1_M + SHARDING -->|写操作| PG15_M + + SHARDING -->|读操作| PG0_S1 + SHARDING -->|读操作| PG0_S2 + SHARDING -->|读操作| PG1_S1 + SHARDING -->|读操作| PG15_S1 + + PG0_M -.主从复制.-> PG0_S1 + PG0_M -.主从复制.-> PG0_S2 + PG1_M -.主从复制.-> PG1_S1 + PG15_M -.主从复制.-> PG15_S1 +``` + +**分片策略:** + +```sql +-- Account表分片键:account_id +-- 分片规则:hash(account_id) % 16 +-- 分片数:16个库 + +-- Contact表分片键:contact_id(由mobile_phone生成) +-- 分片规则:hash(mobile_phone) % 16 +-- 保证同一手机号的Contact在同一分片 + +-- 关联查询优化: +-- AccountContactRelation表按account_id分片 +-- 保证Account与其关联关系(AccountContactRelation)在同一分片,查询账户下的联系人列表时避免跨库JOIN +-- 注意:Contact本身按mobile_phone分片,不保证与Account同分片;Contact详情需按contact_id路由到对应分片查询 +``` + +--- + +## 技术实现方案 + +### 1. 身份识别引擎实现 + +**身份匹配规则(优先级从高到低):** + +```python +class IdentityResolutionEngine: + """身份识别引擎""" + + def resolve_account(self, data: dict) -> str: + """Account身份识别""" + # 1. 优先级1:统一社会信用代码精确匹配 + if data.get('unified_social_credit_code'): + account = self.find_by_credit_code(data['unified_social_credit_code']) + if account: + return account.account_id + + # 2. 优先级2:企业全称精确匹配 + if data.get('account_name'): + account = self.find_by_exact_name(data['account_name']) + if account: + return account.account_id + + # 3. 优先级3:企业简称+城市模糊匹配 + if data.get('account_name') and data.get('city'): + candidates = self.fuzzy_match_by_name_and_city( + data['account_name'], + data['city'] + ) + if len(candidates) == 1: + return candidates[0].account_id + elif len(candidates) > 1: + # 多个匹配,进入人工审核 + self.send_to_manual_review(data, candidates) + return None + + # 4. 未找到匹配,创建新Account + return self.create_new_account(data) + + def resolve_contact(self, data: dict) -> str: + """Contact身份识别""" + # 1. 优先级1:手机号精确匹配 + if data.get('mobile_phone'): + contact = self.find_by_phone(data['mobile_phone']) + if contact: + return contact.contact_id + + # 2. 优先级2:邮箱精确匹配 + if data.get('email'): + contact = self.find_by_email(data['email']) + if contact: + return contact.contact_id + + # 3. 
优先级3:企业微信UserID + if data.get('wework_user_id'): + contact = self.find_by_wework_id(data['wework_user_id']) + if contact: + return contact.contact_id + + # 4. 未找到匹配,创建新Contact + return self.create_new_contact(data) +``` + +--- + +### 2. 汇总计算优化方案 + +**增量计算 vs 全量计算:** + +```python +class SummaryCalculationService: + """汇总计算服务""" + + def calculate_account_summary(self, account_id: str, mode: str = 'incremental'): + """计算Account汇总数据""" + if mode == 'incremental': + # 增量计算:只计算变化的部分 + return self._incremental_calculate(account_id) + else: + # 全量计算:重新统计所有数据 + return self._full_calculate(account_id) + + def _incremental_calculate(self, account_id: str): + """增量计算(推荐)""" + # 1. 获取上次计算结果 + last_summary = self.get_last_summary(account_id) + + # 2. 只统计增量数据(自上次计算后的新增数据) + last_calc_time = last_summary.calculated_at + + # 3. 统计增量 + new_contacts = self.count_new_contacts(account_id, since=last_calc_time) + new_opportunities = self.count_new_opportunities(account_id, since=last_calc_time) + new_revenue = self.sum_new_revenue(account_id, since=last_calc_time) + + # 4. 累加到原有数据 + new_summary = AccountSummary( + account_id=account_id, + total_contacts=last_summary.total_contacts + new_contacts, + total_opportunities=last_summary.total_opportunities + new_opportunities, + total_revenue=last_summary.total_revenue + new_revenue, + # ... 其他字段 + calculated_at=datetime.now() + ) + + return new_summary + + def _full_calculate(self, account_id: str): + """全量计算(定时任务使用)""" + # 重新统计所有数据 + return self._calculate_from_scratch(account_id) +``` + +--- + +### 3. 
缓存策略实现 + +**多级缓存架构:** + +```python +class CacheManager: + """缓存管理器""" + + def get_account_full(self, account_id: str) -> dict: + """获取Account完整数据(多级缓存)""" + # L1: 本地内存缓存(可选,使用LRU) + # data = self.local_cache.get(f'account:{account_id}') + # if data: + # return data + + # L2: Redis缓存 + cache_key = f'account:full:{account_id}' + cached = self.redis.get(cache_key) + if cached: + return json.loads(cached) + + # L3: 数据库查询 + account = self.db.query_account_with_summary(account_id) + + # 写入缓存 + self.redis.setex( + cache_key, + 300, # TTL: 5分钟 + json.dumps(account) + ) + + return account + + def invalidate_account_cache(self, account_id: str): + """使Account缓存失效""" + # 删除所有相关缓存 + self.redis.delete(f'account:full:{account_id}') + self.redis.delete(f'account:basic:{account_id}') + self.redis.delete(f'account:summary:{account_id}') +``` + +**缓存更新策略(Cache Aside模式):** + +```python +# 写操作:先更新数据库,再删除缓存 +def update_account(account_id: str, data: dict): + # 1. 更新数据库 + db.update(account_id, data) + + # 2. 删除缓存(让下次读取时重新加载) + cache.delete(f'account:full:{account_id}') + +# 读操作:先查缓存,缓存miss则查数据库并写缓存 +def get_account(account_id: str): + # 1. 查缓存 + data = cache.get(f'account:full:{account_id}') + if data: + return data + + # 2. 查数据库 + data = db.query(account_id) + + # 3. 写缓存 + cache.set(f'account:full:{account_id}', data, ttl=300) + + return data +``` + +--- + +### 4. 消息队列Topic设计 + +```yaml +Kafka Topic设计: + +1. data-ingestion (数据采集) + - Partition: 16个分区 + - Replication: 3副本 + - Retention: 7天 + - 用途: 接收所有渠道的原始数据 + +2. identity-resolution (身份识别) + - Partition: 16个分区 + - Replication: 3副本 + - Retention: 3天 + - 用途: 身份识别任务队列 + +3. summary-calculation (汇总计算) + - Partition: 16个分区 + - Replication: 3副本 + - Retention: 3天 + - 用途: 汇总计算任务队列 + +4. notification (通知事件) + - Partition: 8个分区 + - Replication: 3副本 + - Retention: 7天 + - 用途: 通知任务(邮件、短信、企微) + +5. event-stream (事件流) + - Partition: 32个分区 + - Replication: 3副本 + - Retention: 30天 + - 用途: 用户行为事件流(高吞吐) +``` + +--- + +### 5. 
监控与告警 + +**核心监控指标:** + +```yaml +业务指标: + - 数据采集QPS + - 身份识别成功率 + - 汇总计算延迟 + - 查询P99响应时间 + - 缓存命中率 + +技术指标: + - Kafka消息积压 + - PostgreSQL慢查询 + - Redis内存使用率 + - ClickHouse查询延迟 + - 服务健康检查 + +告警规则: + - Kafka消息积压 > 100万 → P1告警 + - 数据采集失败率 > 1% → P2告警 + - 汇总计算延迟 > 5分钟 → P2告警 + - 查询P99 > 1秒 → P3告警 + - 服务不可用 → P0告警 +``` + +--- + +## 总结 + +### 核心技术特点 + +1. **高可用架构** + - PostgreSQL主从复制 + 读写分离 + - Redis Cluster 3主3从 + - ClickHouse 3副本 + - Kafka 3 broker集群 + +2. **高性能设计** + - PostgreSQL分库分表(16分片) + - 多级缓存(Redis + 本地) + - 异步处理(Kafka解耦) + - 批量写入优化 + +3. **实时性保障** + - 数据采集响应 < 100ms + - 身份识别延迟 < 5秒 + - 汇总计算延迟 < 30秒 + - 查询P99 < 200ms + +4. **数据一致性** + - 事务保证原子性 + - 消息队列保证可靠性 + - 分布式锁防重复 + - 最终一致性设计 + +5. **扩展性** + - 水平扩展(加节点) + - 垂直扩展(加资源) + - 服务解耦(微服务) + - 存储分离(冷热分离) + +### 容量评估 + +``` +数据规模: +- Account: 1000万 +- Contact: 5000万 +- Lead: 2000万/年 +- Event: 10亿/年 +- Touchpoint: 5000万/年 + +存储容量: +- PostgreSQL: 2TB (SSD) +- ClickHouse: 10TB (SSD) +- Redis: 256GB (内存) + +服务器配置: +- 应用服务器: 8核16G * 5台 +- PostgreSQL: 16核64G * 16台(主) + 32台(从) +- ClickHouse: 32核128G * 3台 +- Redis: 16核64G * 6台 +- Kafka: 16核32G * 3台 +``` + +### 后续优化方向 + +1. **短期优化(3个月内)** + - 完善监控告警 + - 优化慢查询 + - 调整缓存策略 + - 压测验证性能 + +2. **中期优化(6个月内)** + - 引入本地缓存(Caffeine) + - 实现数据归档 + - 优化分库分表策略 + - 引入ES辅助查询 + +3. **长期优化(1年内)** + - 冷热数据分离 + - 引入数据湖 + - AI/ML模型集成 + - 实时OLAP分析 diff --git a/docs/flink-architecture.md b/docs/flink-architecture.md new file mode 100644 index 000000000..1477355a8 --- /dev/null +++ b/docs/flink-architecture.md @@ -0,0 +1,214 @@ +# Flink 系统架构图 + +## Flink 完整系统架构 + +```mermaid +graph TB + subgraph 应用层 + APP[Flink Application
DataStream API / Table API / SQL] + end + + subgraph 运行时层JobManager + JM[JobManager] + DISPATCHER[Dispatcher
接收作业提交] + RM[ResourceManager
资源管理] + JOBMASTER[JobMaster
作业协调] + CHECKPOINT[CheckpointCoordinator
检查点协调器] + end + + subgraph 运行时层TaskManager + TM1[TaskManager 1] + TM2[TaskManager 2] + TM3[TaskManager N] + + SLOT1[Task Slot 1
执行线程] + SLOT2[Task Slot 2
执行线程] + + NETWORK[Network Buffers
数据交换缓冲区] + MEMORY[Managed Memory
托管内存] + end + + subgraph 状态管理层 + STATE[State Backend] + MEMORY_STATE[Memory] + FS_STATE[FileSystem] + ROCKS_STATE[RocksDB] + end + + subgraph 部署层 + STANDALONE[Standalone
独立部署] + YARN[YARN
资源调度] + K8S[Kubernetes
容器编排] + MESOS[Mesos
资源调度] + end + + APP -->|提交作业| DISPATCHER + DISPATCHER --> JOBMASTER + JOBMASTER --> RM + JOBMASTER --> CHECKPOINT + + RM -->|申请资源| YARN + RM -->|申请资源| K8S + + RM -->|分配Slot| TM1 + RM -->|分配Slot| TM2 + RM -->|分配Slot| TM3 + + JOBMASTER -->|部署Task| SLOT1 + JOBMASTER -->|部署Task| SLOT2 + + SLOT1 --> NETWORK + SLOT2 --> NETWORK + + SLOT1 -.->|读写状态| STATE + SLOT2 -.->|读写状态| STATE + + STATE --> MEMORY_STATE + STATE --> FS_STATE + STATE --> ROCKS_STATE + + CHECKPOINT -.->|触发检查点| TM1 + CHECKPOINT -.->|触发检查点| TM2 +``` + +## Flink 核心组件说明 + +### 1. JobManager(主节点) +- **Dispatcher**: 接收客户端提交的作业 +- **ResourceManager**: 管理TaskManager资源和Slot分配 +- **JobMaster**: 每个作业一个,负责作业执行协调 +- **CheckpointCoordinator**: 协调分布式快照 + +### 2. TaskManager(工作节点) +- **Task Slot**: 任务执行槽位,划分TaskManager的托管内存(Slot之间不做CPU隔离) +- **Network Buffers**: 任务间数据交换缓冲区 +- **Managed Memory**: 排序、哈希表等操作的托管内存 + +### 3. State Backend(状态后端) +- **MemoryStateBackend**: 内存存储(开发测试);Flink 1.13起已弃用,对应 HashMapStateBackend + JobManagerCheckpointStorage +- **FsStateBackend**: 工作状态保存在TaskManager堆内存,Checkpoint写入文件系统(HDFS/S3);Flink 1.13起已弃用,对应 HashMapStateBackend + FileSystemCheckpointStorage +- **RocksDBStateBackend**: RocksDB存储(大状态场景);Flink 1.13起更名为 EmbeddedRocksDBStateBackend + +### 4. 部署模式 +- **Standalone**: 独立集群部署 +- **YARN**: Hadoop资源调度 +- **Kubernetes**: 容器化部署 +- **Mesos**: 资源调度框架(Flink 1.14起已移除支持,仅适用于旧版本) + +## Flink 数据流处理架构 + +```mermaid +graph TB + subgraph 数据源层 + KAFKA[Kafka] + DB[Database CDC] + FILE[File System] + end + + subgraph Flink处理层 + SOURCE[Source Operator
数据接入] + + MAP[Map Operator
数据转换] + FILTER[Filter Operator
数据过滤] + FLATMAP[FlatMap Operator
数据展开] + + KEYBY[KeyBy
数据分组] + + WINDOW[Window Operator
窗口聚合] + + PROCESS[Process Function
自定义处理] + + SINK[Sink Operator
结果输出] + end + + subgraph 状态管理 + KEYED_STATE[Keyed State
键控状态] + OPERATOR_STATE[Operator State
算子状态] + end + + subgraph 输出层 + KAFKA_OUT[Kafka] + DB_OUT[Database] + REDIS_OUT[Redis] + ES_OUT[Elasticsearch] + end + + KAFKA --> SOURCE + DB --> SOURCE + FILE --> SOURCE + + SOURCE --> MAP + MAP --> FILTER + FILTER --> FLATMAP + + FLATMAP --> KEYBY + KEYBY --> WINDOW + WINDOW --> PROCESS + + PROCESS --> SINK + + KEYBY -.->|读写| KEYED_STATE + WINDOW -.->|读写| KEYED_STATE + PROCESS -.->|读写| KEYED_STATE + + SOURCE -.->|读写| OPERATOR_STATE + SINK -.->|读写| OPERATOR_STATE + + SINK --> KAFKA_OUT + SINK --> DB_OUT + SINK --> REDIS_OUT + SINK --> ES_OUT +``` + +## Flink Checkpoint 机制 + +```mermaid +sequenceDiagram + participant JM as JobManager + participant TM1 as TaskManager 1 + participant TM2 as TaskManager 2 + participant STATE as State Backend + participant STORAGE as 持久化存储 + + JM->>JM: 1.触发Checkpoint + Note over JM: 定时触发
间隔如10秒 + + JM->>TM1: 2.通知Source注入Barrier
checkpoint-123 + JM->>TM2: 2.通知Source注入Barrier
checkpoint-123 + + TM1->>TM1: 3.对齐Barrier + Note over TM1: 等待所有输入流
Barrier到达 + + TM1->>STATE: 4.快照状态 + Note over STATE: 保存算子状态 + + STATE->>STORAGE: 5.持久化 + Note over STORAGE: HDFS/S3 + + TM1->>JM: 6.确认完成 + + TM2->>TM2: 3.对齐Barrier + TM2->>STATE: 4.快照状态 + STATE->>STORAGE: 5.持久化 + TM2->>JM: 6.确认完成 + + JM->>JM: 7.Checkpoint完成 + Note over JM: 所有TaskManager
都确认完成 +``` + +## Checkpoint 机制说明 + +### Barrier对齐 +- Source算子接收到Checkpoint触发信号后插入Barrier +- Barrier随数据流向下游传播 +- 算子等待所有输入流的Barrier都到达后才执行快照 + +### 状态快照 +- 保存Keyed State(键控状态) +- 保存Operator State(算子状态) +- 异步写入持久化存储 + +### 故障恢复 +- 从最近成功的Checkpoint恢复 +- 重放Checkpoint之后的数据 +- 保证Flink内部状态的Exactly-Once语义(端到端Exactly-Once还需要可重放的Source以及事务性或幂等的Sink)