Skip to content

Commit 2f389fa

Browse files
committed
[ci]: Introduce a regular inspection mechanism for abnormal CI reports
1 parent 0cf096e commit 2f389fa

File tree

8 files changed

+783
-0
lines changed

8 files changed

+783
-0
lines changed

.github/workflows/bsp_buildings.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ on:
4646
types:
4747
- online-pkgs-static-building-trigger-event
4848
workflow_dispatch:
49+
inputs:
50+
trigger_type:
51+
description: '触发类型'
52+
required: false
53+
default: 'manual'
54+
type: string
4955

5056
concurrency:
5157
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
name: Weekly CI Scheduler

# Triggers the target CI workflows, monitors their runs and, when any of them
# failed, publishes an aggregated failure report as a GitHub Discussion.
on:
  schedule:
    # Every Monday at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 0 * * 1'
  workflow_dispatch:
    inputs:
      # NOTE(review): no step in this workflow reads this input.
      debug:
        description: 'Debug mode'
        required: false
        default: 'false'

env:
  # JSON array handed to the helper scripts through the environment.
  TARGET_WORKFLOWS: '["RT-Thread BSP Static Build Check", "utest_auto_run"]'
  # Discussion category the report is posted to; matched by exact name.
  DISCUSSION_CATEGORY: "Github Action Exception Reports"

jobs:
  trigger-and-monitor:
    name: Trigger and Monitor CIs
    runs-on: ubuntu-latest
    # Explicit token scopes so the job also works when the repository default
    # for GITHUB_TOKEN is read-only.
    # NOTE(review): assumes the helper scripts only dispatch workflows and read
    # workflow runs through the Actions API -- confirm against the scripts.
    permissions:
      actions: write
      contents: read
    outputs:
      failed_workflows: ${{ steps.collect-results.outputs.failed_workflows }}
      total_workflows: ${{ steps.collect-results.outputs.total_workflows }}
      has_results: ${{ steps.collect-results.outputs.has_results }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Record start time
        id: start-time
        run: |
          # Evaluate `date` once so the logged value is exactly the recorded one.
          START_TIME="$(date -u +'%Y-%m-%dT%H:%M:%SZ')"
          echo "start_time=$START_TIME" >> "$GITHUB_OUTPUT"
          echo "Start time: $START_TIME"

      - name: Trigger CI workflows directly
        id: trigger-ci
        run: |
          python tools/ci/scheduled-ci-trigger/trigger_workflows_direct.py
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TARGET_WORKFLOWS: ${{ env.TARGET_WORKFLOWS }}

      - name: Wait for workflows to appear
        id: wait-for-workflows
        run: |
          echo "Waiting for workflows to appear in API..."
          python tools/ci/scheduled-ci-trigger/wait_for_workflows.py "${{ steps.start-time.outputs.start_time }}"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TARGET_WORKFLOWS: ${{ env.TARGET_WORKFLOWS }}

      - name: Monitor CI workflows
        id: monitor-ci
        run: |
          python tools/ci/scheduled-ci-trigger/monitor_workflows.py "${{ steps.start-time.outputs.start_time }}"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TARGET_WORKFLOWS: ${{ env.TARGET_WORKFLOWS }}

      - name: Collect monitoring results
        id: collect-results
        run: |
          echo "Checking for monitoring results..."
          if [ -f "monitoring_results.json" ]; then
            echo "monitoring_results.json found"
            FAILED_COUNT=$(python -c "import json; data=json.load(open('monitoring_results.json')); print(len([w for w in data if w.get('conclusion') == 'failure']))")
            TOTAL_COUNT=$(python -c "import json; data=json.load(open('monitoring_results.json')); print(len(data))")
            echo "failed_workflows=$FAILED_COUNT" >> "$GITHUB_OUTPUT"
            echo "total_workflows=$TOTAL_COUNT" >> "$GITHUB_OUTPUT"
            echo "has_results=true" >> "$GITHUB_OUTPUT"
            echo "Results: $FAILED_COUNT failed out of $TOTAL_COUNT total"
          else
            echo "monitoring_results.json not found"
            echo "failed_workflows=0" >> "$GITHUB_OUTPUT"
            echo "total_workflows=0" >> "$GITHUB_OUTPUT"
            echo "has_results=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Generate detailed report
        if: steps.collect-results.outputs.has_results == 'true' && steps.collect-results.outputs.failed_workflows != '0'
        id: generate-report
        run: |
          echo "Generating detailed report..."
          python tools/ci/scheduled-ci-trigger/generate_report.py
          echo "Report generation completed"

      - name: Upload report artifact
        if: steps.collect-results.outputs.has_results == 'true' && steps.collect-results.outputs.failed_workflows != '0'
        uses: actions/upload-artifact@v4
        with:
          name: ci-failure-report
          path: |
            monitoring_results.json
            failure_details.md
          retention-days: 7

  create-discussion:
    name: Create Discussion Report
    needs: trigger-and-monitor
    # Only runs when the monitoring job produced results with at least one failure.
    if: needs.trigger-and-monitor.outputs.has_results == 'true' && needs.trigger-and-monitor.outputs.failed_workflows != '0'
    runs-on: ubuntu-latest
    # The createDiscussion mutation needs write access to Discussions.
    permissions:
      contents: read
      discussions: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Download report artifact
        uses: actions/download-artifact@v4
        with:
          name: ci-failure-report

      - name: Create Discussion
        uses: actions/github-script@v6
        env:
          DISCUSSION_CATEGORY: ${{ env.DISCUSSION_CATEGORY }}
        with:
          script: |
            const fs = require('fs');

            const reportPath = './failure_details.md';
            if (!fs.existsSync(reportPath)) {
              core.setFailed(`Report file not found: ${reportPath}`);
              return;
            }

            const rawReport = fs.readFileSync(reportPath, 'utf8');

            // Extract the date from the first line: # YYYYMMDD_ci_integration-failed-report
            const lines = rawReport.split('\n');
            const firstLine = lines[0].trim();
            const dateMatch = firstLine.match(/# (\d{8})_ci_integration-failed-report/);

            if (!dateMatch) {
              // Fail the step through the toolkit instead of killing the process.
              core.setFailed(`Failed to extract date from first line: ${firstLine}`);
              return;
            }

            const dateString = dateMatch[1];
            const discussionTitle = `${dateString}_ci_integration-failed-report`;

            // Drop the first line: it is a marker used only for title extraction
            // and must not appear in the published body.
            const reportBody = lines.slice(1).join('\n').trim();

            // Look up the repository ID and the discussion category IDs.
            // 100 is the largest page size the GraphQL API accepts, so the
            // target category is not missed on repositories with many categories.
            const getRepoQuery = `
              query($owner: String!, $repo: String!) {
                repository(owner: $owner, name: $repo) {
                  id
                  discussionCategories(first: 100) {
                    nodes {
                      id
                      name
                    }
                  }
                }
              }
            `;

            const repoData = await github.graphql(getRepoQuery, {
              owner: context.repo.owner,
              repo: context.repo.repo,
            });

            const repositoryId = repoData.repository.id;
            const categories = repoData.repository.discussionCategories.nodes;
            const categoryName = process.env.DISCUSSION_CATEGORY;
            const targetCategory = categories.find((cat) => cat.name === categoryName);

            if (!targetCategory) {
              core.setFailed(`Category not found: ${categoryName}`);
              return;
            }

            const createDiscussionMutation = `
              mutation($repositoryId: ID!, $categoryId: ID!, $title: String!, $body: String!) {
                createDiscussion(input: {
                  repositoryId: $repositoryId
                  categoryId: $categoryId
                  title: $title
                  body: $body
                }) {
                  discussion {
                    id
                    title
                    url
                  }
                }
              }
            `;

            const result = await github.graphql(createDiscussionMutation, {
              repositoryId,
              categoryId: targetCategory.id,
              title: discussionTitle,
              body: reportBody, // cleaned content, without the marker line
            });

            console.log('Discussion created successfully:', result.createDiscussion.discussion.url);

.github/workflows/utest_auto_run.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ on:
1818
- documentation/**
1919
- '**/README.md'
2020
- '**/README_zh.md'
21+
workflow_dispatch:
22+
inputs:
23+
trigger_type:
24+
description: '触发类型'
25+
required: false
26+
default: 'manual'
27+
type: string
2128

2229
concurrency:
2330
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/usr/bin/env python3
2+
import json
3+
import os
4+
from datetime import datetime, timedelta
5+
from typing import List, Dict, Any
6+
7+
def load_monitoring_results() -> List[Dict[str, Any]]:
    """Load the run records stored in ``monitoring_results.json``.

    The file is looked up in the current working directory.

    Returns:
        The parsed list of records, or an empty list when the file is
        missing, unreadable, not valid UTF-8/JSON, or does not contain a
        JSON array at the top level.
    """
    if not os.path.exists("monitoring_results.json"):
        print("No monitoring results found")
        return []
    try:
        with open("monitoring_results.json", "r", encoding="utf-8") as f:
            data = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading monitoring_results.json: {e}")
        return []
    if not isinstance(data, list):
        # Callers iterate over the result and call .get() on each entry, so
        # any other top-level JSON type is treated as "no results".
        print(f"Error loading monitoring_results.json: expected a list, got {type(data).__name__}")
        return []
    return data
18+
19+
def get_beijing_time() -> datetime:
    """Return the current wall-clock time at UTC+8 as a naive ``datetime``.

    ``datetime.utcnow()`` is deprecated since Python 3.12, so the current UTC
    time is taken from an aware ``datetime`` and the tzinfo is then dropped to
    keep the naive return value existing callers rely on.
    """
    from datetime import timezone  # local import: the file header does not import it

    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    return utc_now + timedelta(hours=8)
21+
22+
def format_time(dt: datetime) -> str:
    """Render ``dt`` as ``YYYY-MM-DD HH:MM`` (minute precision, no timezone)."""
    return f"{dt:%Y-%m-%d %H:%M}"
24+
25+
def classify_error(step_name: str, job_name: str) -> str:
    """Classify a failed step into a coarse error category.

    The step name is lower-cased and searched for keyword substrings; the
    first category (in table order) with a matching keyword wins.

    Args:
        step_name: Name of the failed step.
        job_name: Name of the enclosing job; accepted but not used here.

    Returns:
        One of ``TEST_FAILURE``, ``LINT_ERROR``, ``BUILD_ERROR``,
        ``DEPLOY_ERROR``, ``VALIDATION_ERROR``, ``GENERATION_ERROR`` or
        ``UNKNOWN`` when no keyword matches.
    """
    # Order matters: earlier rows take precedence over later ones.
    category_keywords = (
        ("TEST_FAILURE", ("test", "suite", "pytest", "unittest")),
        ("LINT_ERROR", ("lint", "flake8")),
        ("BUILD_ERROR", ("build", "compile")),
        ("DEPLOY_ERROR", ("deploy", "upload", "publish")),
        ("VALIDATION_ERROR", ("check", "validate", "verify")),
        ("GENERATION_ERROR", ("generate", "render")),
    )
    lowered = step_name.lower()
    for category, keywords in category_keywords:
        for keyword in keywords:
            if keyword in lowered:
                return category
    return "UNKNOWN"
41+
42+
def _to_beijing(timestamp: str) -> datetime:
    """Parse an ISO-8601 timestamp (a trailing ``Z`` is accepted) and shift it forward by 8 hours."""
    return datetime.fromisoformat(timestamp.replace("Z", "+00:00")) + timedelta(hours=8)


def _render_failed_run(wf: Dict[str, Any]) -> str:
    """Render the Markdown job/step tree for a single failed workflow run."""
    run_id = wf.get("run_id", "N/A")
    name = wf.get("name", "N/A")
    html_url = wf.get("html_url", "#")
    parts = [f"**📌 Run-{run_id}** | [{name}]({html_url})\n"]

    # Only jobs that carry at least one step can be rendered as a tree.
    failed_jobs = [j for j in (wf.get("failure_details") or []) if j.get("steps")]
    if not failed_jobs:
        # Covers both "no details at all" and "details without any steps",
        # so the run never shows up as an empty section.
        parts.append("└─ 无失败作业详情\n\n")
        return "".join(parts)

    for i, job in enumerate(failed_jobs):
        job_name = str(job.get("name", "N/A"))
        steps = job["steps"]
        job_prefix = "└─" if i == len(failed_jobs) - 1 else "├─"
        parts.append(f"{job_prefix} **失败作业**: {job_name}\n")

        for j, step in enumerate(steps):
            step_name = str(step.get("name", "N/A"))
            step_num = step.get("number", "N/A")
            error_type = classify_error(step_name, job_name)
            is_last_step = j == len(steps) - 1
            step_prefix = " └─" if is_last_step else " ├─"
            parts.append(f"{step_prefix} **失败步骤**: {step_name} (Step {step_num})\n")
            indent = " " if is_last_step else " │ "
            parts.append(f"{indent}**错误类型**: `{error_type}`\n")

    parts.append("\n")
    return "".join(parts)


def generate_report():
    """Write ``failure_details.md``, an aggregated report of failed workflow runs.

    Loads the monitoring results, keeps the runs whose ``conclusion`` is
    ``failure`` and writes a Markdown report to the current working directory.
    Nothing is written when there are no results or no failed runs.

    The first line of the report (``# YYYYMMDD_ci_integration-failed-report``)
    is a marker intended for title extraction by the consumer of the file.
    """
    results = load_monitoring_results()
    if not results:
        return

    failed_workflows = [r for r in results if r.get('conclusion') == 'failure']
    if not failed_workflows:
        print("No failed workflows to report")
        return

    now = get_beijing_time()
    date_str = now.strftime("%Y%m%d")

    # Time range covered by the failed runs; records without a timestamp are
    # skipped, and the current time is used if none of them has one.
    created_times = [_to_beijing(r["created_at"]) for r in failed_workflows if r.get("created_at")]
    updated_times = [_to_beijing(r["updated_at"]) for r in failed_workflows if r.get("updated_at")]
    start_time = min(created_times) if created_times else now
    end_time = max(updated_times) if updated_times else now

    # `results` is non-empty here, so the division is safe.
    total = len(results)
    failed_count = len(failed_workflows)
    success_rate = round((total - failed_count) / total * 100, 1)

    # First line: marker used for title extraction (required).
    report = f"# {date_str}_ci_integration-failed-report\n\n"

    # Second heading: the title readers actually see (H1).
    report += f"# 🚨 {date_str} GitHub Actions 故障聚合报告\n\n"

    # Execution overview.
    report += "## 执行概览\n"
    report += f"- **监控时间范围**: {format_time(start_time)} – {format_time(end_time)} (UTC+8)\n"
    report += f"- **检测到失败运行**: {failed_count}\n"
    report += f"- **成功率**: {success_rate}% (本批次)\n\n"

    # Failure details, one section per failed run.
    report += "## 🔍 故障详情\n\n"
    for wf in failed_workflows:
        report += _render_failed_run(wf)

    # Team Collaboration & Support
    report += "## 👥 Team Collaboration & Support\n\n"
    report += "Call for Maintenance Support: This report requires the expertise of the RT-Thread official team for review and guidance.\n\n"
    report += "Requested Reviewers from RT-Thread: @kurisaW\n\n"
    report += "Your prompt attention to this matter is greatly appreciated.\n"

    # Save the report; a write failure is logged rather than raised.
    try:
        with open("failure_details.md", "w", encoding="utf-8") as f:
            f.write(report.rstrip() + "\n")
        print("Report generated: failure_details.md")
        print(f"Report size: {os.path.getsize('failure_details.md')} bytes")
    except (OSError, UnicodeError) as e:
        print(f"Error writing report: {e}")
130+
131+
# Script entry point: build the report only when this file is executed
# directly, not when it is imported as a module.
if "__main__" == __name__:
    generate_report()

0 commit comments

Comments
 (0)