6
6
import logging
7
7
import psycopg2
8
8
import dateutil
9
+ import argparse
9
10
10
11
def init_logger():
    """Configure root logging from the LOGLEVEL env var (default INFO).

    Reads ``LOGLEVEL`` from the environment, upper-cases it, and hands it
    to ``logging.basicConfig`` together with a fixed message format and
    ``MM-DD-YYYY HH:MM:SS`` timestamps.
    """
    level_name = os.environ.get('LOGLEVEL', 'INFO').upper()
    logging.basicConfig(
        level=level_name,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%m-%d-%Y %H:%M:%S',
    )
15
16
17
def make_parser():
    """Build the command-line parser for this collector script.

    Returns:
        argparse.ArgumentParser: parser requiring ``-r/--repository-name``
        (OWNER/REPOSITORY) and ``--run-id`` (workflow run id), both strings.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-r', '--repository-name', type=str, required=True,
        help='Repository name in OWNER/REPOSITORY format')
    arg_parser.add_argument(
        '--run-id', type=str, required=True,
        help='Workflow Run ID')
    return arg_parser
25
+
16
26
def create_db_tables (conn , cur ):
17
- cur .execute ('''CREATE TABLE IF NOT EXISTS github_workflow_runs_test (
18
- id SERIAL,
19
- run_id BIGINT PRIMARY KEY ,
27
+ cur .execute ('''CREATE TABLE IF NOT EXISTS workflow_runs (
28
+ id SERIAL PRIMARY KEY ,
29
+ run_id BIGINT,
20
30
html_url TEXT,
21
31
name VARCHAR(255),
22
32
run_started_at TIMESTAMP,
33
+ created_at TIMESTAMP,
34
+ updated_at TIMESTAMP,
23
35
triggering_actor_login VARCHAR(255),
24
36
conclusion VARCHAR(25),
25
- run_number INT,
26
37
event VARCHAR(50),
27
38
run_attempt INT,
28
39
repository_full_name VARCHAR(255),
29
40
head_repository_full_name VARCHAR(255),
30
41
head_branch VARCHAR(255),
31
42
status VARCHAR(25),
32
43
display_title TEXT,
33
- path TEXT
44
+ path TEXT,
45
+ total_duration_seconds INT
34
46
);
35
47
''' )
36
- cur .execute ('''CREATE TABLE IF NOT EXISTS github_workflow_jobs_test (
37
- id SERIAL,
38
- job_id BIGINT PRIMARY KEY ,
39
- parent_run_id BIGINT REFERENCES github_workflow_runs_test(run_id) ,
48
+ cur .execute ('''CREATE TABLE IF NOT EXISTS workflow_jobs (
49
+ id SERIAL PRIMARY KEY ,
50
+ job_id BIGINT,
51
+ parent_run_id BIGINT,
40
52
html_url TEXT,
41
53
name VARCHAR(255),
42
54
created_at TIMESTAMP,
@@ -47,12 +59,14 @@ def create_db_tables(conn, cur):
47
59
runner_name VARCHAR(255),
48
60
status VARCHAR(25),
49
61
conclusion VARCHAR(25),
50
- head_branch VARCHAR(255)
62
+ head_branch VARCHAR(255),
63
+ run_attempt INT,
64
+ workflow_name TEXT
51
65
);
52
66
''' )
53
- cur .execute ('''CREATE TABLE IF NOT EXISTS github_workflow_steps_test (
67
+ cur .execute ('''CREATE TABLE IF NOT EXISTS workflow_steps (
54
68
id SERIAL PRIMARY KEY,
55
- parent_job_id BIGINT REFERENCES github_workflow_jobs_test(job_id) ,
69
+ parent_job_id BIGINT,
56
70
name VARCHAR(255),
57
71
conclusion VARCHAR(25),
58
72
number INT,
@@ -65,20 +79,16 @@ def create_db_tables(conn, cur):
65
79
66
80
def main ():
67
81
init_logger ()
68
-
82
+ parser = make_parser ()
83
+ args = parser .parse_args ()
69
84
logger = logging .getLogger (__name__ )
70
85
71
86
github_token = os .environ .get ('GITHUB_TOKEN' )
72
87
if not github_token :
73
88
raise ValueError ('GITHUB_TOKEN environment variable is not set!' )
74
89
75
- run_id = os .environ .get ('RUN_ID' )
76
- if not run_id :
77
- raise ValueError ('RUN_ID environment variable is not set!' )
78
-
79
- repo_name = os .environ .get ('GITHUB_REPOSITORY' )
80
- if not repo_name :
81
- raise ValueError ('GITHUB_REPOSITORY environment variable is not set!' )
90
+ run_id = args .run_id
91
+ repo_name = args .repository_name
82
92
83
93
84
94
# this should be specified in runner's env
@@ -102,18 +112,31 @@ def main():
102
112
repo = g .get_repo (repo_name )
103
113
104
114
run = repo .get_workflow_run (int (run_id ))
105
-
106
- workflow_data_query = f'''INSERT INTO github_workflow_runs_test(
115
+ logger .debug ('Processing run ID %s - %s' , run_id , run .name )
116
+ if run .status != 'completed' :
117
+ logger .error ('Run %s is not completed! Only completed runs should be in the database' , run_id )
118
+ raise SystemExit (1 )
119
+
120
+ # We rely on the following assumptions:
121
+ # - The workflow run is completed. When run.status != 'completed' we should not add it to the database
122
+ # theoretically the second attempt can be triggerred right after the completion of the first one
123
+ # or while the runner which executes this script is deploying
124
+ #
125
+ # - Job's queued duration equals "job.started_at - job.created_at" if started_at > created_at.
126
+ # Otherwise the job should not be added to the database
127
+ total_duration_seconds = round (run .timing ().run_duration_ms / 1000 )
128
+ workflow_data_query = f'''INSERT INTO workflow_runs(
107
129
run_id, html_url, name,
108
- run_started_at, triggering_actor_login, conclusion,
109
- run_number, event, run_attempt, repository_full_name,
110
- head_branch, display_title, path)
130
+ run_started_at, created_at, updated_at, triggering_actor_login, conclusion,
131
+ event, run_attempt, repository_full_name,
132
+ head_branch, display_title, path, total_duration_seconds )
111
133
VALUES(
112
134
'{ run_id } ', '{ run .html_url } ', '{ run .name } ', '{ run .run_started_at } ',
135
+ '{ run .created_at } ', '{ run .updated_at } ',
113
136
'{ run .raw_data ['triggering_actor' ]['login' ]} ',
114
- '{ run .conclusion } ', '{ run .run_number } ', ' { run . event } ',
137
+ '{ run .conclusion } ', '{ run .event } ',
115
138
'{ run .run_attempt } ', '{ run .raw_data ['repository' ]['full_name' ]} ',
116
- '{ run .head_branch } ', '{ run .display_title } ', '{ run .path } '
139
+ '{ run .head_branch } ', '{ run .display_title } ', '{ run .path } ', ' { total_duration_seconds } '
117
140
);
118
141
'''
119
142
@@ -122,10 +145,15 @@ def main():
122
145
123
146
for job in run .jobs ():
124
147
job_id = job .id
148
+ logger .debug ('Processing job %s' , job .name )
125
149
queued_duration_seconds = 0
126
150
duration_seconds = 0
127
151
128
152
job_created_at_date = dateutil .parser .parse (job .raw_data ['created_at' ])
153
+ if job_created_at_date > job .started_at :
154
+ logger .warning ('Skipping job %s of run %s - most likely a stub \
155
+ job created after workflow restart' , job .name , run_id )
156
+ continue
129
157
130
158
queued_duration_timedelta = job .started_at - job_created_at_date
131
159
queued_duration_seconds = round (queued_duration_timedelta .total_seconds ())
@@ -134,27 +162,30 @@ def main():
134
162
duration_seconds = round (duration_timedelta .total_seconds ())
135
163
136
164
job_data_query = f'''
137
- INSERT INTO github_workflow_jobs_test (
165
+ INSERT INTO workflow_jobs (
138
166
job_id, parent_run_id, html_url, name,
139
167
created_at, started_at, completed_at,
140
168
queued_duration_seconds, duration_seconds,
141
- runner_name, status, conclusion, head_branch)
169
+ runner_name, status, conclusion, head_branch,
170
+ run_attempt, workflow_name
171
+ )
142
172
VALUES(
143
173
'{ job_id } ', '{ run_id } ', '{ job .html_url } ', '{ job .name } ',
144
174
'{ job .raw_data ['created_at' ]} ', '{ job .started_at } ', '{ job .completed_at } ',
145
175
'{ queued_duration_seconds } ', '{ duration_seconds } ',
146
176
'{ job .raw_data ['runner_name' ]} ', '{ job .status } ', '{ job .conclusion } ',
147
- '{ job .raw_data ['head_branch' ]} '
177
+ '{ job .raw_data ['head_branch' ]} ', ' { job . raw_data [ 'run_attempt' ] } ', ' { job . raw_data [ 'workflow_name' ] } '
148
178
);
149
179
'''
150
180
logger .debug ('Job query: %s' , job_data_query )
151
181
cur .execute (job_data_query )
152
182
for step in job .steps :
183
+ logger .debug ('Processing step %s' , step .name )
153
184
duration_seconds_timedelta = step .completed_at - step .started_at
154
185
duration_seconds = round (duration_seconds_timedelta .total_seconds ())
155
186
156
187
step_data_query = f'''
157
- INSERT INTO github_workflow_steps_test (
188
+ INSERT INTO workflow_steps (
158
189
parent_job_id, name, conclusion,
159
190
number, started_at, completed_at,
160
191
duration_seconds)
0 commit comments