forked from kubernetes/community
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate-devstats-repo-sql.py
executable file
·194 lines (171 loc) · 6.09 KB
/
generate-devstats-repo-sql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env python3
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Output devstats repo_groups.sql based on subproject definitions in sigs.yaml
This is likely missing a few repos because:
- some repos lack an owner (eg: kubernetes/kubernetes)
- it doesn't enumerate all repos from all kubernetes-owned orgs
- it ignores the fact that committees can own repos, only grouping by sig
The sql generated is NOT intended to overwrite/replace the file that lives at
github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql, but instead aid a
human in doing some manual updates to the file. Future improvements to this
script could eliminate that part of the process, but it's where we are today.
"""
import argparse
import ruamel.yaml as yaml
import json
import re
import sys
# SQL template for one repo group: the first '{}' receives the group name and
# the second receives the quoted, comma-separated list of repo names.
repo_group_sql_template = """
update gha_repos set repo_group = '{}' where name in (
{}
);
"""
# Leading SQL comment lines, written once before any group statements.
# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
repo_groups_sql_header = """-- generated by github.com/kubernetes/community/hack/generate-devstats-repo-sql.py
-- Add repository groups
"""
# Trailing SQL, written once after all group statements. It sets every repo's
# alias to the most recent name recorded in gha_events (falling back to the
# repo's own name), forces the alias of the main kubernetes repo, and finally
# selects the number of repos per group.
# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
repo_groups_sql_footer = """
-- All other unknown repositories should have 'Other' repository group
-- update gha_repos set repo_group = 'Other' where repo_group is null;
-- By default alias is the newest repo name for given repo ID
update
gha_repos r
set
alias = coalesce((
select e.dup_repo_name
from
gha_events e
where
e.repo_id = r.id
order by
e.created_at desc
limit 1
), name)
;
update gha_repos set alias = 'kubernetes/kubernetes' where name like '%kubernetes' or name = 'kubernetes/';
select
repo_group,
count(*) as number_of_repos
from
gha_repos
where
repo_group is not null
group by
repo_group
order by
number_of_repos desc,
repo_group asc;
"""
# Repo groups that are written out verbatim, ahead of the groups generated
# from sigs.yaml.
special_case_groups = [
    # The main repo has no single owner and has been known by many names.
    dict(
        name='Kubernetes',
        repos=[
            'kubernetes/kubernetes',
            'GoogleCloudPlatform/kubernetes',
            'kubernetes',
            'kubernetes/',
        ],
    ),
]
# Old repo names, keyed by the owning group's directory name.
#
# devstats does not know about repo renames or migrations, so the former
# names must stay in its sql groups for historical purposes.
#
# When this script appears to delete a repo from repo_groups.sql, check the
# github.com/kubernetes/org issues to find out why: repos that were renamed,
# migrated, or used and then retired belong here; repos that were never used
# or were deleted do not.
renamed_repos = {
    "sig-architecture": ["kubernetes/contrib"],
    "sig-api-machinery": ["kubernetes-incubator/apiserver-builder"],
    "sig-cluster-lifecycle": ["kubernetes-incubator/kubespray"],
    "sig-multicluster": ["kubernetes-sigs/federation-v2"],
    "sig-node": ["kubernetes-incubator/node-feature-discovery"],
    "sig-pm": ["kubernetes/features"],
    "sig-service-catalog": ["kubernetes-incubator/service-catalog"],
}
def repos_from_k8s_group(k8s_group):
    """Returns a sorted list of org/repos given a kubernetes community group

    A repo is attributed to the group when one of the group's subprojects
    lists that repo's top-level OWNERS file among its 'owners' URLs. OWNERS
    files that live deeper inside a repo are ignored.

    Args:
        k8s_group: mapping describing one group; its optional 'subprojects'
            entry is a list of mappings, each with an optional 'owners' list
            of raw.githubusercontent.com URLs.

    Returns:
        Sorted list of unique 'org/repo' strings; empty when the group has no
        subprojects or none of them references a top-level OWNERS file.
    """
    # <org>/<repo>/<branch>/<path>: the branch segment is matched loosely so
    # that repos whose OWNERS URL points at a branch other than 'master'
    # (e.g. 'main') are not silently dropped
    raw_url = re.compile(
        r"https://raw\.githubusercontent\.com/([^/]+/[^/]+)/[^/]+/(.*)")
    repos = set()
    # 'subprojects' and 'owners' may be absent or explicitly null
    for sp in k8s_group.get('subprojects') or []:
        for uri in sp.get('owners') or []:
            owners_path = raw_url.sub(r"\1/\2", uri)
            path_parts = owners_path.split('/')
            # org/repo is owned by k8s_group if org/repo/OWNERS is in one of
            # their subprojects; anything with fewer than three path parts
            # cannot be such a file, so skip it instead of raising IndexError
            if len(path_parts) > 2 and path_parts[2] == 'OWNERS':
                repos.add('/'.join(path_parts[0:2]))
    return sorted(repos)
def k8s_group_name(k8s_group):
    """Returns the repo group name to use for a kubernetes community group

    Groups whose 'dir' starts with 'sig-' become "SIG <name>", groups whose
    'dir' starts with 'committee-' become "<name> Committee", and anything
    else becomes "UNKNOWN <dir>".
    """
    directory = k8s_group.get('dir', '')
    is_sig = directory.startswith('sig-')
    is_committee = directory.startswith('committee-')
    if not (is_sig or is_committee):
        return "UNKNOWN " + directory
    # 'name' is only looked up for recognised group kinds
    group_name = k8s_group['name']
    if is_sig:
        return "SIG " + group_name
    return group_name + " Committee"
def write_repo_groups_template(name, repos, fp):
    """Writes the sql statement assigning repos to the repo group called name

    Nothing is written when repos is empty.
    """
    if len(repos) == 0:
        return
    # one quoted repo name per line, separated by commas
    quoted_repos = [' \'{}\''.format(repo) for repo in repos]
    statement = repo_group_sql_template.format(name, ',\n'.join(quoted_repos))
    fp.write(statement)
def write_repo_groups_sql(k8s_groups, fp):
    """Writes the complete repo groups sql for k8s_groups to fp

    Output order: the header, the special case groups, one statement per sig
    and then per committee (each combining the repos found via its
    subprojects with any old names listed in renamed_repos), and the footer.
    """
    fp.write(repo_groups_sql_header)
    for special in special_case_groups:
        write_repo_groups_template(special['name'], special['repos'], fp)
    for group_type in ('sigs', 'committees'):
        for group in k8s_groups[group_type]:
            current = repos_from_k8s_group(group)
            historical = renamed_repos.get(group['dir'], [])
            # merge, de-duplicate and order the repo names
            merged = sorted(set(current).union(historical))
            write_repo_groups_template(k8s_group_name(group), merged, fp)
    fp.write(repo_groups_sql_footer)
def main(sigs_yaml, repo_groups_sql):
    """Loads sigs.yaml and writes the generated repo groups sql

    Args:
        sigs_yaml: path of the sigs.yaml file to read.
        repo_groups_sql: path of the file to write the sql to; when None the
            sql is written to stdout instead.
    """
    with open(sigs_yaml) as fp:
        # The module-level round_trip_load() helper belongs to ruamel.yaml's
        # deprecated function API; the YAML object with typ='rt' is the
        # round-trip loader in the supported API.
        k8s_groups = yaml.YAML(typ='rt').load(fp)
    if repo_groups_sql is None:
        write_repo_groups_sql(k8s_groups, sys.stdout)
        return
    with open(repo_groups_sql, 'w') as fp:
        write_repo_groups_sql(k8s_groups, fp)
if __name__ == '__main__':
    # Command-line entry point: parse the two optional paths and hand them
    # to main().
    PARSER = argparse.ArgumentParser(
        description='Generate a repo_groups.sql intended for github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql',
    )
    PARSER.add_argument('--sigs-yaml', default='./sigs.yaml', help='Path to sigs.yaml')
    PARSER.add_argument('--repo-groups-sql', help='Path to output repo_groups.sql if provided')
    ARGS = PARSER.parse_args()
    main(sigs_yaml=ARGS.sigs_yaml, repo_groups_sql=ARGS.repo_groups_sql)