@@ -9,25 +9,28 @@
 import glob
 import os
 import os.path as osp
+import re
 import sys
 
 import mmcv
+from lxml import etree
 
 MMSEG_ROOT = osp.dirname(osp.dirname((osp.dirname(__file__))))
 
 
-def dump_yaml_and_check_difference(obj, filename):
+def dump_yaml_and_check_difference(obj, filename, sort_keys=False):
     """Dump object to a yaml file, and check if the file content is different
     from the original.
 
     Args:
         obj (any): The python object to be dumped.
         filename (str): YAML filename to dump the object to.
+        sort_keys (bool): Whether to sort keys in dictionary order.
     Returns:
         Bool: If the target YAML file is different from the original.
     """
 
-    str_dump = mmcv.dump(obj, None, file_format='yaml', sort_keys=True)
+    str_dump = mmcv.dump(obj, None, file_format='yaml', sort_keys=sort_keys)
     if osp.isfile(filename):
         file_exists = True
         with open(filename, 'r', encoding='utf-8') as f:
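A quick sanity check of what the new `sort_keys` flag changes: a minimal sketch, assuming `mmcv` is installed and that `mmcv.dump` forwards extra kwargs to PyYAML's `yaml.dump` (which accepts `sort_keys` since PyYAML 5.1).

```python
import mmcv

# With sort_keys=False (the new default) the YAML keeps the dict's
# insertion order instead of alphabetizing the keys.
obj = {'Name': 'fcn', 'Metadata': {'Training Data': ['Cityscapes']}}
print(mmcv.dump(obj, None, file_format='yaml', sort_keys=False))
# Name: fcn
# Metadata:
#   Training Data:
#   - Cityscapes
print(mmcv.dump(obj, None, file_format='yaml', sort_keys=True))
# Metadata:
#   Training Data:
#   - Cityscapes
# Name: fcn
```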
@@ -54,12 +57,29 @@ def parse_md(md_file):
     Returns:
         Bool: If the target YAML file is different from the original.
     """
-    collection_name = osp.dirname(md_file).split('/')[-1]
+    collection_name = osp.split(osp.dirname(md_file))[1]
     configs = os.listdir(osp.dirname(md_file))
 
-    collection = dict(Name=collection_name, Metadata={'Training Data': []})
+    collection = dict(
+        Name=collection_name,
+        Metadata={'Training Data': []},
+        Paper={
+            'URL': '',
+            'Title': ''
+        },
+        README=md_file,
+        Code={
+            'URL': '',
+            'Version': ''
+        })
+    collection.update({'Converted From': {'Weights': '', 'Code': ''}})
     models = []
     datasets = []
+    paper_url = None
+    paper_title = None
+    code_url = None
+    code_version = None
+    repo_url = None
 
     with open(md_file, 'r') as md:
         lines = md.readlines()
@@ -70,7 +90,36 @@ def parse_md(md_file):
             if len(line) == 0:
                 i += 1
                 continue
-            if line[:3] == '###':
+            if line[:2] == '# ':
+                paper_title = line.replace('# ', '')
+                i += 1
+            elif line[:3] == '<a ':
+                content = etree.HTML(line)
+                node = content.xpath('//a')[0]
+                if node.text == 'Code Snippet':
+                    code_url = node.get('href', None)
+                    assert code_url is not None, (
+                        f'{collection_name} has no code snippet url.')
+                    # version extraction
+                    filter_str = r'blob/(.*)/mm'
+                    pattern = re.compile(filter_str)
+                    code_version = pattern.findall(code_url)
+                    assert len(code_version) == 1, (
+                        f'regex ({filter_str}) must match exactly once.')
+                    code_version = code_version[0]
+                elif node.text == 'Official Repo':
+                    repo_url = node.get('href', None)
+                    assert repo_url is not None, (
+                        f'{collection_name} has no official repo url.')
+                i += 1
+            elif line[:9] == '<summary ':
+                content = etree.HTML(line)
+                nodes = content.xpath('//a')
+                assert len(nodes) == 1, (
+                    'summary tag should only have a single <a> tag.')
+                paper_url = nodes[0].get('href', None)
+                i += 1
+            elif line[:4] == '### ':
                 datasets.append(line[4:])
                 current_dataset = line[4:]
                 i += 2
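To see the new link parsing in isolation, here is a standalone sketch; the `<a>` line and its href are made up for illustration, real values come from each model's README.md.

```python
import re

from lxml import etree

# A hypothetical '<a>' line in the format the parser expects.
line = ('<a href="https://github.com/open-mmlab/mmsegmentation/blob/'
        'v0.18.0/mmseg/models/decode_heads/fcn_head.py">Code Snippet</a>')
node = etree.HTML(line).xpath('//a')[0]
code_url = node.get('href', None)
# r'blob/(.*)/mm' captures the release tag between 'blob/' and the
# first path segment starting with 'mm' (here, 'mmseg').
print(re.findall(r'blob/(.*)/mm', code_url))  # ['v0.18.0']
```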
@@ -113,22 +162,28 @@ def parse_md(md_file):
                     crop_size = els[crop_size_id].split('x')
                     assert len(crop_size) == 2
                     model = {
-                        'Name': model_name,
-                        'In Collection': collection_name,
+                        'Name':
+                        model_name,
+                        'In Collection':
+                        collection_name,
                         'Metadata': {
                             'backbone': els[backbone_id],
                             'crop size': f'({crop_size[0]},{crop_size[1]})',
                             'lr schd': int(els[lr_schd_id]),
                         },
-                        'Results': {
-                            'Task': 'Semantic Segmentation',
-                            'Dataset': current_dataset,
-                            'Metrics': {
-                                'mIoU': float(els[ss_id]),
+                        'Results': [
+                            {
+                                'Task': 'Semantic Segmentation',
+                                'Dataset': current_dataset,
+                                'Metrics': {
+                                    'mIoU': float(els[ss_id]),
+                                },
                             },
-                        },
-                        'Config': config,
-                        'Weights': weight,
+                        ],
+                        'Config':
+                        config,
+                        'Weights':
+                        weight,
                     }
                     if fps != -1:
                         try:
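Wrapping 'Results' in a list lets a model carry more than one task entry later. A hand-written entry in the new shape, with placeholder values:

```python
model = {
    'Name': 'fcn_r50-d8_512x1024_40k_cityscapes',  # placeholder name
    'In Collection': 'fcn',
    'Metadata': {
        'backbone': 'R-50-D8',
        'crop size': '(512,1024)',
        'lr schd': 40000,
    },
    'Results': [{
        'Task': 'Semantic Segmentation',
        'Dataset': 'Cityscapes',
        'Metrics': {
            'mIoU': 72.25,  # placeholder metric
        },
    }],
    'Config': 'configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py',
    'Weights': '',  # filled from the README table
}
# A second task's results could now simply be appended:
# model['Results'].append({'Task': ..., 'Dataset': ..., 'Metrics': {...}})
```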
@@ -152,15 +207,38 @@ def parse_md(md_file):
                             }]
                     if mem != -1:
                         model['Metadata']['memory (GB)'] = float(mem)
+                    # Only semantic segmentation results are supported now
                     if ms_id and els[ms_id] != '-' and els[ms_id] != '':
-                        model['Results']['Metrics']['mIoU(ms+flip)'] = float(
-                            els[ms_id])
+                        model['Results'][0]['Metrics'][
+                            'mIoU(ms+flip)'] = float(els[ms_id])
                     models.append(model)
                     j += 1
                 i = j
             else:
                 i += 1
+    flag = (code_url is not None) and (paper_url is not None) and (
+        repo_url is not None)
+    assert flag, f'{collection_name} README is missing required urls.'
     collection['Metadata']['Training Data'] = datasets
+    collection['Code']['URL'] = code_url
+    collection['Code']['Version'] = code_version
+    collection['Paper']['URL'] = paper_url
+    collection['Paper']['Title'] = paper_title
+    collection['Converted From']['Code'] = repo_url
+    # ['Converted From']['Weights'] is not set here
+    # remove empty attributes
+    check_key_list = ['Code', 'Paper', 'Converted From']
+    for check_key in check_key_list:
+        key_list = list(collection[check_key].keys())
+        for key in key_list:
+            if check_key not in collection:
+                break
+            if collection[check_key][key] == '':
+                if len(collection[check_key].keys()) == 1:
+                    collection.pop(check_key)
+                else:
+                    collection[check_key].pop(key)
+
     result = {'Collections': [collection], 'Models': models}
     yml_file = f'{md_file[:-9]}{collection_name}.yml'
     return dump_yaml_and_check_difference(result, yml_file)
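The empty-attribute pruning above can be exercised on its own. In this sketch the collection values are illustrative (the FCN paper URL is just an example), and the loop body is the same as in the diff:

```python
collection = {
    'Paper': {'URL': 'https://arxiv.org/abs/1411.4038', 'Title': ''},
    'Code': {'URL': 'https://github.com/open-mmlab/mmsegmentation',
             'Version': 'v0.18.0'},
    'Converted From': {'Weights': '', 'Code': ''},
}
for check_key in ['Code', 'Paper', 'Converted From']:
    key_list = list(collection[check_key].keys())
    for key in key_list:
        if check_key not in collection:
            break  # the whole section was already popped
        if collection[check_key][key] == '':
            if len(collection[check_key].keys()) == 1:
                # the last remaining key is empty: drop the whole section
                collection.pop(check_key)
            else:
                collection[check_key].pop(key)
print(collection)
# {'Paper': {'URL': 'https://arxiv.org/abs/1411.4038'},
#  'Code': {'URL': 'https://github.com/open-mmlab/mmsegmentation',
#           'Version': 'v0.18.0'}}
```

Empty fields are dropped key by key, and a section whose last key is empty disappears entirely, so the emitted YAML never carries blank attributes.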