forked from open-mmlab/mmpose
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollect.py
executable file
·130 lines (116 loc) · 4.4 KB
/
collect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import re
from glob import glob
from titlecase import titlecase
# Make sure both output directories exist before any page is written.
for _out_dir in ('topics', 'papers'):
    os.makedirs(_out_dir, exist_ok=True)
def _parse_task(task):
"""Parse task name.
Data modality is represented by a string of 4 or 5 parts like:
- 2d_kpt_sview_rgb_img
- gesture_sview_rgbd_vid
"""
parts = task.split('_')
if len(parts) == 5:
pass
elif len(parts) == 4:
# The first part "spatial dimension" is optional
parts = [''] + parts
else:
raise ValueError('Invalid modality')
return parts
# Step 1: get subtopics: a mix of topic and task
# Each entry holds the last two path components of a match: [topic, task].
minisections = [
    x.split(osp.sep)[-2:] for x in glob('../../configs/*/*')
    if '_base_' not in x
]
alltopics = sorted({x[0] for x in minisections})
# Each subtopic is [display title, topic, task directory name].
subtopics = []
for topic in alltopics:
    tasks = [_parse_task(x[1]) for x in minisections if x[0] == topic]
    # Positions at which the tasks of this topic differ from each other; only
    # these parts are needed to tell the subtopics of one topic apart.
    valid_ids = [
        i for i, column in enumerate(zip(*tasks)) if len(set(column)) > 1
    ]
    if valid_ids:
        for task in tasks:
            appendix = ','.join(
                task[i].title() for i in valid_ids if task[i])
            subtopics.append([
                f'{titlecase(topic)}({appendix})',
                topic,
                '_'.join(t for t in task if t),
            ])
    else:
        # Only one task for this topic. Drop the empty placeholder that
        # _parse_task inserts for 4-part names; otherwise the rebuilt
        # directory name would start with "_" and match nothing on disk.
        subtopics.append(
            [titlecase(topic), topic, '_'.join(t for t in tasks[0] if t)])
# Maps subtopic title -> dataset name -> keyword tuple -> markdown text.
contents = {}
for subtopic, topic, task in sorted(subtopics):
    task_root = f'../../configs/{topic}/{task}'
    # Step 2: get all datasets
    # The pattern ends with a separator, so the dataset directory name is the
    # second-to-last item after splitting.
    datasets = sorted(
        {x.split(osp.sep)[-2] for x in glob(f'{task_root}/*/*/')})
    contents[subtopic] = {d: {} for d in datasets}
    for dataset in datasets:
        # Step 3: get all settings: algorithm + backbone + trick
        # glob() returns matches in file-system order; sort them so that the
        # generated pages list the settings in the same order everywhere.
        for file in sorted(glob(f'{task_root}/*/{dataset}/*.md')):
            path_parts = file.split(osp.sep)
            # Key: the directory two levels above the file, followed by the
            # "_"-separated pieces of the file name except the last one.
            keywords = (path_parts[-3], *path_parts[-1].split('_')[:-1])
            with open(file, 'r', encoding='utf-8') as f:
                contents[subtopic][dataset][keywords] = f.read()
# Step 4: write files by topic
for subtopic, datasets in contents.items():
    page = [f'# {subtopic}', '']
    for dataset, settings in datasets.items():
        if not settings:
            # A dataset with no collected markdown gets no section.
            continue
        dataset_title = titlecase(dataset)
        page.extend(
            ['<hr/>', '<br/><br/>', '', f'## {dataset_title} Dataset', ''])
        for keyword, info in settings.items():
            heading = ' + '.join(
                titlecase(part.replace('_', ' ')) for part in keyword)
            page.extend([
                '<br/>', '', f'### {heading} on {dataset_title}', '', info, ''
            ])
    # One markdown page per subtopic, named after the lower-cased title.
    with open(f'topics/{subtopic.lower()}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(page))
# Step 5: write files by paper
# Each entry holds the last two path components of a match: [section, file].
allfiles = [x.split(osp.sep)[-2:] for x in glob('../en/papers/*/*.md')]
sections = sorted({x[0] for x in allfiles})
for section in sections:
    lines = [f'# {titlecase(section)}', '']
    # glob() returns matches in file-system order; sort them so that the
    # papers appear in the same order on every machine.
    files = sorted(f for s, f in allfiles if s == section)
    for file in files:
        paper_path = f'../en/papers/{section}/{file}'
        with open(paper_path, 'r', encoding='utf-8') as f:
            # The first line starting with "<summary" identifies the paper;
            # stop reading as soon as it is found.
            keyline = next(
                (line for line in f if line.startswith('<summary')), None)
        if keyline is None:
            # Fail as before, but say which file is malformed.
            raise IndexError(f'No "<summary" line found in {paper_path}')
        # Strip the markup tags to get the plain paper title.
        papername = re.sub(r'\<.*?\>', '', keyline).strip()
        paperlines = []
        for subtopic, datasets in contents.items():
            for dataset, keywords in datasets.items():
                for keyword, info in keywords.items():
                    # Keep only the entries whose markdown contains this
                    # paper's summary line.
                    if keyline not in info:
                        continue
                    keyword_strs = [
                        titlecase(x.replace('_', ' ')) for x in keyword
                    ]
                    paperlines += [
                        '<br/>', '',
                        (f'### {" + ".join(keyword_strs)}'
                         f' on {titlecase(dataset)}'), '', info, ''
                    ]
        if paperlines:
            lines += ['<hr/>', '<br/><br/>', '', f'## {papername}', '']
            lines += paperlines
    # One markdown page per paper section.
    with open(f'papers/{section}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))