-
Notifications
You must be signed in to change notification settings - Fork 0
/
destarify.py
36 lines (30 loc) · 1.25 KB
/
destarify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
'''Strip RDF-star annotation statements from the Turtle (.ttl) files of a dump and write the flattened copies to a separate directory.'''
import os
import re
from tqdm import tqdm
# Root of the input dump: run() scans each immediate subfolder of this
# directory for .ttl files.
source = './dump'
# Root of the output tree: run() writes the processed files here, under the
# same subfolder names as in the input.
dest = './dump-flat'
# Matches one RDF-star annotation statement, "<< TRIPLE >> ANNOTATION .";
# group 1 captures the quoted triple. "." never matches a newline here, so a
# match cannot span lines (which is why no regex flag is needed).
_RDF_STAR_STATEMENT = re.compile(r'<< (.+) >> .+\.')


def _flatten_ttl_dir(in_dir, out_dir):
    '''Write a de-starred copy of every ``.ttl`` file in *in_dir* to *out_dir*.'''
    for name in os.listdir(in_dir):
        if not name.endswith('.ttl'):
            continue
        # Turtle documents are UTF-8; be explicit rather than depending on the
        # platform's locale encoding.
        with open(os.path.join(in_dir, name), encoding='utf-8') as f:
            orig = f.read()
        # Keep only the quoted triple and terminate it as a plain statement.
        final = _RDF_STAR_STATEMENT.sub(r'\1 .', orig)
        # Created only once a Turtle file is found, so folders without any
        # .ttl file do not produce an empty output directory.
        os.makedirs(out_dir, exist_ok=True)
        with open(os.path.join(out_dir, name), 'w', encoding='utf-8') as f:
            f.write(final)


def run(source_dir=None, dest_dir=None, *, show_progress=True):
    '''Copy the dump's Turtle files with RDF-star annotations flattened.

    Every ``.ttl`` file found directly inside an immediate subfolder of the
    input root is rewritten so that each ``<< s p o >> ... .`` statement
    becomes ``s p o .``; the result is written under the same subfolder name
    in the output root. Files in the nested ``vocabularies/vocabularies``
    folder are then processed the same way and written to ``vocabularies``
    in the output root (one level up from where they were read).

    Args:
        source_dir: Input root. Defaults to the module-level ``source``.
        dest_dir: Output root. Defaults to the module-level ``dest``.
        show_progress: Wrap the folder iteration in ``tqdm`` when true.

    Raises:
        FileNotFoundError: If the input root or its
            ``vocabularies/vocabularies`` folder does not exist.
    '''
    # Resolve defaults at call time so a bare run() keeps using the
    # module-level settings.
    source_dir = source if source_dir is None else source_dir
    dest_dir = dest if dest_dir is None else dest_dir

    folders = sorted(os.listdir(source_dir))
    if show_progress:
        folders = tqdm(folders)
    for folder in folders:
        in_dir = os.path.join(source_dir, folder)
        if not os.path.isdir(in_dir):
            continue
        _flatten_ttl_dir(in_dir, os.path.join(dest_dir, folder))

    # The vocabularies are nested one level deeper than the other folders, so
    # the loop above does not reach them; flatten that extra level on output.
    _flatten_ttl_dir(
        os.path.join(source_dir, 'vocabularies', 'vocabularies'),
        os.path.join(dest_dir, 'vocabularies'),
    )
# Run the conversion only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    run()