-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathstatistics.py
132 lines (97 loc) · 3.33 KB
/
statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Generates statistics for the data and writes them to a JSON file and an HTML partial.
import json
import os
from SPARQLWrapper import SPARQLWrapper, JSON
def sparql_query(query, endpoint="http://data.judaicalink.org/sparql/query"):
    """Run a SPARQL query against an endpoint and return the converted result.

    Args:
        query: SPARQL query text.
        endpoint: URL of the SPARQL endpoint to query. Defaults to the
            endpoint this function was previously hard-wired to, so existing
            single-argument callers behave as before.

    Returns:
        Whatever ``sparql.query().convert()`` yields with the return format
        set to JSON, or ``None`` if setting the format, sending the query or
        converting the response raised an exception.
    """
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    try:
        sparql.setReturnFormat(JSON)
        return sparql.query().convert()
    except Exception as e:
        # Best-effort contract: report the failure and hand back None so the
        # callers can decide how to react.
        print('Error fetching data: ', e)
        return None
# Count all datasets (named graphs)
def sum_datasets():
    """Count the graphs returned by the endpoint, minus a fixed offset of 6.

    Returns:
        The number of result bindings of the graph query, reduced by 6.

    Raises:
        Exception: if the query failed (``sparql_query`` returned ``None``)
            or the result contains no bindings at all.
    """
    query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?g WHERE { GRAPH ?g { } }
"""
    results = sparql_query(query)
    # sparql_query reports failure by returning None; without this check the
    # subscript below would fail with an unrelated TypeError.
    if results is None:
        raise Exception('Error fetching data')
    counter = len(results['results']['bindings'])
    if counter == 0:
        raise Exception('Error fetching data')
    # NOTE(review): the 6 presumably accounts for graphs that are not
    # datasets -- confirm against the endpoint and keep in sync with it.
    return counter - 6
# Count all entities
def sum_entities():
    """Ask the endpoint for the number of distinct subjects in all graphs.

    Returns:
        The ``value`` field of the ``entityCount`` binding in the first
        result row, passed through unconverted.

    Raises:
        Exception: if ``sparql_query`` returned ``None``.
    """
    entity_query = """
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT (COUNT(DISTINCT ?subject) AS ?entityCount)
WHERE {
GRAPH ?graph {
?subject ?predicate ?object
}
}
"""
    response = sparql_query(entity_query)
    # Guard clause: a None response means the query itself failed.
    if response is None:
        raise Exception('Error fetching data')
    first_row = response['results']['bindings'][0]
    return first_row['entityCount']['value']
# Count all triples
def sum_triples():
    """Ask the endpoint for the total number of triples in all graphs.

    Returns:
        The ``value`` field of the ``tripleCount`` binding in the first
        result row, passed through unconverted.

    Raises:
        Exception: if ``sparql_query`` returned ``None``.
    """
    triple_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT (COUNT(*) AS ?tripleCount)
WHERE {
GRAPH ?graph {
?sub ?pred ?obj
}
}
"""
    response = sparql_query(triple_query)
    # Guard clause: a None response means the query itself failed.
    if response is None:
        raise Exception('Error fetching data')
    rows = response['results']['bindings']
    return rows[0]['tripleCount']['value']
def generate_html_file():
    """Write the statistics partial to ``./layouts/partials/statistics.html``.

    The entity, dataset and triple counts are fetched and rendered into a
    single ``<div>``. If fetching any of them raises, the error is printed
    and an empty file is written instead, so the partial exists either way.
    The target directory is relative to the current working directory and is
    created when missing.
    """
    path = './layouts/partials/'
    html_template = (
        '<div class="text-center">Currently, we provide data about '
        '<b class="counter" akhi="{}">0</b><b> entities</b>, consisting of '
        '<b class="counter" akhi="{}">0</b><b> triples</b>, within '
        '<b class="counter" akhi="{}">0</b><b> different datasets</b>.</div>'
    )
    try:
        entities = sum_entities()
        datasets = sum_datasets()
        triples = sum_triples()
        html_string = html_template.format(entities, triples, datasets)
        print('Statistics generated: Entities {}, Triples {}, Datasets {}'.format(entities, triples, datasets))
    except Exception as e:
        # Deliberate best effort: a failed fetch must not abort the run, the
        # partial is simply left empty.
        print('Error fetching data: ', e)
        html_string = ''
    # exist_ok replaces the separate existence check, which could race with
    # another process creating the directory in between.
    os.makedirs(path, exist_ok=True)
    # The with-block closes the file; no explicit close() is needed.
    with open(os.path.join(path, 'statistics.html'), 'w', encoding='utf-8') as f:
        f.write(html_string)
def generate_statistics():
    """Collect the dataset, entity and triple counts and dump them as JSON.

    The result is written to ``statistics.json`` in the parent of the
    directory that contains this file. Exceptions raised by the count
    functions are not caught here and propagate to the caller.
    """
    statistics = {
        'datasets': sum_datasets(),
        'entity_pages': sum_entities(),
        'triples': sum_triples(),
    }
    # Two dirname() calls on the absolute file path: one level above the
    # directory this file lives in.
    path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # The with-block closes the file; no explicit close() is needed.
    with open(os.path.join(path, 'statistics.json'), 'w') as f:
        json.dump(statistics, f, indent=4)
# Unguarded module-level call: the HTML partial is regenerated whenever this
# file is executed or imported.
generate_html_file()