-
Notifications
You must be signed in to change notification settings - Fork 49
/
search_concept.py
84 lines (76 loc) · 2.68 KB
/
search_concept.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# coding = utf-8
import os
import networkx as nx
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
class ConceptNet:
    """Look up abstraction paths for words in a concept hierarchy.

    Hierarchy edges and the word-to-concept table are read from two text
    files in the ``dict`` directory that sits next to this module.
    """

    def __init__(self):
        """Resolve the data-file paths relative to this source file."""
        # os.path.dirname copes with every platform's separator; splitting
        # the absolute path on '/' gives an empty base directory on Windows.
        cur = os.path.dirname(os.path.abspath(__file__))
        self.hiearchy_file = os.path.join(cur, "dict/hiearchy.txt")
        self.concept_file = os.path.join(cur, "dict/concept_total.txt")

    def load_concept_edges(self):
        """Load the concept edges from the hierarchy file.

        Each line is split on single spaces and lines with fewer than two
        fields are skipped. For the first two fields only the text after
        the last ``|`` is kept.

        Returns:
            A list of ``(second_field, first_field)`` tuples, i.e. each edge
            points from the second column of the file to the first.
        """
        edges = []
        # NOTE(review): the data files are assumed to be UTF-8 encoded;
        # confirm against the files shipped in ``dict``.
        with open(self.hiearchy_file, encoding="utf-8") as handle:
            for line in handle:
                fields = line.strip().split(' ')
                if len(fields) < 2:
                    continue
                from_ = fields[0].split('|')[-1]
                to_ = fields[1].split('|')[-1]
                edges.append((to_, from_))
        return edges

    def build_graph(self, edges):
        """Build a networkx directed graph from ``(source, target)`` edges."""
        G = nx.DiGraph()
        G.add_edges_from(edges)
        return G

    def build_basic_concept(self):
        """Build the base concept dictionary from the hierarchy graph.

        For every node, the shortest paths to all nodes reachable from it
        are computed and the one containing the most nodes is kept (the
        first one found wins a tie).

        Returns:
            A dict mapping each node to that path, a list of node names
            starting with the node itself.
        """
        concept_dict = {}
        print("loading concept edges")
        edges = self.load_concept_edges()
        print("build grpah")
        graph = self.build_graph(edges)
        pairs = nx.all_pairs_shortest_path(graph)
        # Some networkx releases return a dict here instead of an iterator
        # of (node, paths) pairs; accept both forms.
        if isinstance(pairs, dict):
            pairs = pairs.items()
        for wd, path_dict in pairs:
            if not path_dict:
                continue
            # max() keeps the first of several equally long paths, which is
            # the same choice a stable descending sort by length would make.
            longest_path = max(path_dict.values(), key=len)
            if not longest_path:
                continue
            concept_dict[wd] = longest_path
        return concept_dict

    def build_all_concepts(self):
        """Build the word -> abstraction path table from the concept file.

        Each line holds tab-separated fields: the first is the word and the
        last is a comma-separated list of concepts, of which only the text
        after the last ``|`` is used. Blank lines are ignored.

        Returns:
            A dict keyed by word. When the word is itself a node of the
            hierarchy graph the value is that node's single path (a flat
            list of names). Otherwise it is a list of paths, one per
            concept, each starting with the word followed by the concept's
            path (or just the concept when it is not in the graph).
        """
        all_dict = {}
        concept_dict = self.build_basic_concept()
        print('building all concepts')
        with open(self.concept_file, encoding="utf-8") as handle:
            for line in handle:
                fields = line.strip().split('\t')
                wd = fields[0]
                if not wd:
                    # A blank line would otherwise register a bogus '' word.
                    continue
                concepts = [c.split('|')[-1] for c in fields[-1].split(',')]
                concept_path = concept_dict.get(wd, '')
                if not concept_path:
                    concept_path = [[wd] + concept_dict.get(c, [c]) for c in concepts]
                all_dict[wd] = concept_path
        return all_dict

    def search_hiearchy(self):
        """Run an interactive prompt that prints a word's abstraction paths.

        Builds the full table once, prints the build time in seconds, then
        reads words from standard input until EOF or Ctrl-C. Words that are
        not in the table produce no output.
        """
        import time
        start_time = time.time()
        all_dict = self.build_all_concepts()
        print(time.time()-start_time)
        while True:
            try:
                wd = input('enter an wd to search:').strip()
            except (EOFError, KeyboardInterrupt):
                # End of input or Ctrl-C ends the session without a traceback.
                break
            paths = all_dict.get(wd, '')
            if not paths:
                continue
            # A word that is a graph node maps to one flat path; wrap it so
            # it is joined as a whole instead of character by character.
            if isinstance(paths[0], str):
                paths = [paths]
            for path in paths:
                print(wd, '抽象路径为:', '->'.join(path))
# Script entry point: start the interactive hierarchy search.
if __name__ == '__main__':
    ConceptNet().search_hiearchy()