-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
67 lines (55 loc) · 2.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import argparse
import logging
from pathlib import Path
from typing import List
from gensim.models import KeyedVectors
from Clustering.abstract_cluster_builder import AbstractClusterBuilder
from Clustering.cluster import Cluster
from Clustering.similar_dimension_cluster_builder import SimilarDimensionClusterBuilder
from EntityLinking.entity_linking import EntityLinking
from FileParsing.embedding_file_parser import EmbeddingFileParser
from FileParsing.entity_linking_file_parser import EntityLinkingFileParser
def main(args: argparse.Namespace) -> None:
    """Load the embedding and linking files, build clusters and write them out.

    Args:
        args: Parsed command-line namespace. This function reads
            ``args.embeddings``, ``args.linking`` and ``args.workers``.

    The clusters are written, one per line, to ``.clusters.txt`` relative to
    the current working directory; an existing file is truncated.
    """
    # basicConfig does nothing once the root logger has handlers, so this
    # format only applies when logging has not been configured beforehand.
    logging.basicConfig(
        format='%(asctime)s : [Process-%(process)d] [Thread-%(threadName)s] %(levelname)s : %(message)s',
        level=logging.INFO)

    logging.info("loading embedding...")
    embedding: KeyedVectors = EmbeddingFileParser.create_from_file(args.embeddings)

    logging.info("loading linking...")
    # NOTE(review): the linking is loaded but not passed to the cluster builder
    # below. The call is kept so a missing/invalid linking file still fails
    # early - confirm whether the builder is supposed to consume it.
    linking: EntityLinking = EntityLinkingFileParser.create_from_file(args.linking)

    logging.info("building clusters...")
    cluster_builder: AbstractClusterBuilder = SimilarDimensionClusterBuilder(
        embedding=embedding, workers=args.workers)
    clusters: List[Cluster] = cluster_builder.run()

    # Write-only access is enough ("w" instead of "w+"), and an explicit UTF-8
    # encoding keeps non-ASCII cluster text from failing under other locales.
    with open(".clusters.txt", "w", encoding="utf-8") as clusters_file:
        for cluster in clusters:
            print(f"{cluster}", file=clusters_file)
# Accepted spellings for boolean command-line values (compared case-insensitively).
# The empty string stays falsy, as it was under the previous ``type=bool``.
_TRUE_STRINGS = frozenset({"1", "true", "t", "yes", "y", "on"})
_FALSE_STRINGS = frozenset({"", "0", "false", "f", "no", "n", "off"})


def _parse_bool(value: str) -> bool:
    """Convert a command-line string into a boolean.

    ``type=bool`` would evaluate ``bool(value)``, which is ``True`` for every
    non-empty string (including ``"False"``), so the text is interpreted
    explicitly instead.

    Args:
        value: Raw argument text as given on the command line.

    Returns:
        The boolean the text spells out.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised spelling.
    """
    normalized = value.strip().lower()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this script.

    Returns:
        A parser with the required ``--embeddings`` and ``--linking`` paths
        and the optional ``--workers`` (default 8) and ``--use-cache``
        (default ``True``) settings.
    """
    parser: argparse.ArgumentParser = argparse.ArgumentParser()
    parser.add_argument(
        "--embeddings",
        type=Path,
        help="Path to the embeddings file (word2vec format)",
        required=True,
    )
    parser.add_argument(
        "--linking",
        type=Path,
        help="Path to the embedding linking file",
        required=True,
    )
    parser.add_argument(
        "--workers",
        type=int,
        help="Number of threads to use",
        required=False,
        default=8,
    )
    parser.add_argument(
        "--use-cache",
        # Not ``type=bool``: that would turn "--use-cache False" into True.
        type=_parse_bool,
        help="Indicates, whether to use the relation cache",
        required=False,
        default=True,
    )
    return parser


if __name__ == "__main__":
    logging.basicConfig(format='%(asctime)s : [%(threadName)s] %(levelname)s : %(message)s', level=logging.INFO)
    main(build_arg_parser().parse_args())