-
Notifications
You must be signed in to change notification settings - Fork 3
/
wiki_w2v.py
28 lines (19 loc) · 883 Bytes
/
wiki_w2v.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import logging
import os
import pickle

import gensim
if __name__ == '__main__':
    # Train a word2vec model on the pickled Wikipedia sentence list, save it,
    # then print nearest neighbours for a few probe words as a sanity check.

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only run this against a sentlist.pickle you produced yourself.
    with open('./data/wikipages/sentlist.pickle', mode='rb') as ff:
        wiki_sents = pickle.load(ff)
    if not wiki_sents:
        # Fail with a clear message instead of an IndexError on wiki_sents[0].
        raise SystemExit('sentlist.pickle contains no sentences; nothing to train on')

    # Tokenisation is just lower-casing plus whitespace splitting.
    print(wiki_sents[0].lower().split())
    token_sents = [ws.lower().split() for ws in wiki_sents]
    print(token_sents[0])

    # Configure logging before training so gensim's progress messages show up.
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    # NOTE: `size` is the pre-4.0 gensim keyword; gensim >= 4 renamed it to
    # `vector_size`. Kept as-is to match the gensim version this script targets.
    model = gensim.models.Word2Vec(token_sents, size=200, window=5, min_count=200, workers=6, negative=6)

    # Persist the model BEFORE running the probe queries: a probe word that
    # fell below min_count raises KeyError, and the training run must not be
    # lost because of a failed sanity check.
    os.makedirs('models', exist_ok=True)
    model.save('models/word2vec_wiki.model')

    # Query through model.wv; the model-level most_similar() is deprecated.
    for probe in ('dessuten', '.', '?', 'skog', 'og', 'kriminell'):
        try:
            print(model.wv.most_similar(positive=[probe]))
        except KeyError:
            print('%r is not in the vocabulary (min_count=200)' % probe)