You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
if embed_type == "google":
with open(word2vec, "rb") as f:
header = f.readline()
vocab_size, layer1_size = map(int, header.split())
binary_len = np.dtype('float32').itemsize * layer1_size
for line in xrange(vocab_size):
word = []
while True:
ch = f.read(1)
if ch == ' ':
word = ''.join(word)
break
if ch != '\n':
word.append(ch)
idx = 0
emb_string = f.read(binary_len)
if word in vocabulary_user:
u = u + 1
idx = vocabulary_user[word]
initW_user[idx] = np.fromstring(emb_string, dtype='float32')
if word in vocabulary_item:
item = item + 1
idx = vocabulary_item[word]
initW_item[idx] = np.fromstring(emb_string, dtype='float32')
for line in xrange(vocab_size):
word = []
while True:
ch = f.read(1)
if ch == ' ':
word = ''.join(word)
break
if ch != '\n':
word.append(ch)
Hi, when I run this code, both my computer and my workstation run out of memory. Some questions:
1. Is your word2vec file GoogleNews-vectors-negative300.bin? If not, which file is it? Could you provide a download URL? Thank you.
2. As I understand it, this code builds vector representations of user reviews, so can I use GoogleNews-vectors-negative300.bin directly to represent the words in the reviews? Are the two approaches equivalent?
The text was updated successfully, but these errors were encountered:
if embed_type == "google":
with open(word2vec, "rb") as f:
header = f.readline()
vocab_size, layer1_size = map(int, header.split())
binary_len = np.dtype('float32').itemsize * layer1_size
for line in xrange(vocab_size):
word = []
while True:
ch = f.read(1)
if ch == ' ':
word = ''.join(word)
break
if ch != '\n':
word.append(ch)
idx = 0
emb_string = f.read(binary_len)
if word in vocabulary_user:
u = u + 1
idx = vocabulary_user[word]
initW_user[idx] = np.fromstring(emb_string, dtype='float32')
for line in xrange(vocab_size):
word = []
while True:
ch = f.read(1)
if ch == ' ':
word = ''.join(word)
break
if ch != '\n':
word.append(ch)
Hi, when I run this code, both my computer and my workstation run out of memory. Some questions:
1. Is your word2vec file GoogleNews-vectors-negative300.bin? If not, which file is it? Could you provide a download URL? Thank you.
2. As I understand it, this code builds vector representations of user reviews, so can I use GoogleNews-vectors-negative300.bin directly to represent the words in the reviews? Are the two approaches equivalent?
The text was updated successfully, but these errors were encountered: