From b1f004e19fed2cd9f9fd625eed24fb98405a6c05 Mon Sep 17 00:00:00 2001
From: Darin Deforest <ddeforest@iprotech.com>
Date: Wed, 11 Apr 2018 11:02:41 -0700
Subject: [PATCH] Changed from using floats to ints for doc terms & frequencies

---
 gensim/models/ldamodel.py | 3 ++-
 gensim/utils.py           | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py
index 6d37ac0f7d..4bf0f5d18b 100755
--- a/gensim/models/ldamodel.py
+++ b/gensim/models/ldamodel.py
@@ -698,7 +698,8 @@ def rho():
             dirty = False
 
             reallen = 0
-            for chunk_no, chunk in enumerate(utils.grouper(corpus, chunksize, as_numpy=chunks_as_numpy)):
+            for chunk_no, chunk in enumerate(utils.grouper(corpus, chunksize, as_numpy=chunks_as_numpy,
+                                            dtype=self.dtype)):
                 reallen += len(chunk)  # keep track of how many documents we've processed so far
 
                 if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
diff --git a/gensim/utils.py b/gensim/utils.py
index f6e5c4fdf3..6ef2f4ba80 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1119,7 +1119,7 @@ def substitute_entity(match):
     return RE_HTML_ENTITY.sub(substitute_entity, text)
 
 
-def chunkize_serial(iterable, chunksize, as_numpy=False):
+def chunkize_serial(iterable, chunksize, as_numpy=False, dtype=np.float32):
     """Give elements from the iterable in `chunksize`-ed lists.
     The last returned element may be smaller (if length of collection is not divisible by `chunksize`).
 
@@ -1148,7 +1148,7 @@ def chunkize_serial(iterable, chunksize, as_numpy=False):
         if as_numpy:
             # convert each document to a 2d numpy array (~6x faster when transmitting
             # chunk data over the wire, in Pyro)
-            wrapped_chunk = [[np.array(doc) for doc in itertools.islice(it, int(chunksize))]]
+            wrapped_chunk = [[np.asarray(doc, dtype=dtype) for doc in itertools.islice(it, int(chunksize))]]
         else:
             wrapped_chunk = [list(itertools.islice(it, int(chunksize)))]
         if not wrapped_chunk[0]: