-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LsiModel.docs_processed attribute #763
Changes from 13 commits
b673991
cc2474a
c7491d7
12d9c76
b84719a
7d5a7c2
02dc50c
cd78a03
4208045
b5dcdc8
35b3077
6313234
30a54eb
1b63366
1f9b04b
0d9ad8a
a5a799f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,14 +16,17 @@ | |
|
||
import numpy | ||
|
||
from gensim.utils import to_unicode, smart_extension | ||
from gensim.utils import to_unicode # , smart_extension | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove the import if it is no longer needed. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. Side note: there are many more unused imports throughout gensim. They can be dangerous to remove, though, for someone like me who is unfamiliar with the internals of the packages being imported. For example |
||
from gensim.interfaces import TransformedCorpus | ||
from gensim.corpora import (bleicorpus, mmcorpus, lowcorpus, svmlightcorpus, | ||
ucicorpus, malletcorpus, textcorpus, indexedcorpus) | ||
|
||
# needed because sample data files are located in the same folder | ||
module_path = os.path.dirname(__file__) | ||
datapath = lambda fname: os.path.join(module_path, 'test_data', fname) | ||
|
||
|
||
def datapath(fname): | ||
return os.path.join(module_path, 'test_data', fname) | ||
|
||
|
||
def testfile(): | ||
|
@@ -180,7 +183,7 @@ def test_indexing(self): | |
self.assertEqual(len(docs), len(corpus)) | ||
self.assertEqual(len(docs), len(corpus[:])) | ||
self.assertEqual(len(docs[::2]), len(corpus[::2])) | ||
|
||
def _get_slice(corpus, slice_): | ||
# assertRaises for python 2.6 takes a callable | ||
return corpus[slice_] | ||
|
@@ -200,9 +203,9 @@ def _get_slice(corpus, slice_): | |
# corpus does, and throws an error otherwise | ||
if hasattr(corpus, 'index') and corpus.index is not None: | ||
corpus_ = TransformedCorpus(DummyTransformer(), corpus) | ||
self.assertEqual(corpus_[0][0][1], docs[0][0][1]+1) | ||
self.assertEqual(corpus_[0][0][1], docs[0][0][1] + 1) | ||
self.assertRaises(ValueError, _get_slice, corpus_, set([1])) | ||
transformed_docs = [val+1 for i, d in enumerate(docs) for _, val in d if i in [1, 3, 4]] | ||
transformed_docs = [val + 1 for i, d in enumerate(docs) for _, val in d if i in [1, 3, 4]] | ||
self.assertEquals(transformed_docs, list(v for doc in corpus_[[1, 3, 4]] for _, v in doc)) | ||
self.assertEqual(3, len(corpus_[[1, 3, 4]])) | ||
else: | ||
|
@@ -214,12 +217,19 @@ def _get_slice(corpus, slice_): | |
class TestMmCorpus(CorpusTestCase): | ||
def setUp(self): | ||
self.corpus_class = mmcorpus.MmCorpus | ||
self.corpus = self.corpus_class(datapath('testcorpus.mm')) | ||
self.file_extension = '.mm' | ||
|
||
def test_serialize_compressed(self): | ||
# MmCorpus needs file write with seek => doesn't support compressed output (only input) | ||
pass | ||
|
||
def test_load(self): | ||
self.assertEqual(self.corpus.num_docs, 9) | ||
self.assertEqual(self.corpus.num_terms, 12) | ||
self.assertEqual(self.corpus.num_nnz, 28) | ||
self.assertEqual(tuple(self.corpus.index), (97, 121, 169, 201, 225, 249, 258, 276, 303)) | ||
|
||
|
||
class TestSvmLightCorpus(CorpusTestCase): | ||
def setUp(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we always use doc_no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I guess so, for this line.