PEP8 Fixes for Summarization. #1017

Closed. Wants to merge 10 commits.
Changes from 4 commits
2 changes: 1 addition & 1 deletion gensim/summarization/__init__.py
@@ -1,4 +1,4 @@

# bring model classes directly into package namespace, to save some typing
from .summarizer import summarize, summarize_corpus
from .keywords import keywords
from .keywords import keywords
6 changes: 3 additions & 3 deletions gensim/summarization/bm25.py
@@ -40,16 +40,16 @@ def initialize(self):
self.df[word] += 1

for word, freq in iteritems(self.df):
self.idf[word] = math.log(self.corpus_size-freq+0.5) - math.log(freq+0.5)
self.idf[word] = math.log(self.corpus_size - freq + 0.5) - math.log(freq + 0.5)

def get_score(self, document, index, average_idf):
score = 0
for word in document:
if word not in self.f[index]:
continue
idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf
score += (idf*self.f[index][word]*(PARAM_K1+1)
/ (self.f[index][word] + PARAM_K1*(1 - PARAM_B+PARAM_B*self.corpus_size / self.avgdl)))
score += (idf * self.f[index][word] * (PARAM_K1 + 1) /
Review comment (Owner): No vertical indent.
Splitting the mega-expression into something saner (subexpressions) will help both readability and line length.

(self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl)))
return score

def get_scores(self, document, average_idf):
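As an illustration of the reviewer's suggestion (a sketch only; the names "freq", "numerator", and "denominator" are made up here, not part of the PR), the mega-expression could be split into named subexpressions:

    def get_score(self, document, index, average_idf):
        score = 0
        for word in document:
            if word not in self.f[index]:
                continue
            idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf
            freq = self.f[index][word]
            numerator = idf * freq * (PARAM_K1 + 1)
            denominator = freq + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl)
            score += numerator / denominator
        return score

Each line then fits the length limit without continuation, and the BM25 numerator and denominator are readable at a glance.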
3 changes: 2 additions & 1 deletion gensim/summarization/commons.py
@@ -16,5 +16,6 @@ def build_graph(sequence):

def remove_unreachable_nodes(graph):
for node in graph.nodes():
if sum(graph.edge_weight((node, other)) for other in graph.neighbors(node)) == 0:
if sum(graph.edge_weight((node, other))
Review comment (Contributor): We don't check for length of line here.
for other in graph.neighbors(node)) == 0:
graph.del_node(node)
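If line length were actually the concern here, naming the subexpression would read better than the vertical split (a sketch; "weight_sum" is an illustrative name, not from the PR):

    def remove_unreachable_nodes(graph):
        for node in graph.nodes():
            weight_sum = sum(graph.edge_weight((node, other)) for other in graph.neighbors(node))
            if weight_sum == 0:
                graph.del_node(node)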
20 changes: 16 additions & 4 deletions gensim/summarization/keywords.py
@@ -161,7 +161,8 @@ def _get_combined_keywords(_keywords, split_text):
if word in _keywords:
combined_word = [word]
if i + 1 == len_text:
result.append(word) # appends last word if keyword and doesn't iterate
# appends last word if keyword and doesn't iterate
Review comment (Contributor): Ditto.

result.append(word)
for j in xrange(i + 1, len_text):
other_word = _strip_word(split_text[j])
if other_word in _keywords and other_word == split_text[j] and not other_word in combined_word:
@@ -197,7 +198,16 @@ def _format_results(_keywords, combined_keywords, split, scores):
return "\n".join(combined_keywords)


def keywords(text, ratio=0.2, words=None, split=False, scores=False, pos_filter=['NN', 'JJ'], lemmatize=False):
def keywords(
text,
ratio=0.2,
words=None,
split=False,
scores=False,
pos_filter=[
'NN',
'JJ'],
lemmatize=False):
# Gets a dict of word -> lemma
text = to_unicode(text)
tokens = _clean_text_by_word(text)
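The fully vertical expansion above is the autopep8 style the reviewers reject later in this PR; a conventional wrap that stays within the line limit might look like this (a sketch, not a change the PR makes; the mutable default pos_filter=['NN', 'JJ'] is kept as in the original):

    def keywords(text, ratio=0.2, words=None, split=False, scores=False,
                 pos_filter=['NN', 'JJ'], lemmatize=False):
        ...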
@@ -210,7 +220,8 @@ def keywords(text, ratio=0.2, words=None, split=False, scores=False, pos_filter=

_remove_unreachable_nodes(graph)

# Ranks the tokens using the PageRank algorithm. Returns dict of lemma -> score
# Ranks the tokens using the PageRank algorithm. Returns dict of lemma ->
# score
pagerank_scores = _pagerank(graph)

extracted_lemmas = _extract_tokens(graph.nodes(), pagerank_scores, ratio, words)
@@ -225,7 +236,8 @@ def keywords(text, ratio=0.2, words=None, split=False, scores=False, pos_filter=

keywords = _get_keywords_with_score(extracted_lemmas, lemmas_to_word)

# text.split() to keep numbers and punctuation marks, so separeted concepts are not combined
# text.split() to keep numbers and punctuation marks, so separeted
# concepts are not combined
combined_keywords = _get_combined_keywords(keywords, text.split())

return _format_results(keywords, combined_keywords, split, scores)
5 changes: 3 additions & 2 deletions gensim/summarization/pagerank_weighted.py
@@ -21,7 +21,8 @@ def pagerank_weighted(graph, damping=0.85):

pagerank_matrix = damping * adjacency_matrix.todense() + (1 - damping) * probability_matrix

vals, vecs = eigs(pagerank_matrix.T, k=1) # TODO raise an error if matrix has complex eigenvectors?
# TODO raise an error if matrix has complex eigenvectors?
vals, vecs = eigs(pagerank_matrix.T, k=1)

return process_results(graph, vecs.real)
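The TODO could be addressed along these lines (a sketch assuming numpy is imported as np; not part of the PR):

    vals, vecs = eigs(pagerank_matrix.T, k=1)
    if not np.allclose(vecs.imag, 0):
        raise ValueError("pagerank matrix has complex eigenvectors")
    return process_results(graph, vecs.real)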

@@ -35,7 +36,7 @@ def build_adjacency_matrix(graph):

for i in xrange(length):
current_node = nodes[i]
neighbors_sum = sum(graph.edge_weight((current_node, neighbor)) for neighbor in graph.neighbors(current_node))
neighbors_sum = sum(graph.edge_weight((current_node, neighbor))for neighbor in graph.neighbors(current_node))
for j in xrange(length):
edge_weight = float(graph.edge_weight((current_node, nodes[j])))
if i != j and edge_weight != 0.0:
17 changes: 12 additions & 5 deletions gensim/summarization/summarizer.py
@@ -110,12 +110,17 @@ def _get_sentences_with_word_count(sentences, word_count):
return selected_sentences


def _extract_important_sentences(sentences, corpus, important_docs, word_count):
def _extract_important_sentences(
Review comment (Owner): Definitely not.

sentences,
corpus,
important_docs,
word_count):
important_sentences = _get_important_sentences(sentences, corpus, important_docs)

# If no "word_count" option is provided, the number of sentences is
# reduced by the provided ratio. Else, the ratio is ignored.
return important_sentences if word_count is None else _get_sentences_with_word_count(important_sentences, word_count)
return important_sentences if word_count is None else _get_sentences_with_word_count(
Review comment (Owner): No.

important_sentences, word_count)


def _format_results(extracted_sentences, split):
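Given the two review comments above, the version the reviewer appears to prefer keeps the one-line signature and splits the long conditional instead (a sketch, not part of the PR):

    def _extract_important_sentences(sentences, corpus, important_docs, word_count):
        important_sentences = _get_important_sentences(sentences, corpus, important_docs)
        # If no "word_count" option is provided, the number of sentences is
        # reduced by the provided ratio. Else, the ratio is ignored.
        if word_count is None:
            return important_sentences
        return _get_sentences_with_word_count(important_sentences, word_count)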
@@ -158,7 +163,8 @@ def summarize_corpus(corpus, ratio=0.2):
_set_graph_edge_weights(graph)
_remove_unreachable_nodes(graph)

# Cannot calculate eigenvectors if number of unique words in text < 3. Warns user to add more text. The function ends.
# Cannot calculate eigenvectors if number of unique words in text < 3.
# Warns user to add more text. The function ends.
if len(graph.nodes()) < 3:
logger.warning("Please add more sentences to the text. The number of reachable nodes is below 3")
return
@@ -198,10 +204,11 @@ def summarize(text, ratio=0.2, word_count=None, split=False):
logger.warning("Input text is empty.")
return

# If only one sentence is present, the function raises an error (Avoids ZeroDivisionError).
# If only one sentence is present, the function raises an error (Avoids
# ZeroDivisionError).
if len(sentences) == 1:
raise ValueError("input must have more than one sentence")

# Warns if the text is too short.
if len(sentences) < INPUT_MIN_LENGTH:
logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.")