Skip to content

Commit

Permalink
Linting
Browse files: browse the repository at this point in the history
  • Loading branch information
vblagoje committed Sep 10, 2024
1 parent db07392 commit a113d56
Showing 1 changed file with 1 addition and 6 deletions.
7 changes: 1 addition & 6 deletions haystack/components/preprocessors/nltk_document_splitter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,13 +8,9 @@
from haystack import Document, component, logging
from haystack.components.preprocessors.document_splitter import DocumentSplitter
from haystack.components.preprocessors.utils import Language, SentenceSplitter
from haystack.lazy_imports import LazyImport

logger = logging.getLogger(__name__)

with LazyImport("Run 'pip install nltk'") as nltk_imports:
import nltk


@component
class NLTKDocumentSplitter(DocumentSplitter):
Expand Down Expand Up @@ -52,7 +48,6 @@ def __init__(
super(NLTKDocumentSplitter, self).__init__(
split_by=split_by, split_length=split_length, split_overlap=split_overlap, split_threshold=split_threshold
)
nltk_imports.check()

if respect_sentence_boundary and split_by != "word":
logger.warning(
Expand Down Expand Up @@ -226,7 +221,7 @@ def _concatenate_sentences_based_on_word_amount(
chunk_start_idx += len("".join(processed_sentences))
# Next chunk starts with the sentences that were overlapping with the previous chunk
current_chunk = current_chunk[-num_sentences_to_keep:]
chunk_word_count = sum([len(s.split()) for s in current_chunk])
chunk_word_count = sum(len(s.split()) for s in current_chunk)
else:
# Here processed_sentences is the same as current_chunk since there is no overlap
chunk_starting_page_number += sum(sent.count("\f") for sent in current_chunk)
Expand Down

0 comments on commit a113d56

Please sign in to comment.