diff --git a/haystack/components/preprocessors/nltk_document_splitter.py b/haystack/components/preprocessors/nltk_document_splitter.py
index 9501d333cd..b11ebd0c71 100644
--- a/haystack/components/preprocessors/nltk_document_splitter.py
+++ b/haystack/components/preprocessors/nltk_document_splitter.py
@@ -8,13 +8,9 @@
 from haystack import Document, component, logging
 from haystack.components.preprocessors.document_splitter import DocumentSplitter
 from haystack.components.preprocessors.utils import Language, SentenceSplitter
-from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport("Run 'pip install nltk'") as nltk_imports:
-    import nltk
-
 
 @component
 class NLTKDocumentSplitter(DocumentSplitter):
@@ -52,7 +48,6 @@ def __init__(
         super(NLTKDocumentSplitter, self).__init__(
             split_by=split_by, split_length=split_length, split_overlap=split_overlap, split_threshold=split_threshold
         )
-        nltk_imports.check()
 
         if respect_sentence_boundary and split_by != "word":
             logger.warning(
@@ -226,7 +221,7 @@ def _concatenate_sentences_based_on_word_amount(
                     chunk_start_idx += len("".join(processed_sentences))
                     # Next chunk starts with the sentences that were overlapping with the previous chunk
                     current_chunk = current_chunk[-num_sentences_to_keep:]
-                    chunk_word_count = sum([len(s.split()) for s in current_chunk])
+                    chunk_word_count = sum(len(s.split()) for s in current_chunk)
                 else:
                     # Here processed_sentences is the same as current_chunk since there is no overlap
                     chunk_starting_page_number += sum(sent.count("\f") for sent in current_chunk)
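
Two changes are visible in this diff: the module-level `nltk` lazy import (and its matching `nltk_imports.check()` call in `__init__`) is removed, presumably because sentence splitting is now delegated to the `SentenceSplitter` imported from `haystack.components.preprocessors.utils`, and the word-count accumulation in `_concatenate_sentences_based_on_word_amount` switches from a list comprehension to a generator expression. A minimal standalone sketch of that second change, using made-up sample data for `current_chunk`, shows the two forms are equivalent:

```python
# Hypothetical sample data standing in for `current_chunk` in the diff above.
current_chunk = ["First sentence here. ", "Second one. ", "And a third."]

# Old form: the list comprehension materializes a temporary list of counts
# before sum() consumes it.
as_list = sum([len(s.split()) for s in current_chunk])

# New form: the generator expression feeds counts to sum() one at a time,
# skipping the intermediate list allocation.
as_generator = sum(len(s.split()) for s in current_chunk)

assert as_list == as_generator == 8
```

Either form returns the same total; the generator version simply avoids one short-lived list per chunk boundary, matching the style already used a few lines below (`sum(sent.count("\f") for sent in current_chunk)`).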