diff --git a/flair/data.py b/flair/data.py index 2722a85510..de969a1083 100644 --- a/flair/data.py +++ b/flair/data.py @@ -917,7 +917,9 @@ def to_original_text(self) -> str: if len(self) == 0: return "" # otherwise, return concatenation of tokens with the correct offsets - return self[0].start_position * " " + "".join([t.text + t.whitespace_after * " " for t in self.tokens]).strip() + return (self[0].start_position - self.start_position) * " " + "".join( + [t.text + t.whitespace_after * " " for t in self.tokens] + ).strip() def to_dict(self, tag_type: str = None): labels = []