Skip to content

Commit

Permalink
remove references from paper body
Browse files Browse the repository at this point in the history
  • Loading branch information
j2whiting committed Feb 5, 2024
1 parent 2f74522 commit c8df584
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
11 changes: 11 additions & 0 deletions core/utils.py
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,13 @@
import regex as re


def escape_newlines(text: str):
    """
    Returns ``text`` with each newline character replaced by the
    two-character sequence backslash + ``n``.
    """
    # Single-entry translation table: only the newline character is rewritten,
    # every other character is passed through unchanged.
    newline_table = str.maketrans({"\n": "\\n"})
    return text.translate(newline_table)

def remove_references(text: str) -> str:
    """
    Removes reference sections from a scientific paper.

    A reference section starts at a line that consists only of the heading
    ``References`` (optionally indented and/or preceded by a section number
    such as ``7.``) and extends up to and including the next blank line, or
    to the end of the text when no blank line follows.

    Args:
        text: Full text of the paper.

    Returns:
        The text with every such section removed and with leading/trailing
        whitespace stripped. Text without a reference heading is returned
        unchanged apart from the stripping.

    Note:
        Removal stops at the first blank line after the heading, so a
        reference list whose entries are separated by blank lines is only
        removed up to its first blank line.
    """
    # The heading must sit on its own line (``^`` with MULTILINE). Without the
    # anchor, any prose line that merely ends in the word "References" would be
    # mistaken for the section heading and the text after it would be deleted.
    pattern = r"^[ \t]*(?:\d+\.?[ \t]+)?References[ \t]*\n[\s\S]*?(?:\n\n|\Z)"
    # ``flags`` is passed by keyword: the fourth positional argument of
    # ``sub`` is ``count``, not ``flags``.
    new_text = re.sub(pattern, "", text, flags=re.MULTILINE)
    return new_text.strip()
26 changes: 26 additions & 0 deletions tests/utils/test_regex.py
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
from core.utils import remove_references
import unittest

class TestRemoveReferences(unittest.TestCase):
    """Unit tests for ``remove_references`` imported from ``core.utils``."""

    def test_remove_without_following_section(self):
        """A reference list that ends the text is removed; the body stays."""
        paper = """
Some initial text.
References
1. Reference one details.
2. Reference two details.
"""
        cleaned = remove_references(paper)
        self.assertIn("Some initial text.", cleaned)
        self.assertNotIn("References", cleaned)

    def test_no_references_section(self):
        """Text without a reference heading only gets its edges stripped."""
        paper = """
Some initial text.
No references here.
"""
        expected = paper.strip()
        cleaned = remove_references(paper)
        self.assertEqual(expected, cleaned)

# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit c8df584

Please sign in to comment.