Merge pull request #43 from Code4GovTech/dev

Markdown changes & Testcase
Code4GovTech · Jul 26, 2024 · 1a72e6d · 1a72e6d
2 parents 8bba1d1 + 69534e1
commit 1a72e6d
Show file tree

Hide file tree

Showing 2 changed files with 192 additions and 10 deletions.
diff --git a/tests.py b/tests.py
@@ -0,0 +1,122 @@
+import unittest
+from v2_utils import remove_unmatched_tags
+from app import app
+import json,random
+
+
+class CustomTestResult(unittest.TextTestResult):  # result collector that also names each passing test
+    def addSuccess(self, test):  # unittest calls this hook for every passing test
+        super().addSuccess(test)  # keep the default success handling
+        print(f"{test._testMethodName} - passed")  # _testMethodName: private TestCase attribute (the method's name)
+
+
+class CustomTestRunner(unittest.TextTestRunner):  # text runner wired to CustomTestResult
+    resultclass = CustomTestResult  # class the runner instantiates to record outcomes
+
+
+class TestRemoveUnmatchedTags(unittest.TestCase):
+    """
+    Static input/output cases for remove_unmatched_tags (the markdown tag fixer).
+
+    NOTE(review): the remove_unmatched_tags change in this same commit wraps any
+    tagged result that does not start with "<ul>" in "<ul>...</ul>"; the expected
+    strings below carry no such wrapper -- confirm which side is intended.
+    """
+
+    def _check(self, markup, expected):
+        # Exact string comparison between the cleaned markup and the expectation.
+        self.assertEqual(remove_unmatched_tags(markup), expected)
+
+    def test_remove_unmatched_tags_basic(self):
+        # A stray </p> inside a balanced <div> is dropped.
+        self._check("<div>Test content</p></div>", "<div>Test content</div>")
+
+    def test_remove_unmatched_tags_unmatched_opening(self):
+        # An unclosed <div> gets its closing tag appended.
+        self._check("<div>Test content", "<div>Test content</div>")
+
+    def test_remove_unmatched_tags_unmatched_closing(self):
+        # Unclosed <span> and <p> are closed innermost-first before </div>.
+        self._check("<div><span><p>Test content</div>", "<div><span><p>Test content</p></span></div>")
+
+    def test_remove_unmatched_tags_nested_tags(self):
+        # A duplicated </p> is dropped.
+        self._check("<div><p>Test content</p></p></div>", "<div><p>Test content</p></div>")
+
+    def test_remove_unmatched_tags_unmatched_nested_opening(self):
+        # An unclosed nested <p> is closed before its parent's </div>.
+        self._check("<div><p>Test content</div>", "<div><p>Test content</p></div>")
+
+    def test_remove_unmatched_tags_unmatched_nested_closing(self):
+        # Same input and expectation as test_remove_unmatched_tags_basic.
+        self._check("<div>Test content</p></div>", "<div>Test content</div>")
+
+    def test_remove_unmatched_tags_multiple_unmatched_tags(self):
+        # Balanced siblings stay as they are; only the trailing <span> is closed.
+        self._check("<div>Test</div><p>Content</p><span>Here", "<div>Test</div><p>Content</p><span>Here</span>")
+
+    def test_remove_unmatched_tags_text_with_no_tags(self):
+        # Tag-free text comes back unchanged.
+        self._check("Plain text with no tags", "Plain text with no tags")
+
+    def test_remove_unmatched_tags_empty_string(self):
+        # Compare the value itself rather than only its length.
+        self._check("", "")
+
+
+class TestIssuesEndpoints(unittest.TestCase):
+    """Endpoint smoke tests driven by whatever the /issues endpoint returns."""
+
+    def setUp(self):
+        # A new test client (and a fresh /issues fetch) for every test.
+        self.app = app.test_client()
+        self.app.testing = True
+        self.issues_data = None  # To store issues data for use in subsequent tests
+
+        # Fetch issues data during setup
+        self._fetch_issues_data()
+
+    def _fetch_issues_data(self):
+        # Validate the /issues endpoint and store the issues data
+        response = self.app.get('/issues')
+        self.assertEqual(response.status_code, 200)
+
+        data = json.loads(response.data)
+        self.issues_data = data.get('issues', [])
+        self.assertTrue(len(self.issues_data) > 0, "No issues found in response")
+
+    def _sample_org_entry(self):
+        # Ensure the /issues endpoint was successfully called and issues data is available
+        if not self.issues_data:
+            self.skipTest("Skipping detail test as /issues endpoint did not return data")
+        # random.choice can pick any entry; the earlier randrange(1, len - 1) raised
+        # ValueError for one- or two-entry lists and never picked the first or last.
+        return random.choice(self.issues_data)
+
+    def test_get_issues_success(self):
+        # Check if issues data is correctly fetched
+        self.assertIsNotNone(self.issues_data, "Issues data is not populated")
+
+    def test_get_issues_detail_success(self):
+        # Request /v2/issues/<org_name>/<id> for the first issue of a sampled entry.
+        org_entry = self._sample_org_entry()
+        if not org_entry['issues']:
+            self.skipTest("Sampled org entry has no issues to request details for")
+        issue_id = org_entry['issues'][0]['id']
+        endpoint = f"/v2/issues/{org_entry['org_name']}/{issue_id}"
+
+        response = self.app.get(endpoint)
+        self.assertEqual(response.status_code, 200)
+
+    def test_get_repo_detail_success(self):
+        # Request /issues/<org_name> for a sampled entry.
+        org_entry = self._sample_org_entry()
+        endpoint = f"/issues/{org_entry['org_name']}"
+
+        response = self.app.get(endpoint)
+        self.assertEqual(response.status_code, 200)
+
+
+
+if __name__ == '__main__':  # only when run as a script, not when imported
+    unittest.main(testRunner=CustomTestRunner())  # run this module's tests through the custom runner
diff --git a/v2_utils.py b/v2_utils.py
@@ -27,43 +27,103 @@ def define_link_data(usernames):
         logging.info(f"{e}---define_link_data")
         return []
 
+def preprocess_nested_tags(text):
+    """Balance HTML-like tags in ``text`` and return the repaired string.
+
+    A closing tag is kept only if it closes the innermost open tag, otherwise it
+    is dropped; tags still open at the end are closed innermost-first. Names are
+    compared without attributes, so ``<div class="x">`` pairs with ``</div>``.
+    Self-closing tags, void elements and ``<!...>`` declarations pass through.
+    On any exception the error is printed and ``text`` is returned unchanged.
+    """
+    void_tags = {'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
+                 'link', 'meta', 'source', 'track', 'wbr'}
+    try:
+        open_names = []  # names of tags opened but not yet closed, innermost last
+        corrected_segments = []
+        for segment in re.split(r'(<[^>]+>)', text):  # the group keeps the tags
+            tag = re.match(r'<(/?)([^\s/>!?][^\s/>]*)[^>]*>$', segment)
+            if tag and tag.group(1):  # Closing tag
+                if not open_names or open_names[-1] != tag.group(2):
+                    continue  # Ignore unmatched closing tag
+                open_names.pop()
+            elif tag and not segment.endswith('/>') and tag.group(2).lower() not in void_tags:
+                open_names.append(tag.group(2))  # Opening tag that needs a closer
+            corrected_segments.append(segment)
+        # Bare names here: the old '<([^ ]+)' capture produced '</div>>' for '<div>'.
+        corrected_segments.extend(f'</{name}>' for name in reversed(open_names))
+        return ''.join(corrected_segments)
+    except Exception as e:
+        print(e,"error in preprocess_nested_tags function")
+        return text
+
+
+
 def remove_unmatched_tags(text):
     try:
-       # Remove unmatched closing tags at the beginning of the string
-        text = re.sub(r'^\s*</[^>]+>\s*', '', text)
+        # Preprocess text to handle unmatched nested tags
+        text = preprocess_nested_tags(text)
 
+        # Remove unmatched closing tags at the beginning of the string
+        text = re.sub(r'^\s*</[^>]+>\s*', '', text)
         # Regex pattern to find matched or unmatched tags
-        pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*)', re.DOTALL)
+        pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL)
         matches = pattern.findall(text)
-
+
+        # Return the text unchanged when it contains no HTML tags
+        if matches == []:
+            return text
+
         cleaned_text = ''
+        open_tags = []
+
         for match in matches:
             if match[0]:  # Full matched <tag>...</tag> pairs
                 cleaned_text += match[0]
             elif match[2]:  # Unmatched opening <tag> tags
+                # Add the tag to the list of open tags
+                tag = re.match(r'<([^/][^>]*)>', match[2])
+                if tag:
+                    tag_name = tag.group(1).split()[0]
+                    open_tags.append(tag_name)
                 cleaned_text += match[2]
+
+        # Close any unmatched opening tags
+        while open_tags:
+            tag = open_tags.pop()
+            cleaned_text += f'</{tag}>'
+
+        # Collapse each run of repeated '>' or '<' into one (note: this also rewrites legitimate text such as "a >> b")
+        cleaned_text = re.sub(r'>+', '>', cleaned_text)
+        cleaned_text = re.sub(r'<+', '<', cleaned_text)
 
+        # Wrap the result in <ul> tags for front-end rendering unless it already starts with one
+        if not cleaned_text.strip().startswith("<ul>"):
+            return f"<ul>{cleaned_text}</ul>"
+
         return cleaned_text
+
     except Exception as e:
         print(e)
         return text
-
-
+
+
+
 
 
 def week_data_formatter(html_content, type):
 
     try:
         # Use regex to find week titles (e.g., Week 1, Week 2) and their corresponding task lists
-        week_matches = re.findall(r'(Week \d+)', html_content)
-        tasks_per_week = re.split(r'Week \d+', html_content)[1:]  # Split the content by weeks and skip the first empty split
+        week_matches = re.findall(r'Week\s*-?\s*\d+', html_content)
+        tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:]  # Split the content by weeks and skip the first empty split
 
         weekly_updates = []
 
         if type == "Learnings":
             # tasks_per_week = re.split(r'<h3>Week \d+</h3>', html_content)[1:]
-            tasks_per_week = re.split(r'(<.*?>Week \d+<.*?>)', html_content)[1:]
-            tasks_per_week = [tasks_per_week[i] for i in range(1, len(tasks_per_week), 2)]
+            tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:]
+            tasks_per_week = [tasks_per_week[i] for i in range(0, len(tasks_per_week))]
             for i, week in enumerate(week_matches):
                 task_list_html = tasks_per_week[i] if i < len(tasks_per_week) else ""
                 weekly_updates.append({