adds some more validation

mrmegatelo · Jul 16, 2024 · b3f01e6 · b3f01e6
1 parent f763538
commit b3f01e6
Showing 1 changed file with 36 additions and 18 deletions.
diff --git a/feed/forms/feed.py b/feed/forms/feed.py
@@ -1,3 +1,4 @@
+from enum import Enum
 from urllib.error import HTTPError, URLError
 from urllib.parse import urljoin
 
@@ -9,37 +10,52 @@
 from feed.utils.helpers import get_url_content_type
 
 
+class FeedContentTypes(str, Enum):  # str mixin: members compare equal to raw content-type strings
+    TEXT_HTML = 'text/html'
+    TEXT_XML = 'text/xml'
+    APPLICATION_XML = 'application/xml'
+    APPLICATION_XML_RSS = 'application/rss+xml'
+
+
 class FeedForm(forms.ModelForm):
 
+    def __init__(self, *args, **kwargs):
+        self.content_type = self.articulo = None  # defaults so later reads never raise AttributeError
+        data = kwargs.get('data', args[0] if args else None)  # form data may also arrive positionally
+        if url := (data.get('url') if data else None):  # only probe when a URL was actually submitted
+            self.content_type = get_url_content_type(url)
+            if self.content_type == FeedContentTypes.TEXT_HTML.value:
+                self.articulo = Articulo(url)  # reused by clean() and parse_html()
+        super().__init__(*args, **kwargs)
+
     def save(self, commit=True):
-        print(self.is_valid())
         if self.is_valid():
-            print('saving...')
             base_url = self.cleaned_data['url']
-            content_type = get_url_content_type(base_url)
 
-            match content_type:
-                case 'text/html':
+            match self.content_type:  # content type detected once in __init__
+                case FeedContentTypes.TEXT_HTML.value:  # match the raw string, same as the XML cases below
                     self.parse_html(base_url)
-                case 'application/xml' | 'text/xml' | 'application/rss+xml':
+                case FeedContentTypes.TEXT_XML.value \
+                     | FeedContentTypes.APPLICATION_XML.value \
+                     | FeedContentTypes.APPLICATION_XML_RSS.value:
                     self.parse_xml(base_url)
                 case _:
                     print('Unsupported content type')
 
-            return super().save(commit=commit)
+        return super().save(commit=commit)
 
     def clean(self):
-        url = self.data.get('url')
         try:
-            content_type = get_url_content_type(url)
             # Assuming that URL is valid and resolved.
-            match content_type:
-                case 'text/html':
+            match self.content_type:  # content type detected once in __init__
+                case FeedContentTypes.TEXT_HTML.value:  # match the raw string, same as the XML cases below
                     # TODO Validate if:
-                    #   - HTML has RSS link
                     #   - RSS link is reachable
-                    pass
-                case 'application/xml' | 'text/xml' | 'application/rss+xml':
+                    if self.articulo.rss is None:  # presumably None when the page exposes no RSS link; confirm against Articulo
+                        self.add_error('url', 'This URL could not be parsed.')
+                case FeedContentTypes.TEXT_XML.value \
+                     | FeedContentTypes.APPLICATION_XML.value \
+                     | FeedContentTypes.APPLICATION_XML_RSS.value:
                     pass
                 case _:
                     self.add_error('url', 'This URL could not be parsed.')
@@ -50,6 +66,7 @@ def clean(self):
             # Assuming that URL is invalid or cannot be resolved.
             self.add_error('url', 'This URL could not be parsed.')
 
+        return super().clean()
 
     def parse_xml(self, base_url):
         info = feedparser.parse(self.cleaned_data['url'])
@@ -61,16 +78,17 @@ def parse_xml(self, base_url):
         self.instance.url = info.feed.link
 
     def parse_html(self, base_url):
-        article = Articulo(base_url)
-        rss = article.rss
+        if getattr(self, 'articulo', None) is None:  # tolerate a form whose articulo was never populated
+            return
 
+        rss = self.articulo.rss  # reuse the page parsed in __init__ instead of fetching it again
         rss = urljoin(base_url, rss)
         info = feedparser.parse(rss)
 
         self.instance.title = info.feed.get('title')
-        self.instance.description = info.feed.get('description') or article.description
+        self.instance.description = info.feed.get('description') or self.articulo.description
         self.instance.rss_url = rss
-        self.instance.icon = info.feed.image.get('href') if info.feed.get('image') else article.icon
+        self.instance.icon = info.feed.image.get('href') if info.feed.get('image') else self.articulo.icon
 
     class Meta:
         model = Feed