Skip to content

Commit

Permalink
adds some more validation
Browse files Browse the repository at this point in the history
  • Loading branch information
mrmegatelo committed Jul 16, 2024
1 parent f763538 commit b3f01e6
Showing 1 changed file with 36 additions and 18 deletions.
54 changes: 36 additions & 18 deletions feed/forms/feed.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from enum import Enum
from urllib.error import HTTPError, URLError
from urllib.parse import urljoin

Expand All @@ -9,37 +10,52 @@
from feed.utils.helpers import get_url_content_type


class FeedContentTypes(str, Enum):
    """Content types that the feed form recognises.

    Members mix in ``str`` so that a member compares equal to the raw
    content-type string it wraps (``FeedContentTypes.TEXT_HTML == 'text/html'``).
    Without the mixin, a ``match`` on a plain string with
    ``case FeedContentTypes.TEXT_HTML:`` can never succeed, because a plain
    ``Enum`` member is never equal to its value. ``.value`` is unchanged, so
    code that compares against ``.value`` keeps working.
    """

    TEXT_HTML = 'text/html'
    TEXT_XML = 'text/xml'
    APPLICATION_XML = 'application/xml'
    APPLICATION_XML_RSS = 'application/rss+xml'


class FeedForm(forms.ModelForm):

def __init__(self, *args, **kwargs):
    """Probe the submitted URL before the form itself is initialised.

    Sets two attributes that the rest of the form reads:

    * ``content_type`` -- the value returned by ``get_url_content_type`` for
      the submitted URL, or ``None`` when no URL was submitted or the probe
      failed.
    * ``articulo`` -- an ``Articulo`` wrapper for the URL when the content
      type is HTML, otherwise ``None``.

    All positional and keyword arguments are forwarded unchanged to the
    parent form.
    """
    # Define both attributes on every path so later reads (validation,
    # saving, HTML parsing) never hit AttributeError on an unbound form or a
    # non-HTML URL.
    self.content_type = None
    self.articulo = None

    # Django forms take ``data`` as the first positional argument as well as
    # by keyword; honour both spellings.
    data = kwargs.get('data', args[0] if args else None)
    url = data.get('url') if data is not None else None

    if url:
        try:
            self.content_type = get_url_content_type(url)
        except (HTTPError, URLError):
            # Deliberately swallowed: an unreachable URL must not blow up
            # form construction. ``content_type`` stays None, which the
            # wildcard branch of ``clean()`` reports as a field error.
            pass

        if self.content_type == FeedContentTypes.TEXT_HTML.value:
            self.articulo = Articulo(url)

    super().__init__(*args, **kwargs)

def save(self, commit=True):
print(self.is_valid())
if self.is_valid():
print('saving...')
base_url = self.cleaned_data['url']
content_type = get_url_content_type(base_url)

match content_type:
case 'text/html':
match self.content_type:
case FeedContentTypes.TEXT_HTML:
self.parse_html(base_url)
case 'application/xml' | 'text/xml' | 'application/rss+xml':
case FeedContentTypes.TEXT_XML.value \
| FeedContentTypes.APPLICATION_XML.value \
| FeedContentTypes.APPLICATION_XML_RSS.value:
self.parse_xml(base_url)
case _:
print('Unsupported content type')

return super().save(commit=commit)
return super().save(commit=commit)

def clean(self):
url = self.data.get('url')
try:
content_type = get_url_content_type(url)
# Assuming that URL is valid and resolved.
match content_type:
case 'text/html':
match self.content_type:
case FeedContentTypes.TEXT_HTML:
# TODO Validate if:
# - HTML has RSS link
# - RSS link is reachable
pass
case 'application/xml' | 'text/xml' | 'application/rss+xml':
if self.articulo.rss is None:
self.add_error('url', 'This URL could not be parsed.')
case FeedContentTypes.TEXT_XML.value \
| FeedContentTypes.APPLICATION_XML.value \
| FeedContentTypes.APPLICATION_XML_RSS.value:
pass
case _:
self.add_error('url', 'This URL could not be parsed.')
Expand All @@ -50,6 +66,7 @@ def clean(self):
# Assuming that URL is invalid or cannot be resolved.
self.add_error('url', 'This URL could not be parsed.')

return super().clean()

def parse_xml(self, base_url):
info = feedparser.parse(self.cleaned_data['url'])
Expand All @@ -61,16 +78,17 @@ def parse_xml(self, base_url):
self.instance.url = info.feed.link

def parse_html(self, base_url):
    """Populate the model instance from the RSS feed linked by an HTML page.

    Reads the RSS link from ``self.articulo``, resolves it against
    ``base_url``, parses the feed and copies title, description, RSS URL and
    icon onto ``self.instance``. Description and icon fall back to the values
    exposed by ``self.articulo`` when the feed does not provide them.

    Does nothing when there is no ``articulo`` for this form or when it
    exposes no RSS link.

    Args:
        base_url: URL of the HTML page; used to resolve a relative RSS link.
    """
    # getattr: the attribute may be absent if the constructor did not see an
    # HTML URL.
    articulo = getattr(self, 'articulo', None)
    if articulo is None:
        return

    rss_link = articulo.rss
    if rss_link is None:
        # urljoin(base_url, None) would return base_url itself, so the HTML
        # page would be parsed and stored as if it were the feed.
        return

    rss = urljoin(base_url, rss_link)
    info = feedparser.parse(rss)
    feed = info.feed
    image = feed.get('image')

    self.instance.title = feed.get('title')
    self.instance.description = feed.get('description') or articulo.description
    self.instance.rss_url = rss
    self.instance.icon = image.get('href') if image else articulo.icon

class Meta:
model = Feed
Expand Down

0 comments on commit b3f01e6

Please sign in to comment.