Commit

WilliamYuxinXu committed May 6, 2024
2 parents 7d8569d + 4e8183e commit e22307d
Showing 1 changed file with 43 additions and 15 deletions.
58 changes: 43 additions & 15 deletions userdata/articles.py
@@ -3,12 +3,16 @@
At first, it will just contain stubs that return fake data.
Gradually, we will fill in actual calls to our datastore.
"""
import requests
from bs4 import BeautifulSoup
import logging
import userdata.db_connect as dbc # userdata.
from bson.objectid import ObjectId
from bson.errors import InvalidId
import userdata.extras as extras
import userdata.users as users # articles depends on user, avoid circular import!
import re
import textwrap


# ------ configuration for MongoDB ------ #
@@ -30,44 +34,68 @@
OBJECTID = '_id'


-def store_article_submission(submitter_id: str, article_title: str, article_link: str = "",
-                             article_body: str = "", article_preview: str = "", private_article: bool = False) -> (bool, str):
def fetch_article_content(url):
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raises exception for 4XX or 5XX errors
    except requests.exceptions.RequestException as e:
        logging.error(f"Request failed: {e}")
        return None

    return response.text


def extract_content(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    title_tag = soup.find('title')
    title = title_tag.get_text(strip=True) if title_tag else "No Title Found"
    article = soup.find('article')
    if not article:
        logging.warning("Article tag not found.")
        return None, None  # return a (title, text) pair so the caller can always unpack
    article_text = article.get_text(strip=True)
    wrapped_text = textwrap.fill(article_text, width=85)  # Wrap text to 85 characters per line
    return title, wrapped_text


def store_article_submission(submitter_id: str, article_url: str) -> (bool, str):
    """
-    Store the submitted article for review.
    Store the submitted article content fetched from a URL.
    """
    user = users.get_user_by_id(submitter_id)
    if not user:
        return False, f"User with {submitter_id} NOT found"

    # Create a new article submission record
    html_content = fetch_article_content(article_url)
    if not html_content:
        return False, "Failed to fetch article content"

    article_title, article_text = extract_content(html_content)
    if not article_text:
        return False, "Failed to extract article content"

    submission_record = {
-        ARTICLE_LINK: article_link,
        ARTICLE_LINK: article_url,
        ARTICLE_TITLE: article_title,
-        ARTICLE_BODY: article_body,
-        ARTICLE_PREVIEW: article_preview,
        ARTICLE_BODY: article_text,
        SUBMITTER_ID_FIELD: user[OBJECTID],
-        PRIVATE: private_article
        PRIVATE: False
    }

    dbc.connect_db()
    submission_id = dbc.insert_one(ARTICLE_COLLECTION, submission_record)
    return True, submission_id


def get_article_by_id(article_id, user_id=None):
    """
    Fetches an article from the database by its ID.
    """
    try:
        # MUST convert the string ID to an ObjectId
        object_id = ObjectId(article_id)
    except InvalidId:
        return None

    # Return the article only if it belongs to the user or it is public
    dbc.connect_db()
    article = dbc.fetch_one(ARTICLE_COLLECTION, {OBJECTID: object_id})
-    if article and (article[SUBMITTER_ID_FIELD] == user_id or article[PRIVATE] == "False"):
    if article and (article[SUBMITTER_ID_FIELD] == user_id or not article[PRIVATE]):
        return article
    else:
        return None
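
A minimal usage sketch of the new submission flow: the user id and URL below are hypothetical, and it assumes dbc.insert_one returns the inserted document's id (as the name submission_id suggests) and that a MongoDB instance is reachable via dbc.connect_db().

    from userdata.articles import store_article_submission, get_article_by_id

    # Hypothetical values for illustration only
    ok, result = store_article_submission("662f0c2e9d1e8a5b3c4d5e6f",
                                          "https://example.com/some-article")
    if ok:
        # result is assumed to be the new article's id
        article = get_article_by_id(str(result))
        print(article)
    else:
        print(f"Submission failed: {result}")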
