Skip to content

Commit

Permalink
Merge branch 'Dev-backend' into build/#10-backend-cicd
Browse files Browse the repository at this point in the history
  • Loading branch information
yeonjy committed Apr 9, 2024
2 parents 82ab53e + 0c9e03b commit 7eeec25
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
import time
from dotenv import load_dotenv
import json
import logging
from pika import BlockingConnection, ConnectionParameters, PlainCredentials
from pika import BlockingConnection, ConnectionParameters, PlainCredentials, exceptions

from news.crud.news_summarizer import summarize_news

Expand Down Expand Up @@ -37,4 +38,8 @@ def callback(ch, method, properties, body):


# Register `callback` for the queue named by the SUMMARY_QUEUE environment
# variable. With auto_ack=True deliveries are acknowledged automatically,
# so they are not acknowledged explicitly by the callback.
channel.basic_consume(queue=os.getenv('SUMMARY_QUEUE'), on_message_callback=callback, auto_ack=True)

# start_consuming() blocks, so it must run inside the try block for the
# connection-error handler below to apply to it.
try:
    channel.start_consuming()
except (exceptions.AMQPConnectionError, exceptions.StreamLostError) as e:
    # NOTE(review): the message says "retrying", but this fragment only logs
    # and sleeps; no reconnect is visible here. Confirm that an outer loop or
    # a process supervisor restarts the consumer.
    logger.error("Connection lost or failed, retrying... Error: {}".format(e))
    time.sleep(10)
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import time
from dotenv import load_dotenv
import json
from pika import BlockingConnection, ConnectionParameters, PlainCredentials, BasicProperties
from pika import BlockingConnection, ConnectionParameters, PlainCredentials, BasicProperties, exceptions

from news.schema.message_item import MessageItem

Expand All @@ -26,16 +27,22 @@ def get_connection_params():
heartbeat=600,
blocked_connection_timeout=300)


def send_message(message: MessageItem):
    """Publish *message* to the configured exchange, retrying on connection loss.

    The message's ``__dict__`` is serialized to JSON once, then published on a
    fresh connection. If the broker is unreachable or the stream is lost, the
    attempt is repeated every 5 seconds until it succeeds.

    Args:
        message: Item whose instance attributes form the JSON payload.

    Raises:
        TypeError: If ``message.__dict__`` is not JSON serializable.
    """
    # Build the payload before connecting: it is identical for every attempt,
    # and a serialization error should surface before any connection is opened.
    serialized_message = json.dumps(message.__dict__)
    # NOTE(review): delivery_mode=1 marks the message as transient even though
    # the queue is declared durable - confirm this is intended.
    props = BasicProperties(content_type=CONTENT_TYPE, delivery_mode=1)

    # Retry with a loop rather than recursion so a long outage cannot exhaust
    # the interpreter's recursion limit.
    while True:
        connection = None
        try:
            connection = BlockingConnection(get_connection_params())
            channel = connection.channel()
            channel.queue_declare(queue=CONFIG['queue_name'], durable=True)
            channel.basic_publish(exchange=CONFIG['exchange_name'],
                                  routing_key=CONFIG['routing_key'],
                                  body=serialized_message,
                                  properties=props)
            return
        except (exceptions.AMQPConnectionError, exceptions.StreamLostError) as e:
            print("Connection failed, retrying in 5 seconds... Error: {}".format(e))
        finally:
            # Always release the connection, including after a failed attempt.
            if connection is not None and connection.is_open:
                try:
                    connection.close()
                except exceptions.AMQPConnectionError:
                    # Best-effort close: a failure here must not trigger a
                    # second publish of a message that was already sent.
                    pass
        time.sleep(5)
2 changes: 1 addition & 1 deletion backend/ai_response_processor/news/crud/news_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def summarize_news(news_id: int, content: str):

template = ChatPromptTemplate.from_messages(
[
("system", "You're a news summarizer. Also, the answer must be no more than 500 characters in Korean."),
("system", "You're a news summarizer. Also, the answer must be summarized in Korean within 30% of the user's request."),
("user", "{raw_news_content}"),
]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
Expand All @@ -25,6 +26,9 @@ public class NewsCrawlingService {
private static final String CRON = "0 0 6,12 * * *";
private static final String ZONE = "Asia/Seoul";

@Value("${crawling.quantity}")
private int crawlingQuantity;

private final NewsService newsService;

@Transactional
Expand All @@ -34,19 +38,32 @@ public void scrap() throws IOException {
String categoryUrl = MAIN_URL + category.getNum();
String categoryName = category.getName();

scrapNewsUrls(categoryUrl);
scrapCategoryNews(categoryUrl);
for (final News news : newsService.getNotCrawled()) {
scrapNewsContentsAndUpdate(categoryName, news);
Document doc = Jsoup.connect(news.getUrl()).get();
String title = scrapTitle(doc);
String content = scrapContent(doc);
String postDate = scrapPostDate(doc);

news.addNewsBody(title, content, categoryName, postDate);
}
}
newsService.summarizeNewsContent();
}

private void scrapNewsUrls(String categoryUrl) throws IOException {
private void scrapCategoryNews(String categoryUrl) throws IOException {
Document doc = Jsoup.connect(categoryUrl).get();
Elements newsList = doc.select(".sa_list");
Elements newsList = doc.select(".sa_list").select("li");
if (newsList.size() < crawlingQuantity) {
scrapNewsUrl(newsList.size(), newsList);
return;
}
scrapNewsUrl(crawlingQuantity, newsList);
}

for (Element news : newsList.select("li")) {
private void scrapNewsUrl(int quantity, Elements newsList) {
for (int i = 0; i < quantity; i++) {
Element news = newsList.get(i);
String thumbnailUrl = scrapThumbnailUrl(news);
String url = Objects.requireNonNull(news.selectFirst(".sa_text_title")).attr("href");

Expand Down

0 comments on commit 7eeec25

Please sign in to comment.