Update Website_scraping_tab.py
rmusser01 committed Oct 22, 2024
1 parent c1da921 commit 81625d3
Showing 1 changed file with 67 additions and 4 deletions.
71 changes: 67 additions & 4 deletions App_Function_Libraries/Gradio_UI/Website_scraping_tab.py
@@ -365,15 +365,23 @@ def create_website_scraping_tab():
value="default,no_keyword_set",
visible=True
)
# FIXME
# Updated: Added output to display parsed URLs
bookmarks_file_input = gr.File(
label="Upload Bookmarks File",
type="filepath",
file_types=[".json", ".html"],
visible=True
)
parsed_urls_output = gr.Textbox(
label="Parsed URLs from Bookmarks",
placeholder="URLs will be displayed here after uploading a bookmarks file.",
lines=10,
interactive=False,
visible=False # Initially hidden, shown only when URLs are parsed
)

scrape_button = gr.Button("Scrape and Summarize")

with gr.Column():
progress_output = gr.Textbox(label="Progress", lines=3)
result_output = gr.Textbox(label="Result", lines=20)
@@ -414,10 +422,61 @@ def update_prompts(preset_name):

preset_prompt.change(
update_prompts,
-    inputs=preset_prompt,
+    inputs=[preset_prompt],
outputs=[website_custom_prompt_input, system_prompt_input]
)

def parse_bookmarks(file_path):
    """
    Parses the uploaded bookmarks file and extracts URLs.

    Args:
        file_path (str): Path to the uploaded bookmarks file.

    Returns:
        str: Newline-separated string of extracted URLs, or an error message.
    """
    try:
        bookmarks = collect_bookmarks(file_path)
        # Flatten the bookmark values into a single list of URLs;
        # values may be single URL strings or lists of URLs.
        urls = []
        for value in bookmarks.values():
            if isinstance(value, list):
                urls.extend(value)
            elif isinstance(value, str):
                urls.append(value)
        if not urls:
            return "No URLs found in the bookmarks file."
        # One URL per line for display in the Textbox.
        return "\n".join(urls)
    except Exception as e:
        logging.error(f"Error parsing bookmarks file: {str(e)}")
        return f"Error parsing bookmarks file: {str(e)}"

def show_parsed_urls(bookmarks_file):
    """
    Parses an uploaded bookmarks file and reveals the URL output box.

    Args:
        bookmarks_file: Uploaded file object from the gr.File component.

    Returns:
        A gr.update setting both the visibility and the content of
        parsed_urls_output.
    """
    if bookmarks_file is None:
        # No file uploaded: hide the output box and clear it.
        return gr.update(visible=False, value="")
    file_path = bookmarks_file.name
    parsed_urls = parse_bookmarks(file_path)
    # A single update sets visibility and value together, so the
    # component only needs to appear once in the outputs list.
    return gr.update(visible=True, value=parsed_urls)

# Connect the parsing function to the file upload event
bookmarks_file_input.change(
    fn=show_parsed_urls,
    inputs=[bookmarks_file_input],
    outputs=[parsed_urls_output]
)
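The handler returns a single gr.update that sets visibility and value together, so parsed_urls_output appears once in the outputs list. A minimal standalone sketch of the same pattern, assuming a Gradio version whose gr.update accepts visible and value keywords:

import gradio as gr

def toggle_preview(text):
    # Hide the preview when the input is empty; otherwise show it populated.
    if not text:
        return gr.update(visible=False, value="")
    return gr.update(visible=True, value=text.upper())

with gr.Blocks() as demo:
    box = gr.Textbox(label="Input")
    preview = gr.Textbox(label="Preview", visible=False, interactive=False)
    box.change(fn=toggle_preview, inputs=[box], outputs=[preview])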

async def scrape_and_summarize_wrapper(
scrape_method: str,
url_input: str,
@@ -578,11 +637,15 @@ def convert_json_to_markdown(json_str: str) -> str:
markdown += f"- **Scrape Method:** {data['scrape_method']}\n"
markdown += f"- **API Used:** {data['api_used']}\n"
markdown += f"- **Keywords:** {data['keywords']}\n"
-    if data['url_level'] is not None:
+    if data.get('url_level') is not None:
markdown += f"- **URL Level:** {data['url_level']}\n"
if data.get('max_pages') is not None:
markdown += f"- **Maximum Pages:** {data['max_pages']}\n"
if data.get('max_depth') is not None:
markdown += f"- **Maximum Depth:** {data['max_depth']}\n"
markdown += f"- **Total Articles Scraped:** {data['total_articles_scraped']}\n\n"

-    # Add URLs scraped
+    # Add URLs Scraped
markdown += "## URLs Scraped\n\n"
for url in data['urls_scraped']:
markdown += f"- {url}\n"
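The field accesses above outline the JSON document that convert_json_to_markdown consumes. A hypothetical input built only from keys visible in this hunk (all values illustrative):

import json

example = json.dumps({
    "scrape_method": "url_crawl",
    "api_used": "openai",
    "keywords": "default,no_keyword_set",
    "url_level": 2,
    "max_pages": 10,
    "max_depth": 3,
    "total_articles_scraped": 5,
    "urls_scraped": ["https://example.com/page1", "https://example.com/page2"],
})
# convert_json_to_markdown(example) would render these fields as the
# metadata bullets and the "URLs Scraped" section shown above.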
