Merge pull request #354 from Yolo-cell-hash/dom-branch

DOM Extraction Script add
larymak · Jan 24, 2024 · c643b6f · c643b6f
2 parents 3eb3ae4 + 3e67747
commit c643b6f
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 0 deletions.
diff --git a/DOM EXTRACTION/README.md b/DOM EXTRACTION/README.md
@@ -0,0 +1,19 @@
+# DOM Extraction Script
+
+Fetch a webpage, parse its DOM, and print the page title and the target of every link it contains.
+
+## Installation
+
+Use the package manager [pip](https://pip.pypa.io/en/stable/) to install the required libraries.
+
+```bash
+pip install requests beautifulsoup4
+
+```
+
+## Usage
+
+```python
+url = 'https://example.com'
+```
+In `main.py`, replace 'https://example.com' with the URL of the website you want to extract the DOM from, then run `python main.py` from the `DOM EXTRACTION` directory.
diff --git a/DOM EXTRACTION/main.py b/DOM EXTRACTION/main.py
@@ -0,0 +1,26 @@
+import requests
+from bs4 import BeautifulSoup
+
+# Define the URL of the website you want to extract the DOM from
+url = 'https://example.com'
+
+# A timeout keeps the script from hanging forever on an unresponsive host.
+try:
+    response = requests.get(url, timeout=10)
+except requests.RequestException as exc:
+    raise SystemExit(f"Failed to retrieve the page: {exc}")
+
+if response.status_code == 200:
+    soup = BeautifulSoup(response.text, 'html.parser')
+    title = soup.title
+    if title:
+        print("Page Title:", title.text)
+    else:
+        print("No title tag found.")
+
+    # href=True skips anchors without an href, which would print "None".
+    print("Links in the page:")
+    for link in soup.find_all('a', href=True):
+        print(link['href'])
+else:
+    print("Failed to retrieve the page. Status code:", response.status_code)