Skip to content

Commit 2cfaed4

Browse files
committed
Web crawlers
1 parent 8397433 commit 2cfaed4

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

Uncubed_Webcrawler.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import requests
2+
import re
3+
from bs4 import BeautifulSoup
4+
5+
6+
def Uncubed(max_pages, keyword='data scientist'):
    """Print the text of every <p> tag on Uncubed job-search result pages.

    Walks pages 1..max_pages of the analytics/data-science job category
    and prints the string content of each paragraph (company names and
    other listing text are rendered in <p> tags on this site).

    Args:
        max_pages: Number of result pages to crawl (inclusive).
        keyword: Search keyword; defaults to 'data scientist' to match
            the original hard-coded query.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    base_url = 'https://uncubed.com/jobs/category/analytics_data_science/search'
    page = 1
    while page <= max_pages:
        # Let requests build/encode the query string instead of
        # concatenating it by hand ('data scientist' -> 'data+scientist').
        response = requests.get(base_url, params={'keyword': keyword, 'page': page})
        # Fail loudly on 4xx/5xx instead of silently parsing an error page.
        response.raise_for_status()
        parser = BeautifulSoup(response.content, 'html.parser')
        for company in parser.find_all('p'):
            print(company.string)
        page += 1
21+
22+
# Run the crawler only when executed as a script, not on import.
if __name__ == '__main__':
    Uncubed(1)


# Reference: structure of a single job posting on the page.
# <div class="job-info-container">
#     <p class="title">
#         Insight Data Engineering Fellows Program - New York
#     </p>
#     <p>
#         Insight Fellows Programs
#         <span class="location"> - Starting September 10, 2018 (Accepting Late Applications) - New York</span>
#     </p>
# </div>

webcrawler.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import requests
2+
import re
3+
from bs4 import BeautifulSoup
4+
5+
6+
def aiPosts(max_pages):
    """Print the title (h2 text) of each text post on aiweirdness.com.

    Crawls archive pages 1..max_pages and prints the heading of every
    <article> whose class starts with 'post type-text'.

    Args:
        max_pages: Number of archive pages to crawl (inclusive).

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    # Compile the class matcher once, outside the page loop.
    post_class = re.compile(r'^post type-text')
    page = 1
    while page <= max_pages:
        url = 'http://aiweirdness.com/page/' + str(page)
        response = requests.get(url)
        # Fail loudly on 4xx/5xx instead of silently parsing an error page.
        response.raise_for_status()
        parser = BeautifulSoup(response.content, 'html.parser')
        for post in parser.find_all('article', class_=post_class):
            # Guard against articles with no <h2> heading.
            if post.h2 is not None:
                print(post.h2.string)
        page += 1
16+
17+
# Run the crawler only when executed as a script, not on import.
if __name__ == '__main__':
    aiPosts(5)

0 commit comments

Comments
 (0)