-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnew.py
86 lines (73 loc) · 3.38 KB
/
new.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import requests
from bs4 import BeautifulSoup
import json
# CSS selectors for the Wellfound role-page markup this scraper targets.
_CARD_SELECTOR = 'div.mb-6.w-full.rounded.border.border-gray-400.bg-white'
_COMPANY_NAME_SELECTOR = 'h2.inline.text-md.font-semibold'
_POSITION_SELECTOR = 'a.mr-2.text-sm.font-semibold.text-brand-burgandy.hover\\:underline'
_DETAIL_SELECTOR = 'span.pl-1.text-xs'
_TAGLINE_SELECTOR = 'span.text-xs.text-neutral-1000'


def _classify_details(texts):
    """Bucket a card's detail strings into experience, salary and location lists."""
    details = {"years_of_exp": [], "salary": [], "location": []}
    for text in texts:
        # Empty spans carry no information; skipping them also avoids the
        # IndexError that indexing text[0] would raise on "".
        if not text:
            continue
        if "years" in text:
            bucket = "years_of_exp"
        elif text.startswith("$") or text[:1].isdigit():
            bucket = "salary"
        else:
            bucket = "location"
        details[bucket].append(text)
    return details


def _parse_card(card):
    """Convert one company card element into its result dictionary."""
    name_tag = card.select_one(_COMPANY_NAME_SELECTOR)
    tagline_tag = card.select_one(_TAGLINE_SELECTOR)

    positions = []
    for link in card.select(_POSITION_SELECTOR):
        # NOTE(review): find_next searches the rest of the document, so the
        # span it returns is not guaranteed to belong to this card.
        attributes_tag = link.find_next('span')
        positions.append({
            "position_name": link.text.strip(),
            "attributes": attributes_tag.text.strip() if attributes_tag else "N/A",
        })

    details = _classify_details(
        span.text.strip() for span in card.select(_DETAIL_SELECTOR)
    )
    # Details are paired with positions purely by index within each bucket;
    # a position with no entry at its index gets "N/A".
    for i, position in enumerate(positions):
        position["details"] = {
            key: values[i] if i < len(values) else "N/A"
            for key, values in details.items()
        }

    return {
        "company_name": name_tag.text.strip() if name_tag else "N/A",
        "open_positions": positions,
        "company_tagline": tagline_tag.text.strip() if tagline_tag else "N/A",
    }


def scrape_cards(keyword: str, location: str = "remote", *,
                 max_cards: int = 10, timeout: float = 10.0) -> dict:
    """Scrape company job cards from a Wellfound role page.

    Args:
        keyword: Role slug placed in the URL path (e.g. "software-engineer").
        location: Location slug; "remote" (case-insensitive) selects the
            remote-role URL, anything else the per-location URL.
        max_cards: Maximum number of cards to return, taken from the top of
            the page. Values below zero are treated as zero.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        On success, a dict with "keyword", "location" and "cards", where each
        card holds "company_name", "open_positions" and "company_tagline".
        On a non-200 response or any exception, a dict with a single "error"
        key describing the failure.
    """
    if location.lower() == "remote":
        url = f"https://wellfound.com/role/r/{keyword}"
    else:
        url = f"https://wellfound.com/role/l/{keyword}/{location}"

    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return {"error": f"Failed to fetch data from {url}, status code: {response.status_code}"}

        soup = BeautifulSoup(response.text, 'html.parser')
        card_tags = soup.select(_CARD_SELECTOR)[:max(max_cards, 0)]
        cards = [_parse_card(card) for card in card_tags]
    except Exception as e:
        # Deliberately broad: callers rely on receiving an error dict rather
        # than an exception for network and parsing failures alike.
        return {"error": str(e)}

    return {
        "keyword": keyword,
        "location": location,
        "cards": cards,
    }
# Manual smoke test: scrape the remote software-engineer listings and
# pretty-print the resulting dictionary as JSON.
if __name__ == "__main__":
    result = scrape_cards(keyword="software-engineer", location="remote")
    rendered = json.dumps(result, indent=4)
    print(rendered)