dnf_bags.py
# A dunfa (DNF) scraper that grabs the top 100 damage dealers and writes their data to a CSV file.
import asyncio
import os

import pandas as pan
from jobs import JOBS as jobs
from requests_html import AsyncHTMLSession
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
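
# Browser-like request headers sent with each ranking request.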
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5'
}
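
# The chromedriver path comes from the environment; the two URL templates
# are filled in with job and character parameters below.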
CHROMEDRIVER_PATH = os.getenv('CHROMEDRIVER_PATH')
DUNFA_RANKING_URL = 'https://dunfaoff.com/ranking.df?jobName={}&jobGrowName={}&gender={}&page=1'
DUNFA_CHARACTER_URL = 'https://dunfaoff.com/SearchResult.df?server={}&characterid={}'
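
# Chrome instance used later for the character pages, where the damage
# value is rendered by JavaScript.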
options = Options()
options.add_argument('--disable-web-security')
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH), options=options)


# Fetch one ranking page and collect the server/character-id pair
# from every character row.
async def get_rank_data(session, url):
    r = await session.get(url, headers=headers)
    ranks = r.html.find('tr.character-row')
    data = []
    for row in ranks:
        # each row carries its server and character id as data attributes
        rank_data = {
            'server': row.attrs['data-server'],
            'id': row.attrs['data-characterid']
        }
        data.append(rank_data)
    return data


# Main entry point for the ranking data: fetch every ranking page concurrently.
async def get_ranks(urls):
    asession = AsyncHTMLSession()
    tasks = (get_rank_data(asession, url) for url in urls)
    return await asyncio.gather(*tasks)
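

# Each JOBS entry is expected to look roughly like this (structure inferred
# from the usage below): {'korean_name': ..., 'gender': ..., 'subs': [...]}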
# Build one ranking URL per job/subclass/gender combination.
urls = []
for job in jobs:
    for sub in job['subs']:
        urls.append(DUNFA_RANKING_URL.format(job['korean_name'], sub, job['gender']))
results = asyncio.run(get_ranks(urls))
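
# results holds one list of {'server': ..., 'id': ...} dicts per ranking page.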
# Convert the ranking data into character-page URLs.
urls = []
for ranks in results:
    for char in ranks:
        urls.append(DUNFA_CHARACTER_URL.format(char['server'], char['id']))

dealers = []
# Open each character page in Selenium; Selenium is required since the
# damage value is rendered by JavaScript.
driver.implicitly_wait(1)  # wait up to 1s for elements on every lookup
for url in urls:
    driver.get(url)
    # clicking the damage tab makes the page's JS fill in the value
    driver.find_element(By.ID, 'damage_side').click()
    damage = driver.find_element(By.CLASS_NAME, 'sinergeDmg0').text.replace(',', '')
    dealers.append([url, int(damage)])
driver.quit()

# Convert everything to a dataframe and save it as a CSV, highest damage first.
df = pan.DataFrame(dealers, columns=['url', 'damage'])
df.sort_values(by=['damage'], ascending=False, inplace=True)
os.makedirs('results', exist_ok=True)  # make sure the output directory exists
df.to_csv('results/rankings.csv', index=False)
print("done.")