-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcrawler.py
106 lines (83 loc) · 3.15 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import requests
import pandas as pd
import numpy as np
import folium
from folium.plugins import MiniMap
import requests
import folium
import collections
def whole_region(keyword, start_x, start_y, end_x, end_y):
    """Recursively collect every Kakao Local keyword-search result inside a
    bounding box.

    The Kakao Local API returns at most 45 documents per query (3 pages of
    15), so when ``meta.total_count`` exceeds 45 the rectangle is split into
    four quadrants and each quadrant is searched recursively.

    Parameters
    ----------
    keyword : str
        Search keyword passed to the API's ``query`` parameter.
    start_x, start_y : float
        Longitude/latitude of one corner of the search rectangle.
    end_x, end_y : float
        Longitude/latitude of the opposite corner.

    Returns
    -------
    list[dict]
        All result documents found within the rectangle.
    """
    page_num = 1
    all_data_list = []
    # Loop-invariant request pieces hoisted out of the paging loop.
    url = 'https://dapi.kakao.com/v2/local/search/keyword.json'
    # NOTE(review): hard-coded API key checked into source — move it to an
    # environment variable or config file and rotate the key.
    headers = {'Authorization': 'KakaoAK 5595e9b8209b83161571a42bc8b662c0'}
    while True:
        params = {'query': keyword, 'page': page_num,
                  'rect': f'{start_x},{start_y},{end_x},{end_y}'}
        resp = requests.get(url, params=params, headers=headers)
        # Fail loudly on HTTP errors instead of a confusing KeyError below.
        resp.raise_for_status()
        # Parse the JSON body once (the original re-parsed it up to 3 times).
        body = resp.json()
        search_count = body['meta']['total_count']
        if search_count > 45:
            # Too many hits for one query: split the rectangle into 4
            # quadrants and recurse into each.
            dividing_x = (start_x + end_x) / 2
            dividing_y = (start_y + end_y) / 2
            all_data_list.extend(whole_region(keyword, start_x, start_y, dividing_x, dividing_y))
            all_data_list.extend(whole_region(keyword, dividing_x, start_y, end_x, dividing_y))
            all_data_list.extend(whole_region(keyword, start_x, dividing_y, dividing_x, end_y))
            all_data_list.extend(whole_region(keyword, dividing_x, dividing_y, end_x, end_y))
            return all_data_list
        all_data_list.extend(body['documents'])
        if body['meta']['is_end']:
            return all_data_list
        # More pages remain for this rectangle.
        page_num += 1
def overlapped_data(keyword, start_x, start_y, next_x, next_y, num_x, num_y):
    """Sweep a ``num_x`` x ``num_y`` grid of rectangles starting at
    (``start_x``, ``start_y``), each tile ``next_x`` wide and ``next_y``
    tall, and gather search results from every tile.

    Returns a flat list of result documents; tiles can overlap at their
    edges, so the list may contain duplicate places.
    """
    collected = []
    left = start_x
    for col in range(1, num_x + 1):
        right = left + next_x
        bottom = start_y
        for row in range(1, num_y + 1):
            print(col, row)
            top = bottom + next_y
            collected.extend(
                whole_region(keyword, left, bottom, right, top))
            bottom = top  # move up one tile
        left = right  # move right one column
    return collected
def make_map(dfs):
    """Build a folium map (centered on Seoul) with one marker per row.

    Parameters
    ----------
    dfs : pandas.DataFrame
        Must have columns 'X' (longitude), 'Y' (latitude), 'stores'
        (place name, used as tooltip) and 'place_url' (used as popup).

    Returns
    -------
    folium.Map
    """
    m = folium.Map(location=[37.566826, 126.9786567], zoom_start=12)
    m.add_child(MiniMap())
    # BUG FIX: the original read coordinates from the module-level global
    # `df` instead of the `dfs` parameter, so the function only worked by
    # accident when called with that exact global.
    for i in range(len(dfs)):
        folium.Marker([dfs['Y'][i], dfs['X'][i]],
                      tooltip=dfs['stores'][i],
                      popup=dfs['place_url'][i]).add_to(m)
    return m
# ---------------------------------------------------------------------------
# Script entry: crawl every '카페' (cafe) listing over a grid covering South
# Korea, de-duplicate, and save the result to CSV.
# ---------------------------------------------------------------------------
keyword = '카페'
start_x = 124.60   # west edge of the sweep (longitude, degrees)
start_y = 33.11    # south edge of the sweep (latitude, degrees)
next_x = 0.5       # tile width in degrees longitude
next_y = 0.5       # tile height in degrees latitude
num_x = 13         # number of tile columns
num_y = 12         # number of tile rows

overlapped_result = overlapped_data(keyword, start_x, start_y,
                                    next_x, next_y, num_x, num_y)

# Adjacent tiles share edges, so the same place can be returned more than
# once; de-duplicate while preserving first-seen order by keying each
# document on its sorted (key, value) pairs.
results = list(map(dict, collections.OrderedDict.fromkeys(
    tuple(sorted(d.items())) for d in overlapped_result)))

# BUG FIX: the original built `np.array([ID, stores, X, Y, ...]).T`, which
# coerces every column — including the float X/Y coordinates — to strings.
# Building the DataFrame from a dict of typed columns keeps correct dtypes.
df = pd.DataFrame({
    'ID': [place['id'] for place in results],
    'stores': [place['place_name'] for place in results],
    'X': [float(place['x']) for place in results],
    'Y': [float(place['y']) for place in results],
    'road_address': [place['road_address_name'] for place in results],
    'place_url': [place['place_url'] for place in results],
})
print('total_result_number = ', len(df))
#print(df)
df.to_csv("crawled_korea.csv", mode='w')
#make_map(df)