-
Notifications
You must be signed in to change notification settings - Fork 6
/
facebook_user_crawler.py
72 lines (56 loc) · 2.38 KB
/
facebook_user_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
from bs4 import BeautifulSoup as BS
class FbBaseCrawler(object):
    """Small helper around a ``requests.Session`` for talking to facebook.com.

    The class stores the login credentials, offers thin ``_get``/``_post``
    wrappers that attach a browser-like header set, and provides helpers for
    logging in, parsing HTML and exporting collected rows to CSV.
    """

    # Baseline headers attached to every request. Treated as read-only:
    # per-call headers are merged into a *copy* so one request can never
    # leak its headers into later requests or other instances.
    default_headers = {
        'Accept': '*/*',
        'Cache-Control': 'no-cache',
        'upgrade-insecure-requests': '1',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/62.0.3202.94 Chrome/62.0.3202.94 Safari/537.36'
    }

    # Timeout (seconds) handed to every GET/POST issued through the session.
    default_timeout = 10

    def __init__(self, email, password, users_fbid: list = None):
        """Create a crawler with its own HTTP session.

        Args:
            email: Value sent as the ``email`` form field on login.
            password: Value sent as the ``pass`` form field on login.
            users_fbid: Optional list stored on the instance; a falsy value
                is replaced by a new empty list.
        """
        self.r = requests.Session()
        self._user = email
        self._pass = password
        self._users_fbid = users_fbid or []
        # Filled in by _login_fb(); defined here so reading it before a
        # login attempt yields None instead of raising AttributeError.
        self._fbuser_id = None

    def _merged_headers(self, headers=None):
        """Return a new dict: ``default_headers`` overlaid with *headers*."""
        merged = dict(self.default_headers)
        if headers:
            merged.update(headers)
        return merged

    def _export_to_csv(self, data, path='data_output.csv'):
        """Write *data* (an iterable of dicts) to a CSV file with a header row.

        Args:
            data: Iterable of dicts keyed by the column names below. Missing
                keys are written as empty cells; ``csv.DictWriter`` raises
                ``ValueError`` for keys outside the column list.
            path: Destination file; defaults to ``data_output.csv`` in the
                current working directory. The file is overwritten.
        """
        import csv
        fieldnames = ['name', 'email', 'job', 'address', 'phone', 'website']
        # newline='' is required by the csv module, otherwise extra blank
        # rows appear on platforms that translate '\n'. UTF-8 keeps
        # non-ASCII text from failing under a narrow platform default.
        with open(path, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)

    def _get(self, url, params=None, headers=None, cookies=None):
        """Issue a GET through the session and return the response.

        ``params`` and ``cookies`` default to empty dicts; *headers* are
        merged over ``default_headers`` for this call only.
        """
        if params is None:
            params = {}
        if cookies is None:
            cookies = {}
        return self.r.get(
            url,
            params=params,
            headers=self._merged_headers(headers),
            cookies=cookies,
            timeout=self.default_timeout,
        )

    def _post(self, url, params=None, data=None, headers=None):
        """Issue a POST through the session and return the response.

        Redirects are not followed, so the caller sees the raw status code.
        *headers* are merged over ``default_headers`` for this call only.
        """
        return self.r.post(
            url,
            params=params,
            data=data,
            headers=self._merged_headers(headers),
            allow_redirects=False,
            timeout=self.default_timeout,
        )

    def _fblink(self, link):
        """Return *link* (converted with ``str``) appended to the facebook.com origin."""
        return 'https://www.facebook.com' + str(link)

    def parser(self, html):
        """Parse *html* with BeautifulSoup using the built-in ``html.parser``."""
        return BS(html, 'html.parser')

    def _login_fb(self):
        """Attempt to log in with the stored credentials.

        Requests the home page first, then posts the credentials to
        ``login.php`` without following redirects. Any exception raised by
        either request is reported on stdout and re-raised unchanged.

        Returns:
            The value of the session's ``c_user`` cookie when the login
            response has status 302 and that cookie is set; otherwise a falsy
            value (``False`` for a non-302 status, ``None`` when the cookie
            is absent).
        """
        print('Fresh login')
        try:
            # NOTE(review): presumably this warm-up GET collects the initial
            # cookies the login endpoint expects - confirm.
            self._get('https://www.facebook.com')
            data = {
                'email': self._user,
                'pass': self._pass,
            }
            login = self._post(
                'https://www.facebook.com/login.php?login_attempt=1&lwv=110',
                data=data,
                headers={'Content-Type': 'application/x-www-form-urlencoded'},
            )
        except Exception:
            print('Error login')
            raise  # bare raise keeps the original traceback intact
        self._fbuser_id = self.r.cookies.get('c_user')
        return login.status_code == 302 and self._fbuser_id