-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtools.py
62 lines (51 loc) · 1.62 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import random
import threading as t
from bs4 import BeautifulSoup
import sys
import config
# File that stores the proxy IPs, one entry per line
proxy_ip_file = "proxy_ip.txt"
# Module-level cache of proxy entries; starts empty and is filled by get_proxy_ip()
proxy_ip_list = []
# Re-entrant lock held by write_str_data() while it writes to a file
lock = t.RLock()
# 1. Read a file line by line and return its content as a list
def load_list_from_file(file_path):
    """Return the lines of ``file_path`` as a list of strings.

    The file is decoded as UTF-8 and the trailing newline of every line is
    removed.

    :param file_path: path of the text file to read.
    :return: list with one string per line; an empty list when
        ``file_path`` does not exist, so callers can always use ``len()``
        or iterate over the result.
    """
    if not os.path.exists(file_path):
        return []
    # Read-only mode: "r+" would additionally demand write permission.
    with open(file_path, "r", encoding='utf-8') as f:
        return [line.rstrip("\n") for line in f]
# 2. Pick a random proxy IP
def get_proxy_ip():
    """Return a proxy mapping built from a randomly chosen proxy entry.

    The entries are loaded from ``proxy_ip_file`` the first time they are
    needed and cached in the module-level ``proxy_ip_list``.

    :return: ``{'http': 'http://<entry>', 'https': 'https://<entry>'}`` for
        a random entry, or ``None`` when no proxy entry is available.
    """
    global proxy_ip_list
    # Load lazily when the cache is unset or empty. The previous
    # "is None and len(...) == 0" test could never be true for a list, so
    # the file was never read.
    if not proxy_ip_list:
        # "or []" keeps the cache a list even if the loader yields None.
        proxy_ip_list = load_list_from_file(proxy_ip_file) or []
    if not proxy_ip_list:
        return None
    ip = random.choice(proxy_ip_list)
    return {
        'http': 'http://' + ip,
        'https': 'https://' + ip
    }
# 3. Build a BeautifulSoup object (markup string by default, or a local html file)
def get_bs(html, online=True):
    """Return a ``BeautifulSoup`` object parsed with the ``lxml`` parser.

    :param html: the markup itself when ``online`` is true, otherwise the
        path of a local html file.
    :param online: ``True`` to parse ``html`` directly, ``False`` to open
        and parse the file at path ``html``.
    :return: the parsed ``BeautifulSoup`` document.
    """
    if online:
        return BeautifulSoup(html, "lxml")
    # The markup is parsed inside the constructor, so the handle can be
    # closed as soon as the object has been built (it used to be leaked).
    with open(html) as f:
        return BeautifulSoup(f, "lxml")
# 4. Check whether a path exists, optionally creating it when it does not
def is_dir_existed(path, mkdir=True):
    """Check for ``path`` and, by default, create it as a directory.

    :param path: the path to check.
    :param mkdir: when true, create ``path`` (including parents) if it does
        not exist and return ``None``; when false, only report existence.
    :return: ``None`` when ``mkdir`` is true, otherwise the boolean result
        of ``os.path.exists(path)``.
    """
    if not mkdir:
        return os.path.exists(path)
    if not os.path.exists(path):
        # exist_ok guards against another thread creating the directory
        # between the existence check and the makedirs call.
        os.makedirs(path, exist_ok=True)
# 5. Write content to a file (appends by default)
def write_str_data(content, file_path, mode="a+"):
    """Write ``content`` followed by a newline to ``file_path``.

    The module-level ``lock`` is held for the duration of the write. An
    ``OSError`` raised while opening or writing is printed, not propagated.

    :param content: the string to write.
    :param file_path: path of the target file, opened as UTF-8.
    :param mode: mode passed to ``open``; defaults to ``"a+"``.
    """
    lock.acquire()
    try:
        with open(file_path, mode, encoding='utf-8') as out_file:
            out_file.write(content + "\n")
    except OSError as err:
        print(err)
    finally:
        lock.release()