# bybit_data_downloader.py — downloads Bybit public historical trade data and stores it as CSV files.
# Saves Bybit trade data into csv files. Adapted (with fixes) from:
# Ryuryu's Bybit Historical Data Downloader
# (Production Mode 6973)
# -------------------------------------
# (c) 2022 Ryan Hayabusa
# Github: https://github.com/ryu878
# Web: https://aadresearch.xyz/
# Discord: ryuryu#4087
# -------------------------------------
# pip install beautifulsoup4
# pip install requests
import urllib.request
import os
import re
import gzip
import time
import requests
from bs4 import BeautifulSoup
# Script version tag (version:DD/MM/YY)
ver = '1.3:02/05/23'
# Root of Bybit's public historical trade-data directory listing
base_url = 'https://public.bybit.com/trading/'
# Earliest daily file to download, inclusive (ISO YYYY-MM-DD; the script
# compares dates as strings, which is valid for this fixed-width format)
start_date = '2023-05-01'
# Trading pairs to fetch; each must match a directory name on the listing page
coins = ['BTCUSD']
# Helper to download a single remote file to disk
def download_file(url, local_path):
    """Download *url* and write it to *local_path*.

    Streams the response in 64 KiB chunks instead of buffering the whole
    body in memory — the daily trade archives can be large.

    :param url: source URL (any scheme urllib supports)
    :param local_path: destination file path; overwritten if it exists
    :raises urllib.error.URLError: if the URL cannot be opened
    """
    with urllib.request.urlopen(url) as response, open(local_path, 'wb') as out_file:
        # read/write in fixed-size chunks to keep memory use flat
        while chunk := response.read(64 * 1024):
            out_file.write(chunk)
# Fetch the top-level directory listing and parse the HTML.
# timeout guards against the request hanging forever on a stalled connection.
response = requests.get(base_url, timeout=30)
soup = BeautifulSoup(response.text, 'html.parser')
# Find all the links on the page
links = soup.find_all('a')
# Create a local csv dir to store the extracted files
csv_dir = 'csv'
os.makedirs(csv_dir, exist_ok=True)
# Loop through all the links
for link in links:
    # href may be None for anchors without the attribute — skip those too
    href = link.get('href')
    # Only directory links (they end with '/') are coin folders
    if not href or not href.endswith('/'):
        continue
    # Get the directory (coin) name without the trailing slash
    dir_name = href[:-1]
    if dir_name not in coins:
        continue
    # Create the per-coin directory locally if it doesn't exist
    full_dir_name = os.path.join(csv_dir, dir_name)
    os.makedirs(full_dir_name, exist_ok=True)
    # Fetch and parse the per-coin directory listing
    dir_url = base_url + href
    dir_response = requests.get(dir_url, timeout=30)
    dir_soup = BeautifulSoup(dir_response.text, 'html.parser')
    # Find all the gzipped CSV files in the directory
    # (dots escaped so '.csv.gz' is matched literally, not as wildcards)
    csv_links = dir_soup.find_all(href=re.compile(r'\.csv\.gz$'))
    # Loop through all the CSV files
    for csv_link in csv_links:
        # Get the CSV file name
        csv_name = csv_link.text
        # Extract the YYYY-MM-DD date embedded in the file name;
        # guard against names with no date instead of crashing
        dates = re.findall(r'\d{4}-\d{2}-\d{2}', csv_name)
        if not dates:
            continue
        csv_date = dates[0]
        # ISO dates compare correctly as plain strings
        if csv_date < start_date:
            # Skip the file
            print('Skipping download of', csv_name, '- date is before start date.')
            continue
        # Construct the full URL of the CSV file
        csv_url = dir_url + csv_name
        # Local path of the extracted (decompressed) file — strip '.gz'
        extracted_path = os.path.join(full_dir_name, csv_name[:-3])
        # Skip anything already downloaded and extracted
        if os.path.exists(extracted_path):
            print('Skipping download of', csv_name, '- extracted file already exists.')
            continue
        # Local path of the archive file
        archive_path = os.path.join(full_dir_name, csv_name)
        # Download the archive file if it doesn't exist locally
        if not os.path.exists(archive_path):
            download_file(csv_url, archive_path)
            print('Downloaded:', archive_path)
            time.sleep(0.1)  # small delay to be polite to the server
        if csv_name.endswith('.gz'):
            # Decompress the archive next to it, then delete the archive
            with gzip.open(archive_path, 'rb') as f_in:
                with open(extracted_path, 'wb') as f_out:
                    f_out.write(f_in.read())
            print('Extracted:', extracted_path)
            os.remove(archive_path)
            print('Removed:', archive_path)
        else:
            # Not gzipped (unreachable given the .csv.gz filter): just rename
            os.rename(archive_path, extracted_path)
            print('Renamed:', archive_path, 'to', extracted_path)