-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsubs_scraper.py
239 lines (191 loc) · 8.94 KB
/
subs_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from moviepy.editor import VideoFileClip, clips_array
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
from datetime import datetime, timezone
import requests
import random
import time
import os
# Language codes accepted by add_subs, mapped to the label that is typed into
# the language picker on the VEED page.
_VEED_LANGUAGES = {
    "es": "Spanish (Spain)",
    "en": "English (US)",
    "de": "German",
    "fr": "French (France)",
    "ko": "Korean",
    "zh": "Chinese",
    "zh-tw": "Chinese",
    "zh-cn": "Chinese",
    "zh-sg": "Chinese",
    "zh-hk": "Chinese",
    "ja": "Japanese",  # ISO 639-1 code
    "jp": "Japanese",  # kept for backward compatibility with existing callers
}

# Size of each HTTP Range request when downloading the exported video.
_DOWNLOAD_RANGE_SIZE = 1024 * 1024

# Seconds to wait for each HTTP request before giving up instead of hanging.
_REQUEST_TIMEOUT = 60


def _click_css(driver, selector: str) -> None:
    """Click the first element matching the CSS *selector*."""
    driver.find_element(By.CSS_SELECTOR, selector).click()


def _download_video(video_url: str, referer: str, dest_path: str) -> bool:
    """Download *video_url* to *dest_path* in ranged chunks.

    Returns True only when every range was fetched; prints the reason and
    returns False otherwise.
    """
    headers = {"Referer": referer}

    # HEAD first to check accessibility and learn the total size.
    head_response = requests.head(video_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    if head_response.status_code != 200:
        print(f"The video URL is not accessible. Status code: {head_response.status_code}")
        return False

    content_length = int(head_response.headers.get("Content-Length", 0))
    if content_length <= 0:
        # Without a size there is nothing to range over; do not leave an
        # empty output file behind and do not report success.
        print("Failed to download the video. Server did not report a Content-Length.")
        return False

    with open(dest_path, "wb") as video_file:
        start_range = 0
        while start_range < content_length:
            end_range = min(start_range + _DOWNLOAD_RANGE_SIZE, content_length)
            headers["Range"] = f"bytes={start_range}-{end_range - 1}"
            # The context manager releases the streamed connection.
            with requests.get(
                video_url, headers=headers, stream=True, timeout=_REQUEST_TIMEOUT
            ) as get_response:
                if get_response.status_code != 206:  # anything but Partial Content
                    print(f"Failed to download the video. Status code: {get_response.status_code}")
                    return False
                for chunk in get_response.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        video_file.write(chunk)
            start_range = end_range
    return True


def add_subs(name: str, lang: str, absolute_path) -> None:
    """Burn auto-generated subtitles into a video using the veed.io web editor.

    Drives a headless Chrome session: uploads ``{absolute_path}.mp4``, has the
    site transcribe it in the language given by *lang*, requests a translation
    to English when *lang* is not ``"en"``, exports with burned-in subtitles
    and downloads the result to ``{absolute_path}_subed.mp4``.

    Args:
        name: Label used only in progress messages.
        lang: Language code of the video's speech (case-insensitive); must be
            a key of ``_VEED_LANGUAGES``.
        absolute_path: Absolute path of the input video WITHOUT the ``.mp4``
            extension (the file-upload input needs an absolute path).

    Returns:
        None. Problems that are detected explicitly (unsupported language,
        missing video source, failed download) are printed and the function
        returns early; Selenium errors propagate after the browser is closed.
    """
    print(f"[SUBS] Adding Subtitles to {name}")

    # Validate the language before launching a browser or uploading anything.
    lang_code = lang.lower()
    language_label = _VEED_LANGUAGES.get(lang_code)
    if language_label is None:
        print(f"Unsupported language code: {lang}")
        return

    today = datetime.now(timezone.utc).date()
    random_user_agent = UserAgent(platforms='pc').random  # random desktop user-agent

    # Configure Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless=new")  # Disable for debugging
    chrome_options.add_argument(f"user-agent={random_user_agent}")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("window-size=1920,1080")
    chrome_options.add_argument("log-level=1")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get("https://www.veed.io/tools/add-subtitles")
        # Sets the cookie-banner "closed" cookie (presumably so the consent
        # banner does not cover the page -- confirm against the site).
        driver.add_cookie({"name": "OptanonAlertBoxClosed", "value": f"{today}T00:00:00.000Z"})
        time.sleep(random.uniform(2.0, 3.5))

        # Click button to generate new session
        _click_css(driver, 'a[data-testid="@titleSection/CTA"]')
        time.sleep(random.uniform(2.0, 2.5))

        # Accept TOS
        selector = 'button[data-testid="@component/terms-consent-modal/btn"]'
        WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, selector)))
        _click_css(driver, selector)
        time.sleep(random.uniform(0.5, 1.0))

        # Upload video
        selector = 'div[data-testid="@editor/activation-flow-new-modal"] input[type="file"]'
        file_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )
        file_input.send_keys(f"{absolute_path}.mp4")
        # Wait (up to 6 minutes) for the upload progress loader to disappear.
        WebDriverWait(driver, 360).until(EC.invisibility_of_element_located((By.CSS_SELECTOR, ".ActivationLoaderstyled__ActivationLoaderProgress-sc-2xypj-0.ldMcbd")))

        # Choose video language
        selector = 'div[data-testid="@editor/subtitles/create-subtitles-language-picker"]'
        element = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.CSS_SELECTOR, selector)))
        element.click()
        time.sleep(random.uniform(0.5, 1.0))
        # NOTE(review): the numeric react-select id is generated by the page
        # and may change between site releases.
        element = driver.find_element(By.CSS_SELECTOR, 'input[id="react-select-5-input"]')
        element.send_keys(language_label)
        element.send_keys(Keys.ENTER)

        _click_css(driver, 'button[data-testid="@editor/subtitles/create-subtitles-button"]')
        WebDriverWait(driver, 240).until(EC.url_matches("veed.io/edit/.*/subtitles"))

        # Translate subtitle
        if lang_code != "en":
            _click_css(driver, 'button[data-testid="@editor/subtitles/translate"]')
            time.sleep(random.uniform(0.7, 1.2))
            element = driver.find_element(By.CSS_SELECTOR, 'input[id="react-select-7-input"]')
            element.send_keys("English (US)")
            element.send_keys(Keys.ENTER)
            _click_css(driver, 'div[data-testid="@editor/subtitles/translate-automatically-button-wrapper"]')
            time.sleep(random.uniform(1.0, 1.5))

        # Click on subtitle style folder
        _click_css(driver, 'button[data-testid="@editor/subtitles/styles"]')
        time.sleep(random.uniform(0.5, 1.25))

        # Choose one font and highlight style
        _click_css(driver, 'div[data-testid="@editor/subtitles/style-preset-thumbnail-slay"]')

        # "Done"
        _click_css(driver, 'button[data-testid="@header-controls/publish-button"]')
        time.sleep(random.uniform(0.5, 1.25))

        # Add subtitles to video instead of extra file
        text_element = driver.find_element('xpath', '//span[text()="Burn Subtitles"]')
        parent_element = text_element.find_element('xpath', './..')
        parent_element.find_element(By.CSS_SELECTOR, 'div[data-testid="@components/toggle"]').click()
        time.sleep(random.uniform(0.3, 0.75))

        # Set FPS: one DELETE keypress followed by typing "6". The resulting
        # value depends on the field's prior content -- TODO confirm.
        _click_css(driver, 'button[data-testid="@export/advanced-settings"]')
        time.sleep(random.uniform(0.3, 0.75))
        element = driver.find_element(By.CSS_SELECTOR, 'input[data-testid="@slider-with-input/text-input"]')
        element.click()
        element.send_keys(Keys.DELETE)
        element.send_keys("6")

        # Export
        _click_css(driver, 'button[data-testid="@export/export-button"]')
        WebDriverWait(driver, 120).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'button[aria-label="Play video"]')))

        # Proceed with downloading the video: read its URL from the page.
        soup = BeautifulSoup(driver.page_source, "html.parser")
        video_source = soup.find("source", {"data-testid": "@video-player/source"})
        if not video_source:
            print("Video source element not found.")
            return
        video_url = video_source.get("src")

        if _download_video(video_url, driver.current_url, f"{absolute_path}_subed.mp4"):
            print(f"Video transcribed successfully: {name}")
    finally:
        # Always close the browser, including on early returns and errors.
        driver.quit()
def rmv_wtrmrk(path: str) -> None:
    """Merge the original and subtitled videos into ``{path}_merged.mp4``.

    The top half of the frame is taken from ``{path}.mp4`` and the bottom
    half (audio removed) from ``{path}_subed.mp4``; the two halves are stacked
    vertically. Presumably the watermark sits in the top half of the
    subtitled export and the subtitles in the bottom half -- confirm against
    actual exports.

    Args:
        path: Video path WITHOUT the ``.mp4`` extension.

    Returns:
        None. Any error is printed rather than raised (best-effort step).
    """
    print("Removing Watermark")
    og_vid = sub_vid = final_clips = None
    try:
        og_vid = VideoFileClip(f"{path}.mp4")
        sub_vid = VideoFileClip(f"{path}_subed.mp4")
        # Both crops use the ORIGINAL video's dimensions.
        width, height = og_vid.size
        upper_vid = og_vid.crop(x1=0, y1=0, x2=width, y2=height // 2)
        lower_vid = sub_vid.crop(x1=0, y1=height // 2, x2=width, y2=height)
        # Remove the audio from the lower clip so only one audio track remains
        lower_vid = lower_vid.without_audio()
        # Combine the upper and lower clips vertically
        final_clips = clips_array([[upper_vid], [lower_vid]])
        final_clips.write_videofile(f"{path}_merged.mp4", preset="slow", verbose=False, logger=None)
    except Exception as e:
        print(f"Error removing watermark: {e}")
    finally:
        # Release the clips even when cropping or encoding failed; a failing
        # close() must not break the best-effort contract either.
        for clip in (og_vid, sub_vid, final_clips):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as e:
                print(f"Error removing watermark: {e}")