forked from PetterKraabol/Twitch-Chat-Downloader
-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Support new Twitch API
- Loading branch information
Showing
1 changed file
with
81 additions
and
292 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,307 +1,96 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
import requests, sys, time, os, json, shutil, argparse, datetime | ||
|
||
try: | ||
# On Python 2, the function to use is raw_input | ||
input = raw_input | ||
# And the native string type is bytes, so we need UTF-8 | ||
def enc(s): return s.encode('utf-8') | ||
except NameError: | ||
# On Python 3, the input() function is the one we want. | ||
# And the native string type is Unicode. | ||
def enc(s): return s | ||
import sys | ||
import json | ||
import datetime | ||
|
||
# Parse arguments | ||
parser = argparse.ArgumentParser(description='Twitch Chat Downloader') | ||
parser.add_argument('-v', '--video', help='Video id') | ||
parser.add_argument('-i', '--client-id', help='Twitch client id') | ||
parser.add_argument('-p', '--print', dest='p', help='Print messages', action='store_true') | ||
parser.add_argument('-o', '--output', help='Output folder') | ||
parser.add_argument('-f', '--format', help='Message format', choices=['timestamp', 'relative', 'srt', 'ssa', 'ass', 'raw']) | ||
parser.add_argument('--cooldown', type=float, help='Cooldown time between API requests in seconds') | ||
parser.add_argument('--start', type=int, help='Start time in seconds from video start') | ||
parser.add_argument('--stop', type=int, help='Stop time in seconds from video start') | ||
parser.add_argument('--subtitle-duration', type=int, help='If using a subtitle format, subtitle duration in seconds') | ||
import requests | ||
|
||
arguments = parser.parse_args() | ||
|
||
# Get video ID | ||
if arguments.video: | ||
videoId = 'v' + arguments.video.replace('v', '') | ||
else: | ||
videoId = 'v' + input('Video ID: ').replace('v', '') | ||
class Messages(list):
    """Iterate over the chat comments of a Twitch video, one page at a time.

    The instance is both the iterator and its own buffer: every fetched page
    of comments is appended to the list and comments are handed out from the
    front. Iterating therefore consumes the buffered comments.

    Args:
        video_id: Twitch video ID (an int, or a string holding the number).
        client_id: Value sent in the ``Client-ID`` request header. When
            omitted, the ID that used to be hard-coded here is used.
    """

    API_URL = "https://api.twitch.tv/v5/videos/%s/comments"

    # NOTE(review): kept only so existing callers keep working; pass your own
    # ``client_id`` instead of relying on this value.
    DEFAULT_CLIENT_ID = "jzkbprff40iqj646a697cyrvl0zt2m6"

    def __init__(self, video_id, client_id=None):
        super(Messages, self).__init__()
        self.video_id = video_id
        self.base_url = self.API_URL % video_id

        # Paging state. Set here as well as in __iter__ so that calling
        # next() on a fresh instance does not fail on a missing attribute.
        self.cursor = None
        self.stop = False

        self.client = requests.Session()
        self.client.headers["Accept"] = "application/vnd.twitchtv.v5+json"
        self.client.headers["Client-ID"] = client_id or self.DEFAULT_CLIENT_ID

    def __iter__(self):
        """Restart paging from the beginning of the video and return self."""
        self.cursor = None
        self.stop = False
        return self

    def _load_more(self):
        """Fetch the next page of comments from the API and buffer it.

        Raises:
            requests.HTTPError: if the API answers with an error status.
        """
        if self.cursor is None:
            params = {"content_offset_seconds": 0}
        else:
            params = {"cursor": self.cursor}

        # Passing the query through ``params`` lets requests URL-escape the
        # cursor instead of concatenating it into the URL verbatim.
        response = self.client.get(self.base_url, params=params)
        response.raise_for_status()
        self._store_page(response.json())

    def _store_page(self, page):
        """Buffer the comments of one decoded API page and update paging state.

        Args:
            page: Decoded JSON object of one response; its ``comments`` list
                is buffered and its ``_next`` value becomes the new cursor.
        """
        self.extend(page.get("comments") or [])

        # A missing ``_next`` key is treated the same as an explicit null:
        # there is nothing further to request.
        self.cursor = page.get("_next")

        if self.cursor is None or not self:
            self.stop = True

    def next(self):
        """Return the next comment, fetching another page when needed.

        Raises:
            StopIteration: when the buffer is empty and no page is left.
        """
        if not self:
            if self.stop:
                raise StopIteration
            self._load_more()
            # The page just fetched may have been empty; end the iteration
            # instead of popping from an empty list.
            if not self:
                raise StopIteration

        return self.pop(0)

    # Python 3 spells the iterator protocol method ``__next__``.
    __next__ = next
|
||
class SubtitlesASS(object):
    """Write chat comments to an SSA/ASS subtitle file.

    The file is created (or truncated) and the script header is written as
    soon as the object is constructed. Call :meth:`add` once per comment and
    :meth:`close` when done, or use the object as a context manager.

    Args:
        file: Path of the subtitle file to write.
        duration: Number of seconds each comment stays on screen.
    """

    def __init__(self, file, duration=2):
        # Local import so this class does not depend on a file-level import.
        import io

        self.duration = duration

        # Ten comma-separated fields, matching the [Events] "Format:" line
        # written below; the last field (Text) holds "user: message".
        self.line_format = (
            u'Dialogue: Marked=0, {start}, {end}, Default, {user}, '
            u'0000, 0000, 0000, , {user}: {message}\n'
        )

        # Text mode with an explicit encoding: comments are written as
        # Unicode text and encoded once, here at the file boundary.
        self.file = io.open(file, mode='w+', encoding='utf-8')

        try:
            self.file.writelines([
                u'[Script Info]\n',
                u'\n',
                u'PlayResX: 1280\n',
                u'PlayResY: 720\n',
                u'\n',
                u'[V4 Styles]\n',
                u'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding\n',
                u'Style: Default,Arial,20,65535,65535,65535,-2147483640,-1,0,1,3,0,1,5,0,5,0,0\n',
                u'\n',
                u'[Events]\n',
                u'Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n'
            ])
        except Exception:
            # Do not leak the open handle if the header cannot be written.
            self.file.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _date(self, seconds):
        """Format an offset in seconds as an SSA/ASS timestamp ``H:MM:SS.cc``.

        The fraction is rounded to hundredths of a second, and the hour
        field keeps growing past 24 instead of rolling over into days.
        """
        centis = int(round(seconds * 100))
        minutes, centis = divmod(centis, 60 * 100)
        hours, minutes = divmod(minutes, 60)
        secs, centis = divmod(centis, 100)
        return u'{}:{:02d}:{:02d}.{:02d}'.format(hours, minutes, secs, centis)

    def add(self, comment):
        """Append one comment as a ``Dialogue`` line.

        Args:
            comment: Mapping providing ``content_offset_seconds``,
                ``commenter.display_name`` and ``message.body``.
        """
        time_offset = comment['content_offset_seconds']

        self.file.write(self.line_format.format(
            start=self._date(time_offset),
            end=self._date(time_offset + self.duration),
            user=comment['commenter']['display_name'],
            message=comment['message']['body']
        ))

    def close(self):
        """Flush and close the file; calling it again is harmless."""
        # close() flushes pending data itself and is a no-op once closed.
        self.file.close()
|
||
if __name__ == "__main__":
    # Manual smoke test: download the chat of one fixed video and write it
    # to test.ass in the current directory.
    s = SubtitlesASS('test.ass')

    try:
        for comment in Messages(179882105):
            s.add(comment)
    finally:
        # Close the file even when the download or a write fails part-way,
        # so whatever was fetched so far is flushed to disk.
        s.close()
Don't forget to remove your client ID after testing