-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathtranscoder.py
335 lines (294 loc) · 12.7 KB
/
transcoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
#!/usr/bin/python
import logging
import os
import re
import shlex
import shutil
import signal
import subprocess
import sys
import time
def non_zero_min(values):
    "Return the smallest non-zero value, falling back to 0 if all are zero"
    if len(values) < 1:
        raise TypeError('non_zero_min expected 1 arguments, got 0')
    # single pass: track the smallest entry seen so far, skipping zeros
    smallest = None
    for candidate in values:
        if candidate == 0:
            continue
        if smallest is None or candidate < smallest:
            smallest = candidate
    # nothing but zeros were supplied
    if smallest is None:
        return 0
    return smallest
class Transcoder(object):
    """
    Poll a VirtualBox shared folder for new video files and transcode them.

    ``run`` loops until ``stop`` is called (normally from a signal handler).
    Each input file is scanned with HandBrakeCLI, crop-detected with
    detect-crop.sh and transcoded with transcode-video.sh into
    WORK_DIRECTORY. The finished output is then moved to OUTPUT_DIRECTORY and
    the original input is moved to COMPLETED_DIRECTORY.
    """
    # name of the share defined in virtualbox that will contain input/output video
    VBOX_SHARE_NAME = 'transcoder'
    # path to mount the virtual box share
    TRANSCODER_ROOT = "/media/transcoder"
    # directory containing new video to transcode
    INPUT_DIRECTORY = TRANSCODER_ROOT + '/input'
    # directory where handbrake will save the output to. this is a temporary
    # location and the file is moved to OUTPUT_DIRECTORY after complete
    WORK_DIRECTORY = TRANSCODER_ROOT + '/work'
    # directory containing the original inputs after they've been transcoded
    COMPLETED_DIRECTORY = TRANSCODER_ROOT + '/completed-originals'
    # directory containing the compressed outputs
    OUTPUT_DIRECTORY = TRANSCODER_ROOT + '/output'
    # standard options for the transcode-video script
    TRANSCODE_OPTIONS = '--mkv --slow --allow-dts --allow-ac3 --find-forced add --copy-all-ac3'
    # number of seconds a file must remain unmodified in the INPUT_DIRECTORY
    # before it is considered done copying. increase this value for more
    # tolerance on bad network connections.
    WRITE_THRESHOLD = 30
    # path to logfile
    LOGFILE = TRANSCODER_ROOT + '/transcoder.log'

    def __init__(self):
        # True while the polling loop in run() should keep going
        self.running = False
        # created lazily by check_filesystem() once the share is available
        self.logger = None
        # reserved for the running child process (see TODO in execute)
        self.current_command = None
        # signal number -> handler that was installed before ours
        self._default_handlers = {}

    def setup_signal_handlers(self):
        "Setup graceful shutdown and cleanup when sent a signal"
        def handler(signum, frame):
            self.stop()
        for sig in (signal.SIGTERM, signal.SIGHUP, signal.SIGINT):
            # signal.signal returns the previous handler; keep it for restore
            self._default_handlers[sig] = signal.signal(sig, handler)

    def restore_signal_handlers(self):
        "Restore the default handlers"
        for sig, handler in self._default_handlers.items():
            signal.signal(sig, handler)
        self._default_handlers = {}

    @staticmethod
    def _to_text(data):
        "Decode subprocess output to str on Python 3; no-op if already str"
        if data is None or isinstance(data, str):
            return data
        return data.decode('utf-8', 'replace')

    def execute(self, command):
        """
        Run an external command and return its combined stdout/stderr as text.

        ``command`` is either a single string, which is split with
        shlex.split, or an already-split list/tuple of arguments, which is
        used as-is so that arguments such as file paths are never re-parsed.

        Raises subprocess.CalledProcessError on a non-zero exit status. The
        exception's ``output`` attribute is converted to text as well, so
        callers can apply str operations to it on both Python 2 and 3.
        """
        # TODO: use Popen and assign to current_command so we can terminate
        if isinstance(command, (list, tuple)):
            args = list(command)
        else:
            args = shlex.split(command)
        try:
            out = subprocess.check_output(args=args, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as ex:
            ex.output = self._to_text(ex.output)
            raise
        return self._to_text(out)

    def mount_share(self):
        """
        Mount the VBox share if it's not already mounted.
        Returns True if mounted, otherwise False.
        """
        out = self.execute('mount')
        if '%s type vboxsf' % self.TRANSCODER_ROOT in out:
            return True
        # attempt to mount
        uid, gid = os.getuid(), os.getgid()
        command = 'sudo mount -t vboxsf -o uid=%s,gid=%s %s %s' % (
            uid, gid, self.VBOX_SHARE_NAME, self.TRANSCODER_ROOT)
        try:
            self.execute(command)
        except subprocess.CalledProcessError as ex:
            # the logfile lives on the share, so report to stdout instead
            msg = 'Unable to mount Virtual Box Share: %s' % ex.output
            sys.stdout.write(msg)
            sys.stdout.flush()
            return False
        return True

    def setup_logging(self):
        "Create the 'transcoder' logger writing DEBUG and above to LOGFILE"
        self.logger = logging.getLogger('transcoder')
        self.logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(self.LOGFILE)
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
        self.logger.info('Transcoder started and scanning for input')

    def check_filesystem(self):
        "Checks that the filesystem and logger is setup properly"
        dirs = (self.INPUT_DIRECTORY, self.WORK_DIRECTORY,
                self.OUTPUT_DIRECTORY, self.COMPLETED_DIRECTORY)
        if not all(map(os.path.exists, dirs)):
            # a missing directory may just mean the share isn't mounted yet
            if not self.mount_share():
                return False
            for path in dirs:
                if not os.path.exists(path):
                    try:
                        os.mkdir(path)
                    except OSError as ex:
                        msg = 'Cannot create directory "%s": %s' % (
                            path, ex.strerror)
                        sys.stdout.write(msg)
                        sys.stdout.flush()
                        return False
        if not self.logger:
            self.setup_logging()
        return True

    def stop(self):
        "Stop the polling loop, shut down logging and restore signal handlers"
        # guard against multiple signals being sent before the first one
        # finishes
        if not self.running:
            return
        self.running = False
        # the logger only exists once check_filesystem() has succeeded, so a
        # signal received before that must not try to use it
        if self.logger:
            self.logger.info('Transcoder shutting down')
        # current_command is not assigned by execute() yet (see its TODO)
        if self.current_command:
            self.current_command.terminate()
        # logging
        logging.shutdown()
        self.logger = None
        # signal handlers
        self.restore_signal_handlers()

    def run(self):
        "Poll for input every 5 seconds until stop() is called"
        self.running = True
        self.setup_signal_handlers()
        while self.running:
            if self.check_filesystem():
                self.check_for_input()
            time.sleep(5)

    def check_for_input(self):
        """
        Look in INPUT_DIRECTORY for an input file and process it.

        At most one file is handled per call. After process_input returns,
        the source file is moved to COMPLETED_DIRECTORY whether or not the
        transcode produced an output.
        """
        for filename in os.listdir(self.INPUT_DIRECTORY):
            if filename.startswith('.'):
                continue
            path = os.path.join(self.INPUT_DIRECTORY, filename)
            try:
                age = time.time() - os.stat(path).st_mtime
            except OSError:
                # the file disappeared between listdir() and stat()
                continue
            if age <= self.WRITE_THRESHOLD:
                # modified too recently; it may still be copying
                continue
            # when copying a file from windows to the VM, the filesize and
            # last modified times don't change as data is written.
            # fortunately these files seem to be locked such that
            # attempting to open the file for reading raises an IOError.
            # it seems reasonable to skip any file we can't open
            try:
                with open(path, 'r'):
                    pass
            except IOError:
                continue
            self.process_input(path)
            # move the source to the COMPLETED_DIRECTORY
            dst = os.path.join(self.COMPLETED_DIRECTORY,
                               os.path.basename(path))
            shutil.move(path, dst)
            break

    def process_input(self, path):
        "Scan, crop-detect and transcode ``path``, then publish the output"
        name = os.path.basename(path)
        self.logger.info('Found new input "%s"', name)

        # if any of the following functions return no output, something
        # bad happened and we can't continue

        # parse the input meta info.
        meta = self.scan_media(path)
        if not meta:
            return

        # determine crop dimensions
        crop = self.detect_crop(path)
        if not crop:
            return

        # transcode the video
        work_path = self.transcode(path, crop, meta)
        if not work_path:
            return

        # move the completed output to the output directory
        self.logger.info('Moving completed work output %s to output directory',
                         os.path.basename(work_path))
        output_path = os.path.join(self.OUTPUT_DIRECTORY,
                                   os.path.basename(work_path))
        shutil.move(work_path, output_path)
        # a missing log file should not abort publishing the finished output
        work_log = work_path + '.log'
        if os.path.exists(work_log):
            shutil.move(work_log, output_path + '.log')

    def scan_media(self, path):
        """
        Use handbrake to scan the media for metadata.

        Returns the raw HandBrakeCLI scan output, or None if the scan
        command exits with an error.
        """
        name = os.path.basename(path)
        self.logger.info('Scanning "%s" for metadata', name)
        # pass an argument list so the path is never re-parsed by shlex
        command = ['HandBrakeCLI', '--scan', '--input', path]
        try:
            out = self.execute(command)
        except subprocess.CalledProcessError as ex:
            if 'unrecognized file type' in ex.output:
                self.logger.info('Unknown media type for input "%s"', name)
            else:
                self.logger.info('Unknown error for input "%s" with error: %s',
                                 name, ex.output)
            return None
        return out

    def detect_crop(self, path):
        """
        Run detect-crop.sh and return the crop to use for ``path``.

        The result is four colon-separated integers. '0:0:0:0' is returned
        when detection fails or reports no crop values.
        """
        crop_re = r'[0-9]+:[0-9]+:[0-9]+:[0-9]+'
        name = os.path.basename(path)
        self.logger.info('Detecting crop for input "%s"', name)
        # pass an argument list so the path is never re-parsed by shlex
        command = ['detect-crop.sh', '--values-only', path]
        try:
            out = self.execute(command)
        except subprocess.CalledProcessError as ex:
            # when detect-crop detects discrepancies between handbrake and
            # mplayer, each crop is written out but detect-crop also returns
            # an error code. if this is the case, we don't want to error out.
            if re.findall(crop_re, ex.output):
                out = ex.output
            else:
                self.logger.info('detect-crop failed for input "%s", '
                                 'proceeding with no crop. error: %s',
                                 name, ex.output)
                return '0:0:0:0'

        crops = re.findall(crop_re, out)
        if not crops:
            self.logger.info('No crop found for input "%s", '
                             'proceeding with no crop', name)
            return '0:0:0:0'

        # use the smallest crop for each edge. prefer non-zero values if
        # they exist
        dimensions = zip(*[[int(v) for v in c.split(':')] for c in crops])
        crop = ':'.join(str(non_zero_min(piece)) for piece in dimensions)
        self.logger.info('Using crop "%s" for input "%s"', crop, name)
        return crop

    def transcode(self, path, crop, meta):
        """
        Transcode ``path`` into WORK_DIRECTORY with transcode-video.sh.

        ``crop`` is the crop string from detect_crop and ``meta`` the scan
        output from scan_media (used to add extra audio tracks).

        Returns the path of the .mkv written to WORK_DIRECTORY, or None if
        the transcode command exits with an error.
        """
        name = os.path.basename(path)
        output_name = os.path.splitext(name)[0] + '.mkv'
        output = os.path.join(self.WORK_DIRECTORY, output_name)
        # if these paths exist in the work directory, remove them first
        for workpath in (output, output + '.log'):
            if os.path.exists(workpath):
                self.logger.info('Removing old work output: "%s"', workpath)
                os.unlink(workpath)

        # build an argument list: option strings are split by shlex, but the
        # file paths are passed through verbatim so quotes or backslashes in
        # a file name cannot corrupt the command
        args = ['transcode-video.sh', '--crop', crop]
        args.extend(shlex.split(self.parse_audio_tracks(meta)))
        args.extend(shlex.split(self.TRANSCODE_OPTIONS))
        args.extend(['--output', output, path])

        # the joined form is for the log only; it is not what gets executed
        self.logger.info('Transcoding input "%s" with command: %s',
                         path, ' '.join(args))
        try:
            self.execute(args)
        except subprocess.CalledProcessError as ex:
            self.logger.info('Transcoding failed for input "%s": %s',
                             name, ex.output)
            return None
        self.logger.info('Transcoding completed for input "%s"', name)
        return output

    def parse_audio_tracks(self, meta):
        """
        Parse the meta info for audio tracks beyond the first one.

        Returns a string of ``--add-audio N,"title"`` options separated by
        spaces, or an empty string when there is at most one audio track.
        """
        # find all the audio streams and their optional language and title data
        streams = []
        stream_re = r'(\s{4}Stream #[0-9]+\.[0-9]+(?:\((?P<lang>[a-z]+)\))?: Audio:.*?\n)(?=(?:\s{4}Stream)|(?:[^\s]))'
        title_re = r'^\s{6}title\s+:\s(?P<title>[^\n]+)'
        for stream, lang in re.findall(stream_re, meta, re.DOTALL | re.MULTILINE):
            # keep the captured language; normalize a missing one to ''
            lang = lang or ''
            title = ''
            title_match = re.search(title_re, stream, re.MULTILINE)
            if title_match:
                title = title_match.group(1)
            streams.append({'title': title, 'lang': lang})

        # find the audio track numbers
        tracks = []
        pos = meta.find('+ audio tracks:')
        track_re = r'^\s+\+\s(?P<track>[0-9]+),\s(?P<title>[^\(\n]*)'
        # no audio section at all means there are no tracks to list
        lines = meta[pos:].split('\n')[1:] if pos != -1 else []
        for line in lines:
            # the subtitle section ends the audio track list; ignore the
            # indentation so the check doesn't depend on exact spacing
            if line.lstrip().startswith('+ subtitle tracks:'):
                break
            match = re.match(track_re, line)
            if match:
                tracks.append({'number': match.group(1), 'title': match.group(2)})

        # assuming there's an equal number of tracks and streams, we can
        # match up stream titles to tracks and have a nicer output
        use_stream_titles = len(streams) == len(tracks)
        additional_tracks = []
        for i, track in enumerate(tracks[1:]):
            title = ''
            if use_stream_titles:
                # i counts from the second track, hence the +1 offset
                title = streams[i + 1]['title']
            title = title or track['title']
            # remove any quotes in the title so we don't mess up the command
            title = title.replace('"', '')
            self.logger.info('Adding audio track #%s with title: %s',
                             track['number'], title)
            additional_tracks.append('--add-audio %s,"%s"' % (
                track['number'], title))
        return ' '.join(additional_tracks)
if __name__ == '__main__':
    # started as a script: build a transcoder and enter its polling loop
    Transcoder().run()