Rename from_file to from_file_using_temporary_files and add a new from_file

Rename from_file to from_file_using_temporary_files in case there is any situation in which the new from_file doesn't work (I couldn't find one, but it seems worth keeping the old method around, perhaps as deprecated). Add a new from_file function that does all the reading in memory with pipes instead of temporary files, which is faster and doesn't wear down disks under heavy usage. The new from_file function reads the input file, passes it to ffmpeg through a pipe, and then reads ffmpeg's output through another pipe directly into memory. Since WAV files store the file length in the header and ffmpeg can't write it because it is working on a stream, we patch the length in the raw data returned by ffmpeg before reading it with the standard method. Fixes jiaaro#237. Might also fix jiaaro#209.
antlarr · Mar 23, 2018 · da0722d · da0722d
1 parent e231209
commit da0722d
Showing 1 changed file with 74 additions and 1 deletion.
diff --git a/pydub/audio_segment.py b/pydub/audio_segment.py
@@ -445,7 +445,7 @@ def from_mono_audiosegments(cls, *mono_segments):
         )
 
     @classmethod
-    def from_file(cls, file, format=None, codec=None, parameters=None, **kwargs):
+    def from_file_using_temporary_files(cls, file, format=None, codec=None, parameters=None, **kwargs):
         orig_file = file
         file = _fd_or_path_or_tempfile(file, 'rb', tempfile=False)
 
@@ -544,6 +544,79 @@ def is_format(f):
 
         return obj
 
+    @classmethod
+    def from_file(cls, file, format=None, codec=None, parameters=None, **kwargs):
+        orig_file = file
+        file = _fd_or_path_or_tempfile(file, 'rb', tempfile=False)
+
+        if format:
+            format = format.lower()
+            format = AUDIO_FILE_EXT_ALIASES.get(format, format)
+
+        def is_format(f):
+            f = f.lower()
+            if format == f:
+                return True
+            if isinstance(orig_file, basestring):
+                return orig_file.lower().endswith(".{0}".format(f))
+            return False
+
+        if is_format("wav"):
+            try:
+                return cls._from_safe_wav(file)
+            except:
+                file.seek(0)
+        elif is_format("raw") or is_format("pcm"):
+            sample_width = kwargs['sample_width']
+            frame_rate = kwargs['frame_rate']
+            channels = kwargs['channels']
+            metadata = {
+                'sample_width': sample_width,
+                'frame_rate': frame_rate,
+                'channels': channels,
+                'frame_width': channels * sample_width
+            }
+            return cls(data=file.read(), metadata=metadata)
+
+        conversion_command = [cls.converter,
+                              '-y',  # always overwrite existing files
+                              ]
+
+        # If format is not defined
+        # ffmpeg/avconv will detect it automatically
+        if format:
+            conversion_command += ["-f", format]
+
+        if codec:
+            # force audio decoder
+            conversion_command += ["-acodec", codec]
+
+        conversion_command += [
+            "-i", "-",  # input_file options (filename last)
+            "-vn",  # Drop any video streams if there are any
+            "-f", "wav",  # output options (filename last)
+            "-"
+        ]
+
+        if parameters is not None:
+            # extend arguments with arbitrary set
+            conversion_command.extend(parameters)
+
+        log_conversion(conversion_command)
+
+        p = subprocess.Popen(conversion_command, stdin=subprocess.PIPE,
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        p_out, p_err = p.communicate(input=file.read())
+
+        if p.returncode != 0:
+            raise CouldntDecodeError("Decoding failed. ffmpeg returned error code: {0}\n\nOutput from ffmpeg/avlib:\n\n{1}".format(p.returncode, p_err))
+
+        p_out = bytearray(p_out)
+        p_out[4:8] = struct.pack('<i', len(p_out) - 8)
+        obj = cls._from_safe_wav(BytesIO(p_out))
+
+        return obj
+
     @classmethod
     def from_mp3(cls, file, parameters=None):
         """
         Load an MP3 file; shorthand for ``from_file`` with format 'mp3'.

         file       -- path or file-like object, forwarded to ``from_file``
         parameters -- optional list of extra converter arguments
         """
         # Pass by keyword: the third positional slot of from_file is
         # `codec`, so a positional `parameters` would be used as the
         # decoder name instead of as extra converter arguments.
         return cls.from_file(file, 'mp3', parameters=parameters)