diff --git a/CHANGELOG.md b/CHANGELOG.md
index a85fce1dac..462c7bc4cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 * Better syntax errors for invalid use of `!`.
 
+* `futhark literate` now supports a `$loadaudio` builtin function for loading
+  audio into Futhark programs.
+
 ### Removed
 
 ### Changed
diff --git a/docs/man/futhark-literate.rst b/docs/man/futhark-literate.rst
index cbf743557c..19ba46db0d 100644
--- a/docs/man/futhark-literate.rst
+++ b/docs/man/futhark-literate.rst
@@ -270,6 +270,10 @@ Futhark. The following builtins are supported:
   of values is returned, which should be destructured before use. For
   example: ``let (a, b) = $loaddata "foo.in" in bar a b``.
 
+* ``$loadaudio "file"`` reads audio from the given file and returns it as a
+  ``[][]f64``, where each row corresponds to a channel of the original
+  sound file.
+
 SAFETY
 ======
 
diff --git a/src/Futhark/CLI/Literate.hs b/src/Futhark/CLI/Literate.hs
index 7afbe92784..fd443d830f 100644
--- a/src/Futhark/CLI/Literate.hs
+++ b/src/Futhark/CLI/Literate.hs
@@ -18,6 +18,7 @@ import Data.Set qualified as S
 import Data.Text qualified as T
 import Data.Text.Encoding qualified as T
 import Data.Text.IO qualified as T
+import Data.Text.Read qualified as T
 import Data.Vector.Storable qualified as SVec
 import Data.Vector.Storable.ByteString qualified as SVec
 import Data.Void
@@ -609,6 +610,42 @@ loadData datafile = do
     Just vs ->
       pure $ ValueTuple $ map ValueAtom vs
 
+-- | Load a headerless file of interleaved 64-bit float samples (as produced
+-- by the @ffmpeg@ invocation in 'loadAudio') as a two-dimensional @f64@ value
+-- of shape @[num_channels][channel_length]@, with one row per channel.  The
+-- caller must pass a positive @num_channels@, as it is used as a divisor.
+loadPCM :: Int -> FilePath -> ScriptM (Compound Value)
+loadPCM num_channels pcmfile = do
+  contents <- liftIO $ LBS.readFile pcmfile
+  let v = SVec.byteStringToVector $ LBS.toStrict contents
+      channel_length = SVec.length v `div` num_channels
+      shape =
+        SVec.fromList
+          [ fromIntegral num_channels,
+            fromIntegral channel_length
+          ]
+      -- ffmpeg outputs audio data in column-major format. `backPermuter` computes the
+      -- transposed indexes for a backpermutation.
+      backPermuter i = (i `mod` channel_length) * num_channels + i `div` channel_length
+      perm = SVec.generate (SVec.length v) backPermuter
+  pure $ ValueAtom $ F64Value shape $ SVec.backpermute v perm
+
+-- | Load an audio file as a @[][]f64@ with one row per channel.  The channel
+-- count is queried with @ffprobe@, and the samples are decoded by @ffmpeg@ to
+-- raw @pcm_f64le@ data in a temporary directory before being read back.
+loadAudio :: FilePath -> ScriptM (Compound Value)
+loadAudio audiofile = do
+  s <- system "ffprobe" [audiofile, "-show_entries", "stream=channels", "-select_streams", "a", "-of", "compact=p=0:nk=1", "-v", "0"] mempty
+  case T.decimal s of
+    -- A channel count of zero is rejected here, as loadPCM divides by it.
+    Right (num_channels, _)
+      | num_channels > 0 ->
+          withTempDir $ \dir -> do
+            let pcmfile = dir </> takeBaseName audiofile `replaceExtension` "pcm"
+            void $ system "ffmpeg" ["-i", audiofile, "-c:a", "pcm_f64le", "-map", "0", "-f", "data", pcmfile] mempty
+            loadPCM num_channels pcmfile
+    _ -> throwError "$loadaudio failed to detect the number of channels in the audio input"
+
 literateBuiltin :: EvalBuiltin ScriptM
 literateBuiltin "loadimg" vs =
   case vs of
@@ -630,6 +667,16 @@ literateBuiltin "loaddata" vs =
       throwError $
         "$loaddata does not accept arguments of types: "
           <> T.intercalate ", " (map (prettyText . fmap valueType) vs)
+literateBuiltin "loadaudio" vs =
+  case vs of
+    [ValueAtom v]
+      | Just path <- getValue v -> do
+          let path' = map (chr . fromIntegral) (path :: [Word8])
+          loadAudio path'
+    _ ->
+      throwError $
+        "$loadaudio does not accept arguments of types: "
+          <> T.intercalate ", " (map (prettyText . fmap valueType) vs)
 literateBuiltin f _ =
   throwError $ "Unknown builtin function $" <> prettyText f
 
diff --git a/tests_literate/audio.fut b/tests_literate/audio.fut
index e90e6fe738..69eda335c3 100644
--- a/tests_literate/audio.fut
+++ b/tests_literate/audio.fut
@@ -76,3 +76,7 @@ entry surround =
   in [left, right, right, right, right, right]
 
 -- > :audio surround
+
+-- > $loadaudio "mono.wav"
+
+-- > $loadaudio "stereo.wav"
diff --git a/tests_literate/expected/audio.md b/tests_literate/expected/audio.md
index f73be5437e..a5e31b460f 100644
--- a/tests_literate/expected/audio.md
+++ b/tests_literate/expected/audio.md
@@ -107,3 +107,22 @@ entry surround =
 
 ![](audio-img/7e0c96822c449db1dd5712a2b809ff40-output.wav)
 
+
+```
+> $loadaudio "mono.wav"
+```
+
+```
+[[-0.9921875f64, -0.984375f64, -0.9765625f64]]
+```
+
+
+```
+> $loadaudio "stereo.wav"
+```
+
+```
+[[-0.9921875f64, -0.984375f64, -0.9765625f64],
+[-0.96875f64, -0.9609375f64, -0.953125f64]]
+```
+