diff --git a/examples/alignment.dx b/examples/alignment.dx
new file mode 100644
index 000000000..c4257d548
--- /dev/null
+++ b/examples/alignment.dx
@@ -0,0 +1,359 @@
+import fft
+import parser
+import plot
+
+' # Speech Processing
+
+' This notebook describes the basic steps behind speech processing.
+  The goal is to walk through the process, from reading in a wave
+  file to being ready to train a full speech recognition system.
+
+
+' ## Wave Files
+  To begin, we need to be able to read in .wav files. To do this
+  we follow the wave file spec and define a header and the raw
+  data format.
+
+' [Wave Format Documentation](https://docs.fileformat.com/audio/wav/)
+
+
+-- Header fields kept from a wave file; the integer fields are decoded from
+-- little-endian bytes by `wavparser`.
+data WavHeader =
+  AsWavHeader {size:Int &
+               type:(Fin 4=>Char) &
+               chunk:(Fin 4=>Char) &
+               formatlength:Int &
+               channels:Int &
+               samplerate:Int &
+               bitspersample:Int &
+               datasize:Int}
+
+-- A parsed wave file: its header plus the decoded 16-bit samples.
+data Wav = AsWav header:WavHeader dat:(List Int)
+
+' ### Binary Manipulation
+  We will need a couple of additional binary conversion functions. (These are naive for now.)
+
+-- 2^15: the first unsigned 16-bit value whose sign bit is set.
+z = FToI (pow 2.0 15.0)
+-- Reinterpret an unsigned 16-bit value (carried in a Word32) as a signed Int.
+-- Values below 2^15 are returned unchanged; values of 2^15 and above have 2^16
+-- subtracted, so 0x8000 maps to -32768 and 0xFFFF maps to -1.
+def W32ToI (x : Word32): Int =
+  y:Int = internalCast _ x
+  select (y < z) y ((-1)*z + (y- z))
+
+-- Widen a byte to a 32-bit word so it can be shifted and or-ed with others.
+def W8ToW32 (x : Word8): Word32 = internalCast _ x
+
+-- Assemble four little-endian bytes into an Int. The word is cast at full
+-- width: routing it through the 16-bit `W32ToI` would corrupt every value of
+-- 2^15 or more (for example a 44100 Hz sample rate).
+def bytesToInt32 (chunk : Fin 4 => Byte) : Int =
+  [a, b, c, d] = map W8ToW32 chunk
+  internalCast _ ((d .<<. 24) .|. (c .<<. 16) .|. (b .<<. 8) .|. a)
+
+
+-- Assemble two little-endian bytes into a signed 16-bit value.
+def bytesToInt16 (chunk : Fin 2 => Byte) : Int =
+  [a, b] = map W8ToW32 chunk
+  W32ToI ((b .<<. 8) .|. a)
+
+
+' ### Parsing Types
+
+' We also add a couple of additional parser helpers.
+
+-- Parse exactly `n` characters into a table, one `parseAny` per index.
+def parseChars (n:Int) : (Parser (Fin n => Char)) =
+  MkParser \h. for i. parse h parseAny
+
+-- Parse two bytes and decode them with `bytesToInt16`.
+def parseI16 : Parser Int = MkParser \h.
+  bytesToInt16 $ parse h $ parseChars 2
+
+' ### Wave Parsing and IO
+
+' We define a parser to construct the header and to read in the data.
+
+-- Fields are read in the order given by the wave spec linked above; in the
+-- format chunk the 4-byte format length comes before the 2-byte format type.
+wavparser : Parser Wav = MkParser \h.
+  -- Leading 4-byte file tag; read and discarded without validation.
+  parse h $ parseChars 4
+  size = bytesToInt32 $ parse h $ parseChars 4
+  type = parse h $ parseChars 4
+  chunk = parse h $ parseChars 4
+  formatlength = bytesToInt32 $ parse h $ parseChars 4
+  -- Format type (2 bytes); not stored in the header record.
+  parse h parseI16
+  channels = parse h parseI16
+  samplerate = bytesToInt32 $ parse h $ parseChars 4
+  -- Byte rate (4 bytes) and block align (2 bytes) per the spec; both skipped.
+  bytesToInt32 $ parse h $ parseChars 4
+  parse h parseI16
+  bitspersample = parse h parseI16
+  parse h $ pString (AsList 4 ['d', 'a', 't', 'a'])
+  datasize = bytesToInt32 $ parse h $ parseChars 4
+  -- Everything after the data-size field is read as 16-bit samples.
+  x = parse h $ parseMany parseI16
+  header = AsWavHeader {size,
+                        type, chunk,
+                        formatlength,
+                        channels, samplerate,
+                        bitspersample,
+                        datasize}
+  AsWav header x
+
+
+x = unsafeIO do runParserPartial (readFile "examples/speech2.wav") wavparser
+-- Fall back to an empty sample list when the parse fails.
+(AsList len raw_sample_wav) = case x of
+  Nothing -> mempty
+  (Just (AsWav header dat)) -> dat
+
+
+-- Hard-coded here rather than taken from the parsed header.
+samplerate = 8000
+
+' ### The Audio
+
+' We are going to work with MNIST-style audio: short snippets of
+  people saying letters.
+
+s = unsafeIO do base64Encode (readFile "examples/speech2.wav")
+
+:html "