diff --git a/src/rs-core/transmux/fmp4.rs b/src/rs-core/transmux/fmp4.rs new file mode 100644 index 0000000..a949b7d --- /dev/null +++ b/src/rs-core/transmux/fmp4.rs @@ -0,0 +1,1081 @@ +use super::nal_unit_producer::NalVideoProperties; + +static AVC1: [u8; 4] = [97, 118, 99, 49]; +static AVCC: [u8; 4] = [97, 118, 99, 67]; +static BTRT: [u8; 4] = [98, 116, 114, 116]; +static DINF: [u8; 4] = [100, 105, 110, 102]; +static DREF: [u8; 4] = [100, 114, 101, 102]; +static ESDS: [u8; 4] = [101, 115, 100, 115]; +static FTYP: [u8; 4] = [102, 116, 121, 112]; +static HDLR: [u8; 4] = [104, 100, 108, 114]; +static MDAT: [u8; 4] = [109, 100, 97, 116]; +static MDHD: [u8; 4] = [109, 100, 104, 100]; +static MDIA: [u8; 4] = [109, 100, 105, 97]; +static MFHD: [u8; 4] = [109, 102, 104, 100]; +static MINF: [u8; 4] = [109, 105, 110, 102]; +static MOOF: [u8; 4] = [109, 111, 111, 102]; +static MOOV: [u8; 4] = [109, 111, 111, 118]; +static MP4A: [u8; 4] = [109, 112, 52, 97]; +static MVEX: [u8; 4] = [109, 118, 101, 120]; +static MVHD: [u8; 4] = [109, 118, 104, 100]; +static PASP: [u8; 4] = [112, 97, 115, 112]; +static SDTP: [u8; 4] = [115, 100, 116, 112]; +static SMHD: [u8; 4] = [115, 109, 104, 100]; +static STBL: [u8; 4] = [115, 116, 98, 108]; +static STCO: [u8; 4] = [115, 116, 99, 111]; +static STSC: [u8; 4] = [115, 116, 115, 99]; +static STSD: [u8; 4] = [115, 116, 115, 100]; +static STSZ: [u8; 4] = [115, 116, 115, 122]; +static STTS: [u8; 4] = [115, 116, 116, 115]; +static STYP: [u8; 4] = [115, 116, 121, 112]; +static TFDT: [u8; 4] = [116, 102, 100, 116]; +static TFHD: [u8; 4] = [116, 102, 104, 100]; +static TRAF: [u8; 4] = [116, 114, 97, 102]; +static TRAK: [u8; 4] = [116, 114, 97, 107]; +static TRUN: [u8; 4] = [116, 114, 117, 110]; +static TREX: [u8; 4] = [116, 114, 101, 120]; +static TKHD: [u8; 4] = [116, 107, 104, 100]; +static VMHD: [u8; 4] = [118, 109, 104, 100]; + +/// Create a box with the given name and corresponding content. +fn create_box(box_name: [u8; 4], children: &[Vec]) -> Vec { + let len = children.iter().fold(0, |acc, c| acc + c.len()) + 4 + 4; + let mut result = Vec::with_capacity(len); + result.extend(box_name); + result.extend((len as u32).to_be_bytes()); + children.iter().for_each(|v| { + result.extend(v); + }); + result +} + +/// Create a template `dinf` ISOBMFF box. +fn create_dinf() -> Vec { + let dref_content: Vec = vec![ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01, // entry_count + 0x00, 0x00, 0x00, 0x0c, // entry_size + 0x75, 0x72, 0x6c, 0x20, // 'url' type + 0x00, // version 0 + 0x00, 0x00, 0x01, // entry_flags + ]; + create_box(DINF, &vec![create_box(DREF, &vec![dref_content])]) +} + +/// Create an `esds` ISOBMFF box. +fn create_esds(audio_object_type: u8, sampling_frequency_index: u8, channel_count: u16) -> Vec { + create_box( + ESDS, + &vec![vec![ + 0x00, // version + 0x00, + 0x00, + 0x00, // flags + // ES_Descriptor + 0x03, // tag, ES_DescrTag + 0x19, // length + 0x00, + 0x00, // ES_ID + 0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority + // DecoderConfigDescriptor + 0x04, // tag, DecoderConfigDescrTag + 0x11, // length + 0x40, // object type + 0x15, // streamType + 0x00, + 0x06, + 0x00, // bufferSizeDB + 0x00, + 0x00, + 0xda, + 0xc0, // maxBitrate + 0x00, + 0x00, + 0xda, + 0xc0, // avgBitrate + // DecoderSpecificInfo + 0x05, // tag, DecoderSpecificInfoTag + 0x02, // length + // ISO/IEC 14496-3, AudioSpecificConfig + // for samplingFrequencyIndex see ISO/IEC 13818-7:2006, 8.1.3.2.2, Table 35 + (audio_object_type << 3) | (sampling_frequency_index >> 1), + (sampling_frequency_index << 7) | ((channel_count << 3) as u8), + 0x06, + 0x01, + 0x02, // GASpecificConfig + ]], + ) +} + +/// Create a template `ftyp` ISOBMFF box. +fn create_ftyp() -> Vec { + let major_brand: Vec = vec![105, 115, 111, 109]; + let avc1_brand: Vec = vec![97, 118, 99, 49]; + let minor_version: Vec = vec![0, 0, 0, 1]; + create_box( + FTYP, + &vec![major_brand.clone(), minor_version, major_brand, avc1_brand], + ) +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(super) enum MediaType { + Audio, + Video, +} + +/// Create an `hdlr` ISOBMFF box. +fn create_hdlr(hdlr_type: MediaType) -> Vec { + let video_hdlr: Vec = vec![ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0x76, 0x69, 0x64, 0x65, // handler_type: 'vide' + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x56, 0x69, 0x64, 0x65, 0x6f, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x72, + 0x00, // name: 'VideoHandler' + ]; + let audio_hdlr: Vec = vec![ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0x73, 0x6f, 0x75, 0x6e, // handler_type: 'soun' + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x53, 0x6f, 0x75, 0x6e, 0x64, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x72, + 0x00, // name: 'SoundHandler' + ]; + match hdlr_type { + MediaType::Audio => create_box(HDLR, &vec![audio_hdlr]), + MediaType::Video => create_box(HDLR, &vec![video_hdlr]), + } +} + +/// Create a `mdat` ISOBMFF box with the corresponding data. +pub(super) fn create_mdat(data: Vec) -> Vec { + create_box(MDAT, &vec![data]) +} + +/// Create a `mdhd` ISOBMFF box. +fn create_mdhd(duration: u32, sample_rate: Option) -> Vec { + let mut result: Vec = vec![ + 0x00, // version 0 + 0x00, + 0x00, + 0x00, // flags + 0x00, + 0x00, + 0x00, + 0x02, // creation_time + 0x00, + 0x00, + 0x00, + 0x03, // modification_time + 0x00, + 0x01, + 0x5f, + 0x90, // timescale, 90,000 "ticks" per second + ((duration >> 24) & 0xff) as u8, + ((duration >> 16) & 0xff) as u8, + ((duration >> 8) & 0xff) as u8, + (duration & 0xff) as u8, // duration + 0x55, + 0xc4, // 'und' language (undetermined) + 0x00, + 0x00, + ]; + + // Use the sample rate from the metadata, when it is + // defined. The sample rate can be parsed out of an ADTS header, for + // instance. + if let Some(sample_rate) = sample_rate { + result[12] = ((sample_rate >> 24) & 0xff) as u8; + result[13] = ((sample_rate >> 16) & 0xff) as u8; + result[14] = ((sample_rate >> 8) & 0xff) as u8; + result[15] = (sample_rate & 0xff) as u8; + } + create_box(MDHD, &vec![result]) +} + +/// Create a `mdia` ISOBMFF box. +fn create_mdia(md: &IsobmffMetadata, duration: u32, sample_rate: Option) -> Vec { + create_box( + MDIA, + &vec![ + create_mdhd(duration, sample_rate), + create_hdlr(md.media_type()), + create_minf(md), + ], + ) +} + +/// Create a `mfhd` ISOBMFF box. +fn create_mfhd(sequence_number: u32) -> Vec { + create_box( + MFHD, + &vec![vec![ + 0x00, + 0x00, + 0x00, + 0x00, // flags + ((sequence_number & 0xff000000) >> 24) as u8, + ((sequence_number & 0xff0000) >> 16) as u8, + ((sequence_number & 0xff00) >> 8) as u8, + (sequence_number & 0xff) as u8, // sequence_number + ]], + ) +} + +/// Create a `minf` ISOBMFF box. +fn create_minf(md: &IsobmffMetadata) -> Vec { + let smhd_content: Vec = vec![ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, // balance, 0 means centered + 0x00, 0x00, // reserved + ]; + let vmhd_content: Vec = vec![ + 0x00, // version + 0x00, 0x00, 0x01, // flags + 0x00, 0x00, // graphicsmode + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // opcolor + ]; + create_box( + MINF, + &vec![ + match md { + IsobmffMetadata::Video(_) => create_box(VMHD, &vec![vmhd_content]), + IsobmffMetadata::Audio(_) => create_box(SMHD, &vec![smhd_content]), + }, + create_dinf(), + create_stbl(md), + ], + ) +} + +/// Create a `moof` ISOBMFF box. +pub(super) fn create_moof(sequence_number: u32, tracks: &[TrackInfo]) -> Vec { + let track_fragments: Vec> = tracks.iter().map(|t| create_traf(t)).collect(); + let mut content = vec![create_mfhd(sequence_number)]; + content.extend(track_fragments); + create_box(MOOF, &content) +} + +pub(super) struct TrackInfo { + md: IsobmffMetadata, + track_id: u32, + base_media_decode_time: u32, + duration: Option, + samples: Vec, + sample_rate: Option, +} + +/// Creates a `moov` ISOBMFF box. +fn create_moov(tracks: &[TrackInfo]) -> Vec { + let mut inner_boxes = vec![create_mvhd(0xffffffff)]; + tracks + .iter() + .for_each(|t| inner_boxes.push(create_trak(&t.md, t.track_id, t.duration, t.sample_rate))); + inner_boxes.push(create_mvex(tracks)); + create_box(MOOV, &inner_boxes) +} + +/// Creates a `mvex` ISOBMFF box. +fn create_mvex(tracks: &[TrackInfo]) -> Vec { + let boxes: Vec> = tracks + .iter() + .map(|t| create_trex(t.md.media_type(), t.track_id)) + .collect(); + create_box(MVEX, &boxes) +} + +/// Creates a `mvhd` ISOBMFF box. +fn create_mvhd(duration: u32) -> Vec { + let bytes: Vec = vec![ + 0x00, // version 0 + 0x00, + 0x00, + 0x00, // flags + 0x00, + 0x00, + 0x00, + 0x01, // creation_time + 0x00, + 0x00, + 0x00, + 0x02, // modification_time + 0x00, + 0x01, + 0x5f, + 0x90, // timescale, 90,000 "ticks" per second + ((duration & 0xff000000) >> 24) as u8, + ((duration & 0xff0000) >> 16) as u8, + ((duration & 0xff00) >> 8) as u8, + (duration & 0xff) as u8, // duration + 0x00, + 0x01, + 0x00, + 0x00, // 1.0 rate + 0x01, + 0x00, // 1.0 volume + 0x00, + 0x00, // reserved + 0x00, + 0x00, + 0x00, + 0x00, // reserved + 0x00, + 0x00, + 0x00, + 0x00, // reserved + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, // transformation: unity matrix + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, // pre_defined + 0xff, + 0xff, + 0xff, + 0xff, // next_track_ID + ]; + create_box(MVHD, &vec![bytes]) +} + +/// Creates a `sdtp` ISOBMFF box. +fn create_sdtp(samples: &[SampleInfo]) -> Vec { + let mut bytes = Vec::with_capacity(4 + samples.len()); + + // leave the full box header (4 bytes) all zero + + // write the sample table + for i in 0..samples.len() { + if let Some(ref flags) = samples[i].flags { + bytes[i + 4] = + (flags.depends_on << 4) | (flags.is_depended_on << 2) | flags.has_redundancy; + } + } + + create_box(SDTP, &vec![bytes]) +} + +/// Creates a `stbl` ISOBMFF box. +fn create_stbl(md: &IsobmffMetadata) -> Vec { + let empty_content: Vec = vec![ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // entry_count + ]; + create_box( + STBL, + &vec![ + create_stsd(md), + create_box(STTS, &vec![empty_content.clone()]), + create_box(STSC, &vec![empty_content.clone()]), + create_box(STSZ, &vec![empty_content.clone()]), + create_box(STCO, &vec![empty_content]), + ], + ) +} + +pub(super) enum IsobmffMetadata { + Video(VideoMetadata), + Audio(AudioMetadata), +} + +impl IsobmffMetadata { + fn media_type(&self) -> MediaType { + match self { + IsobmffMetadata::Video(_) => MediaType::Video, + IsobmffMetadata::Audio(_) => MediaType::Audio, + } + } +} + +pub(super) struct VideoMetadata { + nal_video_properties: NalVideoProperties, + ppss: Vec>, + spss: Vec>, +} + +pub(super) struct AudioMetadata { + audio_object_type: u8, + sampling_frequency_index: u8, + channel_count: u16, + sample_size: u32, + sample_rate: u32, +} + +/// Creates a `stsd` ISOBMFF box. +fn create_stsd(md: &IsobmffMetadata) -> Vec { + return create_box( + STSD, + &vec![ + vec![ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01, + ], + match md { + IsobmffMetadata::Audio(md) => create_mp4a(md), + IsobmffMetadata::Video(md) => create_avc1(md), + }, + ], + ); +} + +/// Creates an `avcc` ISOBMFF box. +fn create_avcc( + md: &VideoMetadata, + sequence_parameter_sets: Vec, + picture_parameter_sets: Vec, +) -> Vec { + let props = &md.nal_video_properties; + let mut content = vec![ + 0x01, // configurationVersion + props.profile_idc(), // AVCProfileIndication + props.profile_compatibility(), // profile_compatibility + props.level_idc(), // AVCLevelIndication + 0xff, // lengthSizeMinusOne, hard-coded to 4 bytes + md.spss.len() as u8, + ]; + content.extend(sequence_parameter_sets); + content.push(md.ppss.len() as u8); + content.extend(picture_parameter_sets); + create_box(AVCC, &vec![content]) +} + +/// Creates an `avc1` ISOBMFF box. +fn create_avc1(md: &VideoMetadata) -> Vec { + let mut sequence_parameter_sets: Vec = vec![]; + let mut picture_parameter_sets: Vec = vec![]; + let spss = &md.spss; + let ppss = &md.ppss; + let props = &md.nal_video_properties; + + // assemble the SPSs + for sps in spss { + sequence_parameter_sets.push(((sps.len() & 0xff00) >> 8) as u8); + sequence_parameter_sets.push((sps.len() & 0xff) as u8); // sequenceParameterSetLength + sequence_parameter_sets.extend(sps); // SPS + } + + // assemble the PPSs + for pps in ppss { + picture_parameter_sets.push(((pps.len() & 0xff00) >> 8) as u8); + picture_parameter_sets.push((pps.len() & 0xff) as u8); + picture_parameter_sets.extend(pps); + } + + let mut avc1_box: Vec> = vec![ + vec![ + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, // reserved + 0x00, + 0x01, // data_reference_index + 0x00, + 0x00, // pre_defined + 0x00, + 0x00, // reserved + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, // pre_defined + ((props.width() & 0xff00) >> 8) as u8, + (props.width() & 0xff) as u8, // width + ((props.height() & 0xff00) >> 8) as u8, + (props.height() & 0xff) as u8, // height + 0x00, + 0x48, + 0x00, + 0x00, // horizresolution + 0x00, + 0x48, + 0x00, + 0x00, // vertresolution + 0x00, + 0x00, + 0x00, + 0x00, // reserved + 0x00, + 0x01, // frame_count + 0x13, + 0x76, + 0x69, + 0x64, + 0x65, + 0x6f, + 0x6a, + 0x73, + 0x2d, + 0x63, + 0x6f, + 0x6e, + 0x74, + 0x72, + 0x69, + 0x62, + 0x2d, + 0x68, + 0x6c, + 0x73, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, // compressorname + 0x00, + 0x18, // depth = 24 + 0x11, + 0x11, // pre_defined = -1 + ], + create_avcc(md, sequence_parameter_sets, picture_parameter_sets), + create_box( + BTRT, + &vec![vec![ + 0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB + 0x00, 0x2d, 0xc6, 0xc0, // maxBitrate + 0x00, 0x2d, 0xc6, 0xc0, // avgBitrate + ]], + ), + ]; + + let (h_spacing, v_spacing) = props.sar_ratio(); + let pasp_box: Vec = create_box( + PASP, + &vec![vec![0, 0, 0, h_spacing & 0xff, 0, 0, 0, v_spacing & 0xff]], + ); + avc1_box.push(pasp_box); + create_box(AVC1, &avc1_box) +} + +/// Creates a `mp4a` ISOBMFF box. +fn create_mp4a(md: &AudioMetadata) -> Vec { + create_box( + MP4A, + &vec![ + vec![ + // SampleEntry, ISO/IEC 14496-12 + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, // reserved + 0x00, + 0x01, // data_reference_index + // AudioSampleEntry, ISO/IEC 14496-12 + 0x00, + 0x00, + 0x00, + 0x00, // reserved + 0x00, + 0x00, + 0x00, + 0x00, // reserved + ((md.channel_count & 0xff00) >> 8) as u8, + (md.channel_count & 0xff) as u8, // channelcount + ((md.sample_size & 0xff00) >> 8) as u8, + (md.sample_size & 0xff) as u8, // samplesize + 0x00, + 0x00, // pre_defined + 0x00, + 0x00, // reserved + ((md.sample_rate & 0xff00) >> 8) as u8, + (md.sample_rate & 0xff) as u8, + 0x00, + 0x00, // samplerate, 16.16 + + // MP4AudioSampleEntry, ISO/IEC 14496-14 + ], + create_esds( + md.audio_object_type, + md.sampling_frequency_index, + md.channel_count, + ), + ], + ) +} + +/// Creates a `tkhd` ISOBMFF box. +fn create_tkhd(track_id: u32, duration: u32, width: u32, height: u32) -> Vec { + create_box( + TKHD, + &vec![vec![ + 0x00, // version 0 + 0x00, + 0x00, + 0x07, // flags + 0x00, + 0x00, + 0x00, + 0x00, // creation_time + 0x00, + 0x00, + 0x00, + 0x00, // modification_time + ((track_id & 0xff000000) >> 24) as u8, + ((track_id & 0xff0000) >> 16) as u8, + ((track_id & 0xff00) >> 8) as u8, + (track_id & 0xff) as u8, // track_ID + 0x00, + 0x00, + 0x00, + 0x00, // reserved + ((duration & 0xff000000) >> 24) as u8, + ((duration & 0xff0000) >> 16) as u8, + ((duration & 0xff00) >> 8) as u8, + (duration & 0xff) as u8, // duration + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, // reserved + 0x00, + 0x00, // layer + 0x00, + 0x00, // alternate_group + 0x01, + 0x00, // non-audio track_info volume + 0x00, + 0x00, // reserved + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, // transformation: unity matrix + ((width & 0xff00) >> 8) as u8, + (width & 0xff) as u8, + 0x00, + 0x00, // width + ((height & 0xff00) >> 8) as u8, + (height & 0xff) as u8, + 0x00, + 0x00, // height + ]], + ) +} + +/// Creates a `traf` ISOBMFF box. +fn create_traf(track_info: &TrackInfo) -> Vec { + let track_id = track_info.track_id; + let track_fragment_header = create_box( + TFHD, + &vec![vec![ + 0x00, // version 0 + 0x00, + 0x00, + 0x3a, // flags + ((track_id & 0xff000000) >> 24) as u8, + ((track_id & 0xff0000) >> 16) as u8, + ((track_id & 0xff00) >> 8) as u8, + (track_id & 0xff) as u8, // track_ID + 0x00, + 0x00, + 0x00, + 0x01, // sample_description_index + 0x00, + 0x00, + 0x00, + 0x00, // default_sample_duration + 0x00, + 0x00, + 0x00, + 0x00, // default_sample_size + 0x00, + 0x00, + 0x00, + 0x00, // default_sample_flags + ]], + ); + + let upper_word_base_media_decode_time = track_info.base_media_decode_time / u32::MAX; + let lower_word_base_media_decode_time = track_info.base_media_decode_time % u32::MAX; + let track_fragment_decode_time = create_box( + TFDT, + &vec![vec![ + 0x01, // version 1 + 0x00, + 0x00, + 0x00, // flags + // base_media_decode_time + ((upper_word_base_media_decode_time >> 24) & 0xff) as u8, + ((upper_word_base_media_decode_time >> 16) & 0xff) as u8, + ((upper_word_base_media_decode_time >> 8) & 0xff) as u8, + (upper_word_base_media_decode_time & 0xff) as u8, + ((lower_word_base_media_decode_time >> 24) & 0xff) as u8, + ((lower_word_base_media_decode_time >> 16) & 0xff) as u8, + ((lower_word_base_media_decode_time >> 8) & 0xff) as u8, + (lower_word_base_media_decode_time & 0xff) as u8, + ]], + ); + + // the data offset specifies the number of bytes from the start of + // the containing moof to the first payload byte of the associated + // mdat + let data_offset = 32 + // tfhd + 20 + // tfdt + 8 + // traf header + 16 + // mfhd + 8 + // moof header + 8; // mdat header + + match track_info.md { + IsobmffMetadata::Audio(_) => { + // audio tracks require less metadata + let track_fragment_run = + create_trun(MediaType::Audio, &track_info.samples, data_offset); + create_box( + TRAF, + &vec![ + track_fragment_header, + track_fragment_decode_time, + track_fragment_run, + ], + ) + } + IsobmffMetadata::Video(_) => { + // video tracks should contain an independent and disposable samples + // box (sdtp) + // generate one and adjust offsets to match + let sample_dependency_table = create_sdtp(&track_info.samples); + let track_fragment_run = create_trun( + MediaType::Video, + &track_info.samples, + (sample_dependency_table.len() as u32) + data_offset, + ); + create_box( + TRAF, + &vec![ + track_fragment_header, + track_fragment_decode_time, + track_fragment_run, + sample_dependency_table, + ], + ) + } + } +} + +/// Creates a `trak` ISOBMFF box. +fn create_trak( + md: &IsobmffMetadata, + track_id: u32, + duration: Option, + sample_rate: Option, +) -> Vec { + let duration = duration.unwrap_or(0xffffffff); + let (width, height) = match md { + IsobmffMetadata::Video(info) => ( + info.nal_video_properties.width(), + info.nal_video_properties.height(), + ), + IsobmffMetadata::Audio(_) => (0, 0), + }; + return create_box( + TRAK, + &vec![ + create_tkhd(track_id, duration, width, height), + create_mdia(md, duration, sample_rate), + ], + ); +} + +/// Creates a `trex` ISOBMFF box. +fn create_trex(media_type: MediaType, track_id: u32) -> Vec { + let mut result = vec![ + 0x00, // version 0 + 0x00, + 0x00, + 0x00, // flags + ((track_id & 0xff000000) >> 24) as u8, + ((track_id & 0xff0000) >> 16) as u8, + ((track_id & 0xff00) >> 8) as u8, + (track_id & 0xff) as u8, // track_ID + 0x00, + 0x00, + 0x00, + 0x01, // default_sample_description_index + 0x00, + 0x00, + 0x00, + 0x00, // default_sample_duration + 0x00, + 0x00, + 0x00, + 0x00, // default_sample_size + 0x00, + 0x01, + 0x00, + 0x01, // default_sample_flags + ]; + // the last two bytes of default_sample_flags is the sample + // degradation priority, a hint about the importance of this sample + // relative to others. Lower the degradation priority for all sample + // other than video. + if media_type != MediaType::Video { + let last_idx = result.len() - 1; + result[last_idx] = 0x00; + } + create_box(TREX, &vec![result]) +} + +pub(super) struct SampleFlag { + is_leading: u8, + depends_on: u8, + is_depended_on: u8, + has_redundancy: u8, + is_non_sync_sample: u8, +} + +pub(super) struct SampleInfo { + duration: u32, + size: u32, + flags: Option, + composition_time_offset: Option, +} + +/// Creates the header of a `trun` ISOBMFF box. +/// +/// This method assumes all samples are uniform. That is, if a +/// duration is present for the first sample, it will be present for +/// all subsequent samples. +fn create_trun_header(samples: &[SampleInfo], offset: u32) -> Vec { + let mut presence_flags: u8 = 0; + if !samples.is_empty() { + presence_flags = presence_flags | 0x1; + presence_flags = presence_flags | 0x2; + if let Some(_) = samples[0].flags { + presence_flags = presence_flags | 0x4; + } + if let Some(_) = samples[0].composition_time_offset { + presence_flags = presence_flags | 0x8; + } + } + vec![ + 0x00, // version 0 + 0x00, + presence_flags, + 0x01, // flags + ((samples.len() & 0xff000000) >> 24) as u8, + ((samples.len() & 0xff0000) >> 16) as u8, + ((samples.len() & 0xff00) >> 8) as u8, + (samples.len() & 0xff) as u8, // sample_count + ((offset & 0xff000000) >> 24) as u8, + ((offset & 0xff0000) >> 16) as u8, + ((offset & 0xff00) >> 8) as u8, + (offset & 0xff) as u8, // data_offset + ] +} + +/// Creates a `trun` ISOBMFF box for a video media. +fn create_video_trun(samples: &[SampleInfo], initial_offset: u32) -> Vec { + let offset = initial_offset + 8 + 12 + 16 * samples.len() as u32; + let header = create_trun_header(samples, offset); + let mut bytes = Vec::with_capacity(header.len() + samples.len() * 16); + let mut bytes_offset = header.len(); + bytes.extend(header); + for sample in samples { + bytes[bytes_offset] = ((sample.duration & 0xff000000) >> 24) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.duration & 0xff0000) >> 16) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.duration & 0xff00) >> 8) as u8; + bytes_offset += 1; + bytes[bytes_offset] = (sample.duration & 0xff) as u8; // sample_duration + bytes_offset += 1; + bytes[bytes_offset] = ((sample.size & 0xff000000) >> 24) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.size & 0xff0000) >> 16) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.size & 0xff00) >> 8) as u8; + bytes_offset += 1; + bytes[bytes_offset] = (sample.size & 0xff) as u8; // sample_size + bytes_offset += 1; + if let Some(ref flags) = sample.flags { + bytes[bytes_offset] = (flags.is_leading << 2) | flags.depends_on; + bytes_offset += 1; + bytes[bytes_offset] = (flags.is_depended_on << 6) + | (flags.has_redundancy << 4) + | flags.is_non_sync_sample; + bytes_offset += 3; // Skip degradation priority + } else { + bytes_offset += 4; + } + let composition_time_offset = if let Some(offset) = sample.composition_time_offset { + offset + } else { + 0 + }; + bytes[bytes_offset] = ((composition_time_offset & 0xff000000) >> 24) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((composition_time_offset & 0xff0000) >> 16) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((composition_time_offset & 0xff00) >> 8) as u8; + bytes_offset += 1; + + // sample_composition_time_offset + bytes[bytes_offset] = (composition_time_offset & 0xff) as u8; + bytes_offset += 1; + } + create_box(TRUN, &vec![bytes]) +} + +/// Creates a `trun` ISOBMFF box for an audio media. +fn create_audio_trun(samples: &[SampleInfo], initial_offset: u32) -> Vec { + let offset = initial_offset + 8 + 12 + 8 * samples.len() as u32; + let header = create_trun_header(samples, offset); + let mut bytes = Vec::with_capacity(header.len() + samples.len() * 8); + let mut bytes_offset = header.len(); + bytes.extend(header); + for sample in samples { + bytes[bytes_offset] = ((sample.duration & 0xff000000) >> 24) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.duration & 0xff0000) >> 16) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.duration & 0xff00) >> 8) as u8; + bytes_offset += 1; + bytes[bytes_offset] = (sample.duration & 0xff) as u8; // sample_duration + bytes_offset += 1; + bytes[bytes_offset] = ((sample.size & 0xff000000) >> 24) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.size & 0xff0000) >> 16) as u8; + bytes_offset += 1; + bytes[bytes_offset] = ((sample.size & 0xff00) >> 8) as u8; + bytes_offset += 1; + bytes[bytes_offset] = (sample.size & 0xff) as u8; // sample_size + bytes_offset += 1; + } + create_box(TRUN, &vec![bytes]) +} + +/// Creates a `trun` ISOBMFF box. +fn create_trun(media_type: MediaType, samples: &[SampleInfo], initial_offset: u32) -> Vec { + if media_type == MediaType::Audio { + create_audio_trun(samples, initial_offset) + } else { + create_video_trun(samples, initial_offset) + } +} + +/// Creates an fmp4 initialization segment for segments with the corresponding +/// track information. +pub(super) fn create_init_segment(tracks: &[TrackInfo]) -> Vec { + let ftyp_box = create_ftyp(); + let moov_box = create_moov(tracks); + let mut result = Vec::with_capacity(ftyp_box.len() + moov_box.len()); + result.extend(ftyp_box); + result.extend(moov_box); + result +} diff --git a/src/rs-core/transmux/mod.rs b/src/rs-core/transmux/mod.rs index 04bf324..131e5f4 100644 --- a/src/rs-core/transmux/mod.rs +++ b/src/rs-core/transmux/mod.rs @@ -6,6 +6,7 @@ mod elementary_packet_parser; mod exp_golomb; +mod fmp4; mod nal_unit_producer; mod transport_packet_parser; mod transport_stream_splitter; diff --git a/src/rs-core/transmux/nal_unit_producer.rs b/src/rs-core/transmux/nal_unit_producer.rs index 1764345..bd3f828 100644 --- a/src/rs-core/transmux/nal_unit_producer.rs +++ b/src/rs-core/transmux/nal_unit_producer.rs @@ -181,6 +181,27 @@ pub(super) struct NalVideoProperties { sar_ratio: (u8, u8), } +impl NalVideoProperties { + pub(super) fn width(&self) -> u32 { + self.width + } + pub(super) fn height(&self) -> u32 { + self.height + } + pub(super) fn profile_idc(&self) -> u8 { + self.profile_idc + } + pub(super) fn level_idc(&self) -> u8 { + self.level_idc + } + pub(super) fn profile_compatibility(&self) -> u8 { + self.profile_compatibility + } + pub(super) fn sar_ratio(&self) -> (u8, u8) { + self.sar_ratio + } +} + /// Produces H.264 NAL unit data events. pub(super) struct NalUnitProducer { nal_unit_finder: NalUnitFinder,