Feature: Support mp3 in TS

xqq · Dec 31, 2022 · 081a3eb · 081a3eb
1 parent 4ca8797
commit 081a3eb
Show file tree

Hide file tree

Showing 6 changed files with 229 additions and 48 deletions.
diff --git a/d.ts/src/demux/mp3.d.ts b/d.ts/src/demux/mp3.d.ts
@@ -0,0 +1,6 @@
+export declare class MP3Data {  // Header-derived properties plus the raw bytes of one MPEG audio payload
+    object_type: number;  // MPEG-4 audio object type; TSDemuxer.parseMP3Payload stores 32, 33 or 34 for Layer 1, 2 or 3
+    sample_rate: number;  // sample rate looked up from the frame header's version and sampling-frequency index
+    channel_count: number;  // parseMP3Payload stores 1 when the header's channel mode is 3, otherwise 2
+    data: Uint8Array;  // the payload bytes the fields above were parsed from
+}
diff --git a/d.ts/src/demux/pat-pmt-pes.d.ts b/d.ts/src/demux/pat-pmt-pes.d.ts
@@ -28,6 +28,7 @@ export declare class PMT {
         h264: number | undefined;
         h265: number | undefined;
         adts_aac: number | undefined;
+        mp3: number | undefined;
     };
     pes_private_data_pids: {
         [pid: number]: boolean;

diff --git a/d.ts/src/demux/ts-demuxer.d.ts b/d.ts/src/demux/ts-demuxer.d.ts
@@ -71,6 +71,7 @@ declare class TSDemuxer extends BaseDemuxer {
     private dispatchAudioMediaSegment;
     private dispatchAudioVideoMediaSegment;
     private parseAACPayload;
+    private parseMP3Payload;
     private detectAudioMetadataChange;
     private dispatchAudioInitSegment;
     private dispatchPESPrivateDataDescriptor;

diff --git a/src/demux/mp3.ts b/src/demux/mp3.ts
@@ -0,0 +1,7 @@
+export class MP3Data {  // Header-derived properties plus the raw bytes of one MPEG audio payload
+    object_type: number;  // MPEG-4 audio object type; TSDemuxer.parseMP3Payload stores 32, 33 or 34 for Layer 1, 2 or 3
+    sample_rate: number;  // sample rate looked up from the frame header's version and sampling-frequency index
+    channel_count: number;  // parseMP3Payload stores 1 when the header's channel mode is 3, otherwise 2
+
+    data: Uint8Array;  // the payload bytes the fields above were parsed from
+}
diff --git a/src/demux/pat-pmt-pes.ts b/src/demux/pat-pmt-pes.ts
@@ -35,10 +35,12 @@ export class PMT {
         h264: number | undefined,
         h265: number | undefined;
         adts_aac: number | undefined
+        mp3: number | undefined
     } = {
         h264: undefined,
         h265: undefined,
-        adts_aac: undefined
+        adts_aac: undefined,
+        mp3: undefined
     };
 
     pes_private_data_pids: {

diff --git a/src/demux/ts-demuxer.ts b/src/demux/ts-demuxer.ts
@@ -31,6 +31,28 @@ import { readSCTE35, SCTE35Data } from './scte35';
 import { H265AnnexBParser, H265NaluHVC1, H265NaluPayload, H265NaluType, HEVCDecoderConfigurationRecord } from './h265';
 import H265Parser from './h265-parser';
 import { SMPTE2038Data, smpte2038parse } from './smpte2038';
+import { MP3Data } from './mp3';
+
+type AACAudioMetadata = {  // AAC variant of TSDemuxer.audio_metadata_; filled from the AACFrame that triggers the first init segment
+    codec: 'aac',  // discriminant that tells this variant apart from MP3AudioMetadata
+    audio_object_type: MPEG4AudioObjectTypes;
+    sampling_freq_index: MPEG4SamplingFrequencyIndex;
+    sampling_frequency: number;  // parseAACPayload uses 1024 / sampling_frequency * 1000 as the reference frame duration
+    channel_config: number;
+};
+type MP3AudioMetadata = {  // MP3 variant of TSDemuxer.audio_metadata_; filled by parseMP3Payload from the frame header
+    codec: 'mp3',  // discriminant that tells this variant apart from AACAudioMetadata
+    object_type: number,  // 32, 33 or 34 for Layer 1, 2 or 3
+    sample_rate: number,  // reported as meta.audioSampleRate by dispatchAudioInitSegment
+    channel_count: number;  // reported as meta.channelCount by dispatchAudioInitSegment
+};
+type AudioData = {  // One parsed audio unit tagged with its codec; argument type of detectAudioMetadataChange and dispatchAudioInitSegment
+    codec: 'aac';  // discriminant: `data` is an AACFrame
+    data: AACFrame;
+} | {
+    codec: 'mp3';  // discriminant: `data` is an MP3Data
+    data: MP3Data;
+}
 
 class TSDemuxer extends BaseDemuxer {
 
@@ -67,12 +89,8 @@ class TSDemuxer extends BaseDemuxer {
         details: undefined
     };
 
-    private audio_metadata_: {
-        audio_object_type: MPEG4AudioObjectTypes;
-        sampling_freq_index: MPEG4SamplingFrequencyIndex;
-        sampling_frequency: number;
-        channel_config: number;
-    } = {
+    private audio_metadata_: AACAudioMetadata | MP3AudioMetadata = {
+        codec: undefined,
         audio_object_type: undefined,
         sampling_freq_index: undefined,
         sampling_frequency: undefined,
@@ -271,6 +289,7 @@ class TSDemuxer extends BaseDemuxer {
                     if (pid === this.pmt_.common_pids.h264
                             || pid === this.pmt_.common_pids.h265
                             || pid === this.pmt_.common_pids.adts_aac
+                            || pid === this.pmt_.common_pids.mp3
                             || this.pmt_.pes_private_data_pids[pid] === true
                             || this.pmt_.timed_id3_pids[pid] === true) {
                         this.handlePESSlice(chunk,
@@ -557,6 +576,7 @@ class TSDemuxer extends BaseDemuxer {
             switch (pes_data.stream_type) {
                 case StreamType.kMPEG1Audio:
                 case StreamType.kMPEG2Audio:
+                    this.parseMP3Payload(payload, pts);
                     break;
                 case StreamType.kPESPrivateData:
                     if (this.pmt_.smpte2038_pids[pes_data.pid]) {
@@ -715,6 +735,8 @@ class TSDemuxer extends BaseDemuxer {
                 pmt.common_pids.h265 = elementary_PID;
             } else if (stream_type === StreamType.kADTSAAC && !pmt.common_pids.adts_aac) {
                 pmt.common_pids.adts_aac = elementary_PID;
+            } else if ((stream_type === StreamType.kMPEG1Audio || stream_type === StreamType.kMPEG2Audio) && !pmt.common_pids.mp3) {
+                pmt.common_pids.mp3 = elementary_PID;
             } else if (stream_type === StreamType.kPESPrivateData) {
                 pmt.pes_private_data_pids[elementary_PID] = true;
                 if (ES_info_length > 0) {
@@ -752,7 +774,7 @@ class TSDemuxer extends BaseDemuxer {
             if (pmt.common_pids.h264 || pmt.common_pids.h265) {
                 this.has_video_ = true;
             }
-            if (pmt.common_pids.adts_aac) {
+            if (pmt.common_pids.adts_aac || pmt.common_pids.mp3) {
                 this.has_audio_ = true;
             }
         }
@@ -1076,22 +1098,25 @@ class TSDemuxer extends BaseDemuxer {
 
         if (pts != undefined) {
             base_pts_ms = pts / this.timescale_;
-        } else if (this.aac_last_sample_pts_ != undefined) {
-            ref_sample_duration = 1024 / this.audio_metadata_.sampling_frequency * 1000;
-            base_pts_ms = this.aac_last_sample_pts_ + ref_sample_duration;
-        } else {
-            Log.w(this.TAG, `AAC: Unknown pts`);
-            return;
         }
+        if (this.audio_metadata_.codec === 'aac') {
+            if (pts == undefined && this.aac_last_sample_pts_ != undefined) {
+                ref_sample_duration = 1024 / this.audio_metadata_.sampling_frequency * 1000;
+                base_pts_ms = this.aac_last_sample_pts_ + ref_sample_duration;
+            } else if (pts == undefined){
+                Log.w(this.TAG, `AAC: Unknown pts`);
+                return;
+            }
 
-        if (this.aac_last_incomplete_data_ && this.aac_last_sample_pts_) {
-            ref_sample_duration = 1024 / this.audio_metadata_.sampling_frequency * 1000;
-            let new_pts_ms = this.aac_last_sample_pts_ + ref_sample_duration;
+            if (this.aac_last_incomplete_data_ && this.aac_last_sample_pts_) {
+                ref_sample_duration = 1024 / this.audio_metadata_.sampling_frequency * 1000;
+                let new_pts_ms = this.aac_last_sample_pts_ + ref_sample_duration;
 
-            if (Math.abs(new_pts_ms - base_pts_ms) > 1) {
-                Log.w(this.TAG, `AAC: Detected pts overlapped, ` +
-                                `expected: ${new_pts_ms}ms, PES pts: ${base_pts_ms}ms`);
-                base_pts_ms = new_pts_ms;
+                if (Math.abs(new_pts_ms - base_pts_ms) > 1) {
+                    Log.w(this.TAG, `AAC: Detected pts overlapped, ` +
+                                    `expected: ${new_pts_ms}ms, PES pts: ${base_pts_ms}ms`);
+                    base_pts_ms = new_pts_ms;
+                }
             }
         }
 
@@ -1102,18 +1127,25 @@ class TSDemuxer extends BaseDemuxer {
 
         while ((aac_frame = adts_parser.readNextAACFrame()) != null) {
             ref_sample_duration = 1024 / aac_frame.sampling_frequency * 1000;
+            const audio_sample = {
+                codec: 'aac',
+                data: aac_frame
+            } as const;
 
             if (this.audio_init_segment_dispatched_ == false) {
-                this.audio_metadata_.audio_object_type = aac_frame.audio_object_type;
-                this.audio_metadata_.sampling_freq_index = aac_frame.sampling_freq_index;
-                this.audio_metadata_.sampling_frequency = aac_frame.sampling_frequency;
-                this.audio_metadata_.channel_config = aac_frame.channel_config;
-                this.dispatchAudioInitSegment(aac_frame);
-            } else if (this.detectAudioMetadataChange(aac_frame)) {
+                this.audio_metadata_ = {
+                    codec: 'aac',
+                    audio_object_type: aac_frame.audio_object_type,
+                    sampling_freq_index: aac_frame.sampling_freq_index,
+                    sampling_frequency: aac_frame.sampling_frequency,
+                    channel_config: aac_frame.channel_config
+                };
+                this.dispatchAudioInitSegment(audio_sample);
+            } else if (this.detectAudioMetadataChange(audio_sample)) {
                 // flush stashed frames before notify new AudioSpecificConfig
                 this.dispatchAudioMediaSegment();
                 // notify new AAC AudioSpecificConfig
-                this.dispatchAudioInitSegment(aac_frame);
+                this.dispatchAudioInitSegment(audio_sample);
             }
 
             last_sample_pts_ms = sample_pts_ms;
@@ -1140,42 +1172,174 @@ class TSDemuxer extends BaseDemuxer {
         }
     }
 
-    private detectAudioMetadataChange(frame: AACFrame): boolean {
-        if (frame.audio_object_type !== this.audio_metadata_.audio_object_type) {
-            Log.v(this.TAG, `AAC: AudioObjectType changed from ` +
-                            `${this.audio_metadata_.audio_object_type} to ${frame.audio_object_type}`);
-            return true;
+    /**
+     * Handles one PES payload of MPEG-1/2 audio (Layer 1/2/3).
+     *
+     * Reads the 4-byte frame header at the start of `data` to derive the
+     * sample rate, channel count and MPEG-4 audio object type, dispatches an
+     * audio init segment for the first payload or whenever those properties
+     * change, then queues the whole payload as a single audio sample.
+     *
+     * Payloads are dropped (with a warning) when they are too short to hold a
+     * header, when no pts is available, or when the header selects a reserved
+     * version / sampling-frequency index.
+     *
+     * @param data PES payload; its first four bytes are read as the frame header
+     * @param pts  PES presentation timestamp; divided by `this.timescale_` to
+     *             obtain the sample's pts and dts
+     */
+    private parseMP3Payload(data: Uint8Array, pts: number) {
+        if (this.has_video_ && !this.video_init_segment_dispatched_) {
+            // If first video IDR frame hasn't been detected,
+            // Wait for first IDR frame and video init segment being dispatched
+            return;
         }
 
-        if (frame.sampling_freq_index !== this.audio_metadata_.sampling_freq_index) {
-            Log.v(this.TAG, `AAC: SamplingFrequencyIndex changed from ` +
-                            `${this.audio_metadata_.sampling_freq_index} to ${frame.sampling_freq_index}`);
-            return true;
+        if (data.byteLength < 4) {
+            // Header bytes data[1]..data[3] are read below; bail out instead of parsing undefined
+            Log.w(this.TAG, `MP3: Payload too short to contain a frame header`);
+            return;
+        }
+
+        if (pts == undefined) {
+            // Without a pts the sample's pts/dts would become NaN
+            Log.w(this.TAG, `MP3: Unknown pts`);
+            return;
+        }
+
+        // Sample rates indexed by the header's 2-bit sampling-frequency index; index 3 is reserved
+        const _mpegAudioV10SampleRateTable = [44100, 48000, 32000, 0];
+        const _mpegAudioV20SampleRateTable = [22050, 24000, 16000, 0];
+        const _mpegAudioV25SampleRateTable = [11025, 12000, 8000,  0];
+
+        const ver = (data[1] >>> 3) & 0x03;
+        const layer = (data[1] & 0x06) >> 1;
+        const sampling_freq_index = (data[2] & 0x0C) >>> 2;
+        const channel_mode = (data[3] >>> 6) & 0x03;
+        const channel_count = channel_mode !== 3 ? 2 : 1;  // mode 3 is the only single-channel mode
+
+        let sample_rate = 0;
+        let object_type = 34;  // Layer-3, listed in MPEG-4 Audio Object Types
+
+        switch (ver) {
+            case 0:  // MPEG 2.5
+                sample_rate = _mpegAudioV25SampleRateTable[sampling_freq_index];
+                break;
+            case 2:  // MPEG 2
+                sample_rate = _mpegAudioV20SampleRateTable[sampling_freq_index];
+                break;
+            case 3:  // MPEG 1
+                sample_rate = _mpegAudioV10SampleRateTable[sampling_freq_index];
+                break;
         }
 
-        if (frame.channel_config !== this.audio_metadata_.channel_config) {
-            Log.v(this.TAG, `AAC: Channel configuration changed from ` +
-                            `${this.audio_metadata_.channel_config} to ${frame.channel_config}`);
+        switch (layer) {
+            case 1:  // Layer 3
+                object_type = 34;
+                break;
+            case 2:  // Layer 2
+                object_type = 33;
+                break;
+            case 3:  // Layer 1
+                object_type = 32;
+                break;
+        }
+
+        if (sample_rate === 0) {
+            // Reserved version or sampling-frequency index: an init segment built from it
+            // would carry audioSampleRate 0 and an infinite refSampleDuration
+            Log.w(this.TAG, `MP3: Invalid frame header, unsupported sample rate`);
+            return;
+        }
+
+        const sample = new MP3Data();
+        sample.object_type = object_type;
+        sample.sample_rate = sample_rate;
+        sample.channel_count = channel_count;
+        sample.data = data;
+        const audio_sample = {
+            codec: 'mp3',
+            data: sample
+        } as const;
+
+        const is_first_init = this.audio_init_segment_dispatched_ == false;
+        if (is_first_init || this.detectAudioMetadataChange(audio_sample)) {
+            if (!is_first_init) {
+                // flush stashed frames before notify new audio metadata
+                this.dispatchAudioMediaSegment();
+            }
+            // dispatchAudioInitSegment() reads sample rate and channel count from
+            // audio_metadata_, so refresh it on every change, not only on first init;
+            // otherwise the init segment is stale and the change is re-detected forever
+            this.audio_metadata_ = {
+                codec: 'mp3',
+                object_type,
+                sample_rate,
+                channel_count
+            }
+            // notify new MP3 audio metadata
+            this.dispatchAudioInitSegment(audio_sample);
+        }
+
+        const pts_ms = pts / this.timescale_;
+        let mp3_sample = {
+            unit: data,
+            length: data.byteLength,
+            pts: pts_ms,
+            dts: pts_ms
+        };
+        this.audio_track_.samples.push(mp3_sample);
+        this.audio_track_.length += data.byteLength;
+    }
+
+    /**
+     * Compares a freshly parsed audio unit against `this.audio_metadata_`.
+     *
+     * A different codec always counts as a change. For matching codecs the
+     * per-codec properties are compared: audio object type, sampling frequency
+     * index and channel configuration for AAC; object type, sample rate and
+     * channel count for MP3. The first difference found is logged.
+     *
+     * @param sample parsed audio unit tagged with its codec
+     * @returns true when any compared property differs, false otherwise
+     */
+    private detectAudioMetadataChange(sample: AudioData): boolean {
+        if (sample.codec !== this.audio_metadata_.codec) {
+            Log.v(this.TAG, `Audio: Audio Codecs changed from ` +
+                                `${this.audio_metadata_.codec} to ${sample.codec}`);
             return true;
         }
 
+        if (sample.codec === 'aac' && this.audio_metadata_.codec === 'aac') {
+            const frame = sample.data;
+            if (frame.audio_object_type !== this.audio_metadata_.audio_object_type) {
+                Log.v(this.TAG, `AAC: AudioObjectType changed from ` +
+                                `${this.audio_metadata_.audio_object_type} to ${frame.audio_object_type}`);
+                return true;
+            }
+
+            if (frame.sampling_freq_index !== this.audio_metadata_.sampling_freq_index) {
+                Log.v(this.TAG, `AAC: SamplingFrequencyIndex changed from ` +
+                                `${this.audio_metadata_.sampling_freq_index} to ${frame.sampling_freq_index}`);
+                return true;
+            }
+
+            if (frame.channel_config !== this.audio_metadata_.channel_config) {
+                Log.v(this.TAG, `AAC: Channel configuration changed from ` +
+                                `${this.audio_metadata_.channel_config} to ${frame.channel_config}`);
+                return true;
+            }
+        } else if (sample.codec === 'mp3' && this.audio_metadata_.codec === 'mp3') {
+            const data = sample.data;
+            if (data.object_type !== this.audio_metadata_.object_type) {
+                Log.v(this.TAG, `MP3: AudioObjectType changed from ` +
+                                `${this.audio_metadata_.object_type} to ${data.object_type}`);
+                return true;
+            }
+
+            if (data.sample_rate !== this.audio_metadata_.sample_rate) {
+                // The values logged here are sample rates, not frequency indices
+                Log.v(this.TAG, `MP3: SampleRate changed from ` +
+                                `${this.audio_metadata_.sample_rate} to ${data.sample_rate}`);
+                return true;
+            }
+
+            if (data.channel_count !== this.audio_metadata_.channel_count) {
+                Log.v(this.TAG, `MP3: Channel count changed from ` +
+                                `${this.audio_metadata_.channel_count} to ${data.channel_count}`);
+                return true;
+            }
+        }
+
         return false;
     }
 
-    private dispatchAudioInitSegment(aac_frame: AACFrame) {
-        let audio_specific_config = new AudioSpecificConfig(aac_frame);
+    private dispatchAudioInitSegment(sample: AudioData) {
         let meta: any = {};
-
         meta.type = 'audio';
         meta.id = this.audio_track_.id;
         meta.timescale = 1000;
         meta.duration = this.duration_;
 
-        meta.audioSampleRate = audio_specific_config.sampling_rate;
-        meta.channelCount = audio_specific_config.channel_count;
-        meta.codec = audio_specific_config.codec_mimetype;
-        meta.originalCodec = audio_specific_config.original_codec_mimetype;
-        meta.config = audio_specific_config.config;
+        if (this.audio_metadata_.codec === 'aac') {
+            let aac_frame = sample.data as AACFrame;
+            let audio_specific_config = new AudioSpecificConfig(aac_frame);
+
+            meta.audioSampleRate = audio_specific_config.sampling_rate;
+            meta.channelCount = audio_specific_config.channel_count;
+            meta.codec = audio_specific_config.codec_mimetype;
+            meta.originalCodec = audio_specific_config.original_codec_mimetype;
+            meta.config = audio_specific_config.config;
+        } else if (this.audio_metadata_.codec === 'mp3') {
+            meta.audioSampleRate = this.audio_metadata_.sample_rate;
+            meta.channelCount = this.audio_metadata_.channel_count;
+            meta.codec = 'mp3';
+            meta.originalCodec = 'mp3';
+            meta.config = undefined;
+        }
 
         meta.refSampleDuration = 1024 / meta.audioSampleRate * meta.timescale;