diff --git a/trunk/src/kernel/srs_kernel_codec.hpp b/trunk/src/kernel/srs_kernel_codec.hpp index 9f67723788..79073f58a1 100644 --- a/trunk/src/kernel/srs_kernel_codec.hpp +++ b/trunk/src/kernel/srs_kernel_codec.hpp @@ -189,6 +189,7 @@ enum SrsCodecFlvTag { // set to the zero to reserved, for array map. SrsCodecFlvTagReserved = 0, + SrsCodecFlvTagForbidden = 0, // 8 = audio SrsCodecFlvTagAudio = 8, diff --git a/trunk/src/kernel/srs_kernel_error.hpp b/trunk/src/kernel/srs_kernel_error.hpp index c792ce4c01..b7b4fcb32c 100644 --- a/trunk/src/kernel/srs_kernel_error.hpp +++ b/trunk/src/kernel/srs_kernel_error.hpp @@ -251,6 +251,10 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define ERROR_MP4_ESDS_SL_Config 3075 #define ERROR_MP4_ILLEGAL_MOOV 3076 #define ERROR_MP4_ILLEGAL_HANDLER 3077 +#define ERROR_MP4_ILLEGAL_TRACK 3078 +#define ERROR_MP4_MOOV_OVERFLOW 3079 +#define ERROR_MP4_ILLEGAL_SAMPLES 3080 +#define ERROR_MP4_ILLEGAL_TIMESTAMP 3081 /////////////////////////////////////////////////////// // HTTP/StreamCaster/KAFKA protocol error. diff --git a/trunk/src/kernel/srs_kernel_mp4.cpp b/trunk/src/kernel/srs_kernel_mp4.cpp index 458c5196e0..5481919cc7 100644 --- a/trunk/src/kernel/srs_kernel_mp4.cpp +++ b/trunk/src/kernel/srs_kernel_mp4.cpp @@ -37,6 +37,8 @@ using namespace std; #define SRS_MP4_EOF_SIZE 0 #define SRS_MP4_USE_LARGE_SIZE 1 +#define SRS_MP4_BUF_SIZE 4096 + int srs_mp4_string_length(const string& v) { return (int)v.length()+1; @@ -872,6 +874,48 @@ SrsMp4TrackHeaderBox* SrsMp4TrackBox::tkhd() return dynamic_cast(box); } +SrsMp4ChunkOffsetBox* SrsMp4TrackBox::stco() +{ + SrsMp4SampleTableBox* box = stbl(); + return box? box->stco():NULL; +} + +SrsMp4SampleSizeBox* SrsMp4TrackBox::stsz() +{ + SrsMp4SampleTableBox* box = stbl(); + return box? box->stsz():NULL; +} + +SrsMp4Sample2ChunkBox* SrsMp4TrackBox::stsc() +{ + SrsMp4SampleTableBox* box = stbl(); + return box? 
box->stsc():NULL; +} + +SrsMp4DecodingTime2SampleBox* SrsMp4TrackBox::stts() +{ + SrsMp4SampleTableBox* box = stbl(); + return box? box->stts():NULL; +} + +SrsMp4CompositionTime2SampleBox* SrsMp4TrackBox::ctts() +{ + SrsMp4SampleTableBox* box = stbl(); + return box? box->ctts():NULL; +} + +SrsMp4SyncSampleBox* SrsMp4TrackBox::stss() +{ + SrsMp4SampleTableBox* box = stbl(); + return box? box->stss():NULL; +} + +SrsMp4MediaHeaderBox* SrsMp4TrackBox::mdhd() +{ + SrsMp4MediaBox* box = mdia(); + return box? box->mdhd():NULL; +} + SrsCodecVideo SrsMp4TrackBox::vide_codec() { SrsMp4SampleDescriptionBox* box = stsd(); @@ -1166,6 +1210,12 @@ SrsMp4TrackType SrsMp4MediaBox::track_type() } } +SrsMp4MediaHeaderBox* SrsMp4MediaBox::mdhd() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeMDHD); + return dynamic_cast(box); +} + SrsMp4MediaInformationBox* SrsMp4MediaBox::minf() { SrsMp4Box* box = get(SrsMp4BoxTypeMINF); @@ -1695,6 +1745,42 @@ SrsMp4SampleDescriptionBox* SrsMp4SampleTableBox::stsd() return dynamic_cast(box); } +SrsMp4ChunkOffsetBox* SrsMp4SampleTableBox::stco() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeSTCO); + return dynamic_cast(box); +} + +SrsMp4SampleSizeBox* SrsMp4SampleTableBox::stsz() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeSTSZ); + return dynamic_cast(box); +} + +SrsMp4Sample2ChunkBox* SrsMp4SampleTableBox::stsc() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeSTSC); + return dynamic_cast(box); +} + +SrsMp4DecodingTime2SampleBox* SrsMp4SampleTableBox::stts() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeSTTS); + return dynamic_cast(box); +} + +SrsMp4CompositionTime2SampleBox* SrsMp4SampleTableBox::ctts() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeCTTS); + return dynamic_cast(box); +} + +SrsMp4SyncSampleBox* SrsMp4SampleTableBox::stss() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeSTSS); + return dynamic_cast(box); +} + SrsMp4SampleEntry::SrsMp4SampleEntry() { memset(reserved, 0, 6); @@ -2499,6 +2585,8 @@ SrsMp4DecodingTime2SampleBox::SrsMp4DecodingTime2SampleBox() entry_count = 0; entries = 
NULL; + + index = count = 0; } SrsMp4DecodingTime2SampleBox::~SrsMp4DecodingTime2SampleBox() @@ -2506,6 +2594,43 @@ SrsMp4DecodingTime2SampleBox::~SrsMp4DecodingTime2SampleBox() srs_freepa(entries); } +int SrsMp4DecodingTime2SampleBox::initialize_counter() +{ + int ret = ERROR_SUCCESS; + + index = 0; + if (index >= entry_count) { + ret = ERROR_MP4_ILLEGAL_TIMESTAMP; + srs_error("MP4 illegal ts, empty stts. ret=%d", ret); + return ret; + } + + count = entries[0].sample_count; + + return ret; +} + +int SrsMp4DecodingTime2SampleBox::on_sample(uint32_t sample_index, SrsMp4SttsEntry** ppentry) +{ + int ret = ERROR_SUCCESS; + + while (sample_index + 1 > count) { + index++; + + if (index >= entry_count) { + ret = ERROR_MP4_ILLEGAL_TIMESTAMP; + srs_error("MP4 illegal ts, stts overflow, count=%d. ret=%d", entry_count, ret); + return ret; + } + + count += entries[index].sample_count; + } + + *ppentry = &entries[index]; + + return ret; +} + int SrsMp4DecodingTime2SampleBox::nb_header() { return SrsMp4FullBox::nb_header() + 4 + 8*entry_count; @@ -2562,6 +2687,8 @@ SrsMp4CompositionTime2SampleBox::SrsMp4CompositionTime2SampleBox() entry_count = 0; entries = NULL; + + index = count = 0; } SrsMp4CompositionTime2SampleBox::~SrsMp4CompositionTime2SampleBox() @@ -2569,6 +2696,43 @@ SrsMp4CompositionTime2SampleBox::~SrsMp4CompositionTime2SampleBox() srs_freepa(entries); } +int SrsMp4CompositionTime2SampleBox::initialize_counter() +{ + int ret = ERROR_SUCCESS; + + index = 0; + if (index >= entry_count) { + ret = ERROR_MP4_ILLEGAL_TIMESTAMP; + srs_error("MP4 illegal ts, empty ctts. ret=%d", ret); + return ret; + } + + count = entries[0].sample_count; + + return ret; +} + +int SrsMp4CompositionTime2SampleBox::on_sample(uint32_t sample_index, SrsMp4CttsEntry** ppentry) +{ + int ret = ERROR_SUCCESS; + + while (sample_index + 1 > count) { + index++; + + if (index >= entry_count) { + ret = ERROR_MP4_ILLEGAL_TIMESTAMP; + srs_error("MP4 illegal ts, ctts overflow, count=%d. 
ret=%d", entry_count, ret); + return ret; + } + + count += entries[index].sample_count; + } + + *ppentry = &entries[index]; + + return ret; +} + int SrsMp4CompositionTime2SampleBox::nb_header() { return SrsMp4FullBox::nb_header() + 4 + 8*entry_count; @@ -2955,7 +3119,218 @@ int SrsMp4UserDataBox::decode_header(SrsBuffer* buf) return ret; } -#define SRS_MP4_BUF_SIZE 4096 +SrsMp4Sample::SrsMp4Sample() +{ + type = SrsCodecFlvTagForbidden; + offset = 0; + index = 0; + dts = pts = 0; + nb_data = 0; + data = NULL; + frame_type = SrsCodecVideoAVCFrameForbidden; + tbn = 0; +} + +SrsMp4Sample::~SrsMp4Sample() +{ + srs_freepa(data); +} + +uint32_t SrsMp4Sample::get_dts() +{ + return (uint32_t)(tbn? dts * 1000 / tbn:0); +} + +uint32_t SrsMp4Sample::get_pts() +{ + return (uint32_t)(tbn? pts * 1000 / tbn:0); +} + +SrsMp4SampleManager::SrsMp4SampleManager() +{ +} + +SrsMp4SampleManager::~SrsMp4SampleManager() +{ + vector::iterator it; + for (it = samples.begin(); it != samples.end(); ++it) { + SrsMp4Sample* sample = *it; + srs_freep(sample); + } + samples.clear(); +} + +int SrsMp4SampleManager::load(SrsMp4MovieBox* moov) +{ + int ret = ERROR_SUCCESS; + + map tses; + + // Load samples from moov, merge to temp samples. + if ((ret = do_load(tses, moov)) != ERROR_SUCCESS) { + map::iterator it; + for (it = tses.begin(); it != tses.end(); ++it) { + SrsMp4Sample* sample = it->second; + srs_freep(sample); + } + + return ret; + } + + // Dumps temp samples. 
+ if (true) { + map::iterator it; + for (it = tses.begin(); it != tses.end(); ++it) { + SrsMp4Sample* sample = it->second; + samples.push_back(sample); + } + } + + return ret; +} + +int SrsMp4SampleManager::do_load(map& tses, SrsMp4MovieBox* moov) +{ + int ret = ERROR_SUCCESS; + + SrsMp4TrackBox* vide = moov->video(); + if (vide) { + SrsMp4MediaHeaderBox* mdhd = vide->mdhd(); + SrsMp4TrackType tt = vide->track_type(); + SrsMp4ChunkOffsetBox* stco = vide->stco(); + SrsMp4SampleSizeBox* stsz = vide->stsz(); + SrsMp4Sample2ChunkBox* stsc = vide->stsc(); + SrsMp4DecodingTime2SampleBox* stts = vide->stts(); + // The composition time to sample table is optional and must only be present if DT and CT differ for any samples. + SrsMp4CompositionTime2SampleBox* ctts = vide->ctts(); + // If the sync sample box is not present, every sample is a sync sample. + SrsMp4SyncSampleBox* stss = vide->stss(); + + if (!mdhd || !stco || !stsz || !stsc || !stts) { + ret = ERROR_MP4_ILLEGAL_TRACK; + srs_error("MP4 illegal track, empty mdhd/stco/stsz/stsc/stts, type=%d. ret=%d", tt, ret); + return ret; + } + + if ((ret = load_trak(tses, SrsCodecFlvTagVideo, mdhd, stco, stsz, stsc, stts, ctts, stss)) != ERROR_SUCCESS) { + return ret; + } + } + + SrsMp4TrackBox* soun = moov->audio(); + if (soun) { + SrsMp4MediaHeaderBox* mdhd = soun->mdhd(); + SrsMp4TrackType tt = soun->track_type(); + SrsMp4ChunkOffsetBox* stco = soun->stco(); + SrsMp4SampleSizeBox* stsz = soun->stsz(); + SrsMp4Sample2ChunkBox* stsc = soun->stsc(); + SrsMp4DecodingTime2SampleBox* stts = soun->stts(); + + if (!mdhd || !stco || !stsz || !stsc || !stts) { + ret = ERROR_MP4_ILLEGAL_TRACK; + srs_error("MP4 illegal track, empty mdhd/stco/stsz/stsc/stts, type=%d. 
ret=%d", tt, ret); + return ret; + } + + if ((ret = load_trak(tses, SrsCodecFlvTagAudio, mdhd, stco, stsz, stsc, stts, NULL, NULL)) != ERROR_SUCCESS) { + return ret; + } + } + + return ret; +} + +int SrsMp4SampleManager::load_trak(map& tses, SrsCodecFlvTag tt, + SrsMp4MediaHeaderBox* mdhd, SrsMp4ChunkOffsetBox* stco, SrsMp4SampleSizeBox* stsz, SrsMp4Sample2ChunkBox* stsc, + SrsMp4DecodingTime2SampleBox* stts, SrsMp4CompositionTime2SampleBox* ctts, SrsMp4SyncSampleBox* stss) +{ + int ret = ERROR_SUCCESS; + + // Samples per chunk. + uint32_t stsci = 0; + + // DTS box. + if ((ret = stts->initialize_counter()) != ERROR_SUCCESS) { + return ret; + } + + // CTS/PTS box. + if (ctts && (ret = ctts->initialize_counter()) != ERROR_SUCCESS) { + return ret; + } + + // Without stsc entries the chunks cannot be mapped to samples, so fail before indexing an empty table. + if (stco->entry_count > 0 && stsc->entry_count == 0) { + ret = ERROR_MP4_ILLEGAL_TRACK; + srs_error("MP4 illegal track, empty stsc. ret=%d", ret); + return ret; + } + + SrsMp4Sample* previous = NULL; + + // For each chunk offset. + for (uint32_t stcoi = 0; stcoi < stco->entry_count; stcoi++) { + uint32_t chunk_offset = stco->entries[stcoi]; + + // Find how many samples from stsc. + if (stsci + 1 < stsc->entry_count && stcoi + 1 >= stsc->entries[stsci + 1].first_chunk) { + stsci++; + } + uint32_t samples_per_chunk = stsc->entries[stsci].samples_per_chunk; + // The bytes used by the previous samples of this chunk, because the samples may differ in size. + uint64_t sample_relative_offset = 0; + for (uint32_t i = 0; i < samples_per_chunk; i++) { + SrsMp4Sample* sample = new SrsMp4Sample(); + sample->type = tt; + sample->index = (previous? previous->index+1:0); + sample->tbn = mdhd->timescale; + + uint32_t sample_size = stsz->sample_size; + if (sample_size == 0) { + if (sample->index >= stsz->sample_count) { + ret = ERROR_MP4_MOOV_OVERFLOW; + srs_error("MP4 stsz overflow, sample_count=%d. 
ret=%d", stsz->sample_count, ret); + // The sample is not stored in tses yet, so free it here before returning the error. + srs_freep(sample); + return ret; + } + sample_size = stsz->entry_sizes[sample->index]; + } + sample->offset = chunk_offset + sample_relative_offset; + sample_relative_offset += sample_size; + + SrsMp4SttsEntry* stts_entry = NULL; + if ((ret = stts->on_sample(sample->index, &stts_entry)) != ERROR_SUCCESS) { + srs_freep(sample); + return ret; + } + if (previous) { + sample->pts = sample->dts = previous->dts + stts_entry->sample_delta; + } + + SrsMp4CttsEntry* ctts_entry = NULL; + if (ctts && (ret = ctts->on_sample(sample->index, &ctts_entry)) != ERROR_SUCCESS) { + srs_freep(sample); + return ret; + } + if (ctts_entry) { + sample->pts = sample->dts + ctts_entry->sample_offset; + } + + previous = sample; + tses[sample->offset] = sample; + } + } + + // Check total samples. + if (previous && previous->index + 1 != stsz->sample_count) { + ret = ERROR_MP4_ILLEGAL_SAMPLES; + srs_error("MP4 illegal samples count, expect=%d, actual=%d. ret=%d", stsz->sample_count, previous->index + 1, ret); + return ret; + } + + return ret; +} SrsMp4Decoder::SrsMp4Decoder() { @@ -2971,6 +3346,7 @@ SrsMp4Decoder::SrsMp4Decoder() sample_rate = SrsCodecAudioSampleRateForbidden; sound_bits = SrsCodecAudioSampleSizeForbidden; channels = SrsCodecAudioSoundTypeForbidden; + samples = new SrsMp4SampleManager(); } SrsMp4Decoder::~SrsMp4Decoder() @@ -2979,6 +3355,7 @@ SrsMp4Decoder::~SrsMp4Decoder() srs_freep(stream); srs_freepa(pasc); srs_freepa(pavcc); + srs_freep(samples); } int SrsMp4Decoder::initialize(ISrsReadSeeker* rs) @@ -3170,6 +3547,12 @@ int SrsMp4Decoder::parse_moov(SrsMp4MovieBox* moov) memcpy(pasc, asc->asc, nb_asc); } + // Build the samples structure from moov. + if ((ret = samples->load(moov)) != ERROR_SUCCESS) { + srs_error("MP4 load samples failed. ret=%d", ret); + return ret; + } + stringstream ss; ss << "dur=" << mvhd->duration() << "ms"; // video codec. 
diff --git a/trunk/src/kernel/srs_kernel_mp4.hpp b/trunk/src/kernel/srs_kernel_mp4.hpp index 0a99fef774..f1341d13a1 100644 --- a/trunk/src/kernel/srs_kernel_mp4.hpp +++ b/trunk/src/kernel/srs_kernel_mp4.hpp @@ -34,6 +34,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include #include +#include class ISrsReadSeeker; class SrsMp4TrackBox; @@ -50,6 +51,13 @@ class SrsMp4VisualSampleEntry; class SrsMp4AvccBox; class SrsMp4AudioSampleEntry; class SrsMp4EsdsBox; +class SrsMp4ChunkOffsetBox; +class SrsMp4SampleSizeBox; +class SrsMp4Sample2ChunkBox; +class SrsMp4DecodingTime2SampleBox; +class SrsMp4CompositionTime2SampleBox; +class SrsMp4SyncSampleBox; +class SrsMp4MediaHeaderBox; /** * 4.2 Object Structure @@ -383,6 +391,22 @@ class SrsMp4TrackBox : public SrsMp4Box virtual SrsMp4TrackType track_type(); // Get the track header box. virtual SrsMp4TrackHeaderBox* tkhd(); +public: + // Get the chunk offset box. + virtual SrsMp4ChunkOffsetBox* stco(); + // Get the sample size box. + virtual SrsMp4SampleSizeBox* stsz(); + // Get the sample to chunk box. + virtual SrsMp4Sample2ChunkBox* stsc(); + // Get the dts box. + virtual SrsMp4DecodingTime2SampleBox* stts(); + // Get the cts/pts box. + virtual SrsMp4CompositionTime2SampleBox* ctts(); + // Get the sync sample box. + virtual SrsMp4SyncSampleBox* stss(); + // Get the media header box. + virtual SrsMp4MediaHeaderBox* mdhd(); +public: // For vide track, get the video codec. virtual SrsCodecVideo vide_codec(); // For soun track, get the audio codec. @@ -541,6 +565,8 @@ class SrsMp4MediaBox : public SrsMp4Box // for example, it maybe Audio|Video when contains both. // Generally, only single type, no combination. virtual SrsMp4TrackType track_type(); + // Get the media header box. + virtual SrsMp4MediaHeaderBox* mdhd(); // Get the media info box. 
virtual SrsMp4MediaInformationBox* minf(); }; @@ -781,6 +807,18 @@ class SrsMp4SampleTableBox : public SrsMp4Box public: // Get the sample description box virtual SrsMp4SampleDescriptionBox* stsd(); + // Get the chunk offset box. + virtual SrsMp4ChunkOffsetBox* stco(); + // Get the sample size box. + virtual SrsMp4SampleSizeBox* stsz(); + // Get the sample to chunk box. + virtual SrsMp4Sample2ChunkBox* stsc(); + // Get the dts box. + virtual SrsMp4DecodingTime2SampleBox* stts(); + // Get the cts/pts box. + virtual SrsMp4CompositionTime2SampleBox* ctts(); + // Get the sync sample box. + virtual SrsMp4SyncSampleBox* stss(); }; /** @@ -1123,9 +1161,18 @@ class SrsMp4DecodingTime2SampleBox : public SrsMp4FullBox // an integer that gives the number of entries in the following table. uint32_t entry_count; SrsMp4SttsEntry* entries; +private: + // The index for counter to calc the dts for samples. + uint32_t index; + uint32_t count; public: SrsMp4DecodingTime2SampleBox(); virtual ~SrsMp4DecodingTime2SampleBox(); +public: + // Initialize the counter. + virtual int initialize_counter(); + // When got a sample, index starts from 0. + virtual int on_sample(uint32_t sample_index, SrsMp4SttsEntry** ppentry); protected: virtual int nb_header(); virtual int encode_header(SrsBuffer* buf); @@ -1165,9 +1212,18 @@ class SrsMp4CompositionTime2SampleBox : public SrsMp4FullBox // an integer that gives the number of entries in the following table. uint32_t entry_count; SrsMp4CttsEntry* entries; +private: + // The index for counter to calc the cts/pts for samples. + uint32_t index; + uint32_t count; public: SrsMp4CompositionTime2SampleBox(); virtual ~SrsMp4CompositionTime2SampleBox(); +public: + // Initialize the counter. + virtual int initialize_counter(); + // When got a sample, index starts from 0. 
+ virtual int on_sample(uint32_t sample_index, SrsMp4CttsEntry** ppentry); protected: virtual int nb_header(); virtual int encode_header(SrsBuffer* buf); @@ -1337,6 +1393,75 @@ class SrsMp4UserDataBox : public SrsMp4Box virtual int decode_header(SrsBuffer* buf); }; +/** + * Generally, an MP4 sample contains a frame, for example, a video frame or audio frame. + */ +class SrsMp4Sample +{ +public: + // The type of sample, audio or video. + SrsCodecFlvTag type; + // The offset of sample in file. + uint64_t offset; + // The index of sample within a track, starts from 0. + uint32_t index; + // The dts in tbn. + uint64_t dts; + // For video, the pts in tbn. + uint64_t pts; + // The tbn(timebase). + uint32_t tbn; + // For video, the frame type, whether keyframe. + SrsCodecVideoAVCFrame frame_type; + // The sample data. + uint32_t nb_data; + uint8_t* data; +public: + SrsMp4Sample(); + virtual ~SrsMp4Sample(); +public: + // Get the dts in ms. + virtual uint32_t get_dts(); + // Get the pts in ms. + virtual uint32_t get_pts(); +}; + +/** + * Build samples from moov, or write samples to moov. + * One or more samples are grouped to a chunk, each track contains one or more chunks. + * The offset of chunk is specified by stco. + * The chunk-sample series is specified by stsc. + * The sample size is specified by stsz. + * The dts is specified by stts. + * For video: + * The cts/pts is specified by ctts. + * The keyframe is specified by stss. + */ +class SrsMp4SampleManager +{ +private: + std::vector samples; +public: + SrsMp4SampleManager(); + virtual ~SrsMp4SampleManager(); +public: + /** + * Load the samples from moov. + * There must be at least one track. + */ + virtual int load(SrsMp4MovieBox* moov); +private: + virtual int do_load(std::map& tses, SrsMp4MovieBox* moov); +private: + // Load the samples of track from stco, stsz and stsc. + // @param tses The temporary samples, key is offset, value is sample. + // @param tt The type of sample, convert to flv tag type. 
+ // TODO: Support co64 for stco. + virtual int load_trak(std::map& tses, SrsCodecFlvTag tt, + SrsMp4MediaHeaderBox* mdhd, SrsMp4ChunkOffsetBox* stco, SrsMp4SampleSizeBox* stsz, SrsMp4Sample2ChunkBox* stsc, + SrsMp4DecodingTime2SampleBox* stts, SrsMp4CompositionTime2SampleBox* ctts, SrsMp4SyncSampleBox* stss); +}; + /** * The MP4 demuxer. */ @@ -1345,6 +1470,7 @@ class SrsMp4Decoder private: // The major brand of decoder, parse from ftyp. SrsMp4BoxBrand brand; + SrsMp4SampleManager* samples; public: // The video codec of first track, generally there is zero or one track. // Forbidden if no video stream.