Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: parse mp4 webvtt segments #1545

Merged
merged 12 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"global": "^4.4.0",
"m3u8-parser": "^7.2.0",
"mpd-parser": "^1.3.1",
"mux.js": "7.0.3",
"mux.js": "7.1.0",
"video.js": "^7 || ^8"
},
"peerDependencies": {
Expand Down
53 changes: 53 additions & 0 deletions src/media-segment-request.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ export const REQUEST_ERRORS = {
ABORTED: -102
};

// Codec string used to recognise WebVTT text tracks; text track codecs are compared against it.
const WEB_VTT_CODEC = 'wvtt';

/**
* Abort all requests
*
Expand Down Expand Up @@ -164,6 +166,43 @@ const handleKeyResponse = (segment, objects, finishProcessingFn, triggerSegmentE
return finishProcessingFn(null, segment);
};

/**
 * Hands an mp4 init segment to the transmuxer when the text track's codec
 * needs a dedicated parser.
 *
 * Only the WebVTT codec is handled; for any other codec nothing is posted
 * to the transmuxer.
 *
 * @param {Object} segment init segment to process; `segment.transmuxer`
 *        receives the message and `segment.map.bytes` is sent as its data
 * @param {string} codec the codec of the text segments
 */
const initMp4Text = (segment, codec) => {
  if (codec !== WEB_VTT_CODEC) {
    return;
  }

  segment.transmuxer.postMessage({
    action: 'initMp4WebVttParser',
    data: segment.map.bytes
  });
};

/**
 * Asks the transmuxer to parse an mp4 text segment and, once it answers,
 * reports the outcome through the segment loader's doneFn.
 *
 * NOTE(review): only the WebVTT codec is handled; for any other codec no
 * request is made and doneFn is not invoked by this function - confirm that
 * callers do not wait on it in that case.
 *
 * @param {Object} segment the text segment to parse
 * @param {string} codec the codec of the text segment
 * @param {Function} doneFn the doneFn passed from the segment loader
 */
const parseMp4TextSegment = (segment, codec, doneFn) => {
  if (codec !== WEB_VTT_CODEC) {
    return;
  }

  const onParsed = ({data, mp4VttCues}) => {
    // the response carries the segment data alongside the parsed cues
    segment.bytes = data;
    doneFn(null, segment, { mp4VttCues });
  };

  workerCallback({
    action: 'getMp4WebVttText',
    data: segment.bytes,
    transmuxer: segment.transmuxer,
    callback: onParsed
  });
};

const parseInitSegment = (segment, callback) => {
const type = detectContainerForBytes(segment.map.bytes);

Expand Down Expand Up @@ -206,6 +245,10 @@ const parseInitSegment = (segment, callback) => {
segment.map.timescales[track.id] = track.timescale;
}

if (track.type === 'text') {
initMp4Text(segment, track.codec);
}

});

return callback(null);
Expand Down Expand Up @@ -468,6 +511,16 @@ const handleSegmentBytes = ({
if (isLikelyFmp4MediaSegment(bytesAsUint8Array)) {
segment.isFmp4 = true;
const {tracks} = segment.map;
const isMp4TextSegment = tracks.text && (!tracks.audio || !tracks.video);

if (isMp4TextSegment) {
dataFn(segment, {
data: bytesAsUint8Array,
type: 'text'
});
parseMp4TextSegment(segment, tracks.text.codec, doneFn);
return;
}

const trackInfo = {
isFmp4: true,
Expand Down
39 changes: 39 additions & 0 deletions src/transmuxer-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import {Transmuxer} from 'mux.js/lib/mp4/transmuxer';
import CaptionParser from 'mux.js/lib/mp4/caption-parser';
import WebVttParser from 'mux.js/lib/mp4/webvtt-parser';
import mp4probe from 'mux.js/lib/mp4/probe';
import tsInspector from 'mux.js/lib/tools/ts-inspector.js';
import {
Expand Down Expand Up @@ -207,6 +208,44 @@
}, [segment.buffer]);
}

/**
* Initializes the WebVttParser and passes the init segment.
*
* @param {Uint8Array} data mp4 boxed WebVTT init segment data
*/
initMp4WebVttParser(data) {
if (!this.webVttParser) {
this.webVttParser = new WebVttParser();
}
const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength);

// Set the timescale for the parser.
// This can be called repeatedly in order to set and re-set the timescale.
this.webVttParser.init(segment);
wseymour15 marked this conversation as resolved.
Show resolved Hide resolved
}

/**
* Parse an mp4 encapsulated WebVTT segment and return an array of cues.
*
* @param {Uint8Array} data a text/webvtt segment
* @return {Object[]} an array of parsed cue objects
*/
getMp4WebVttText(data) {
if (!this.webVttParser) {
wseymour15 marked this conversation as resolved.
Show resolved Hide resolved
// timescale might not be set yet if the parser is created before an init segment is passed.
// default timescale is 90k.
this.webVttParser = new WebVttParser();
}
const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength);
const parsed = this.webVttParser.parseSegment(segment);

this.self.postMessage({
action: 'getMp4WebVttText',
mp4VttCues: parsed || [],

Check warning on line 244 in src/transmuxer-worker.js

View check run for this annotation

Codecov / codecov/patch

src/transmuxer-worker.js#L244

Added line #L244 was not covered by tests
data: segment.buffer
}, [segment.buffer]);
}

probeMp4StartTime({timescales, data}) {
const startTime = mp4probe.startTime(timescales, data);

Expand Down
74 changes: 58 additions & 16 deletions src/vtt-segment-loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,6 @@ export default class VTTSegmentLoader extends SegmentLoader {
this.shouldSaveSegmentTimingInfo_ = false;
}

/**
 * Returns null instead of a transmuxer.
 *
 * NOTE(review): presumably overrides a SegmentLoader method of the same name - confirm.
 * NOTE(review): the enclosing hunk header (-46,11 +46,6) suggests these lines are
 * deleted by this change - confirm before relying on them.
 *
 * @return {null} always null
 */
createTransmuxer_() {
// don't need to transmux any subtitles
return null;
}

/**
* Indicates which time ranges are buffered
*
Expand Down Expand Up @@ -282,6 +277,11 @@ export default class VTTSegmentLoader extends SegmentLoader {
}

const segmentInfo = this.pendingSegment_;
const isMp4WebVttSegmentWithCues = result.mp4VttCues && result.mp4VttCues.length;

if (isMp4WebVttSegmentWithCues) {
segmentInfo.mp4VttCues = result.mp4VttCues;
}

// although the VTT segment loader bandwidth isn't really used, it's good to
// maintain functionality between segment loaders
Expand Down Expand Up @@ -334,11 +334,13 @@ export default class VTTSegmentLoader extends SegmentLoader {
return;
}

this.updateTimeMapping_(
segmentInfo,
this.syncController_.timelines[segmentInfo.timeline],
this.playlist_
);
if (!isMp4WebVttSegmentWithCues) {
this.updateTimeMapping_(
segmentInfo,
this.syncController_.timelines[segmentInfo.timeline],
this.playlist_
);
}

if (segmentInfo.cues.length) {
segmentInfo.timingInfo = {
Expand Down Expand Up @@ -380,14 +382,49 @@ export default class VTTSegmentLoader extends SegmentLoader {
this.handleAppendsDone_();
}

handleData_() {
// noop as we shouldn't be getting video/audio data captions
// that we do not support here.
handleData_(simpleSegment, result) {
const isVttType = simpleSegment && simpleSegment.type === 'vtt';
const isTextResult = result && result.type === 'text';
const isFmp4VttSegment = isVttType && isTextResult;
// handle segment data for fmp4 encapsulated webvtt

if (isFmp4VttSegment) {
super.handleData_(simpleSegment, result);
}
}

/**
 * Intentionally does nothing.
 *
 * NOTE(review): presumably overrides a SegmentLoader method of the same name - confirm.
 */
updateTimingInfoEnd_() {
// noop
}

/**
* Utility function for converting mp4 webvtt cue objects into VTTCues.
*
* @param {Object} segmentInfo with mp4 webvtt cues for parsing into VTTCue objecs
*/
parseMp4VttCues_(segmentInfo) {
const timestampOffset = this.sourceUpdater_.videoTimestampOffset() === null ?
this.sourceUpdater_.audioTimestampOffset() :
this.sourceUpdater_.videoTimestampOffset();

segmentInfo.mp4VttCues.forEach((cue) => {
const start = cue.start + timestampOffset;
const end = cue.end + timestampOffset;
const vttCue = new window.VTTCue(start, end, cue.cueText);
Copy link
Contributor

@alex-barstow alex-barstow Oct 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth confirming the existence/data type of these cue values or are we guaranteed they will be correct at this point?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sample duration and cueText must be defined coming from the transmuxer, so I think we're guaranteed these values will be correct with regards to type at this point. https://github.com/videojs/mux.js/blob/e1b4d161f51782e4712d020b66d5087bd6febb3d/lib/mp4/webvtt-parser.js#L107


if (cue.settings) {
cue.settings.split(' ').forEach((cueSetting) => {
const keyValString = cueSetting.split(':');
const key = keyValString[0];
const value = keyValString[1];

vttCue[key] = isNaN(value) ? value : Number(value);
});
}
segmentInfo.cues.push(vttCue);
});
}

/**
* Uses the WebVTT parser to parse the segment response
*
Expand All @@ -406,6 +443,14 @@ export default class VTTSegmentLoader extends SegmentLoader {
throw new NoVttJsError();
}

segmentInfo.cues = [];
segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 };

if (segmentInfo.mp4VttCues) {
this.parseMp4VttCues_(segmentInfo);
return;
}

if (typeof window.TextDecoder === 'function') {
decoder = new window.TextDecoder('utf8');
} else {
Expand All @@ -419,9 +464,6 @@ export default class VTTSegmentLoader extends SegmentLoader {
decoder
);

segmentInfo.cues = [];
segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 };

parser.oncue = segmentInfo.cues.push.bind(segmentInfo.cues);
parser.ontimestampmap = (map) => {
segmentInfo.timestampmap = map;
Expand Down
85 changes: 84 additions & 1 deletion test/media-segment-request.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ import {
mp4VideoInit,
muxed as muxedSegment,
webmVideo,
webmVideoInit
webmVideoInit,
mp4WebVttInit,
mp4WebVtt
} from 'create-test-data!segments';
// needed for plugin registration
import '../src/videojs-http-streaming';
Expand Down Expand Up @@ -1863,3 +1865,84 @@ QUnit.test('can get emsg ID3 frames from fmp4 audio segment', function(assert) {
// Simulate receiving the init segment after the media
this.standardXHRResponse(initReq, mp4AudioInit());
});

// Verifies that the cues reported by the transmuxer for an fmp4 WebVTT segment
// reach doneFn as result.mp4VttCues.
QUnit.test('can get webvtt text from an fmp4 segment', function(assert) {
const done = assert.async();
// cues the mocked transmuxer reports and doneFn is expected to receive
const expectedCues = [
{
cueText: '2024-10-16T05:13:50Z\nen # 864527815',
end: 1729055630.9,
settings: undefined,
start: 1729055630
},
{
cueText: '2024-10-16T05:13:51Z\nen # 864527815',
end: 1729055631.9,
settings: undefined,
start: 1729055631
}
];
const transmuxer = new videojs.EventTarget();

// stand-in transmuxer: answers the two actions below by triggering a matching
// 'message' event; any other action is ignored
transmuxer.postMessage = (event) => {
if (event.action === 'getMp4WebVttText') {
transmuxer.trigger({
type: 'message',
data: {
action: 'getMp4WebVttText',
data: event.data,
mp4VttCues: expectedCues
}
});
}

if (event.action === 'probeMp4Tracks') {
transmuxer.trigger({
type: 'message',
data: {
action: 'probeMp4Tracks',
data: event.data,
tracks: [{type: 'text', codec: 'wvtt'}]
}
});
}
};

mediaSegmentRequest({
xhr: this.xhr,
xhrOptions: this.xhrOptions,
decryptionWorker: this.mockDecrypter,
segment: {
transmuxer,
resolvedUri: 'mp4WebVtt.mp4',
map: {
resolvedUri: 'mp4WebVttInit.mp4'
},
isFmp4: true
},
progressFn: this.noop,
trackInfoFn: this.noop,
timingInfoFn: this.noop,
id3Fn: this.noop,
captionsFn: this.noop,
dataFn: this.noop,
doneFn: (_e, _s, result) => {
assert.equal(result.mp4VttCues.length, 2, 'there are 2 mp4VttCues');
assert.deepEqual(result.mp4VttCues, expectedCues, 'mp4VttCues are expected values');
// remove the listeners from the stand-in transmuxer before finishing
transmuxer.off();
done();
},
triggerSegmentEventFn: this.noop
});
// both requests are expected to be queued before any response is delivered
assert.equal(this.requests.length, 2, 'there are two requests');

const initReq = this.requests.shift();
const segmentReq = this.requests.shift();

assert.equal(initReq.uri, 'mp4WebVttInit.mp4', 'the first request is for the init segment');
assert.equal(segmentReq.uri, 'mp4WebVtt.mp4', 'the second request is for a segment');

// respond to the init segment first, then to the media segment
this.standardXHRResponse(initReq, mp4WebVttInit());
this.standardXHRResponse(segmentReq, mp4WebVtt());
});
Binary file added test/segments/mp4WebVtt.mp4
Binary file not shown.
Binary file added test/segments/mp4WebVttInit.mp4
Binary file not shown.
Loading
Loading