Skip to content

Commit

Permalink
Add new time formats and refactor time parsing logic for TTML parser.
Browse files Browse the repository at this point in the history
Issue #111

Change-Id: Ia9ebcd9be19ebdc452d9554ace116265d5a7d59c
  • Loading branch information
ismena committed Jul 7, 2016
1 parent 3def381 commit 134119e
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 138 deletions.
249 changes: 207 additions & 42 deletions lib/media/ttml_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ goog.provide('shaka.media.TtmlTextParser');

goog.require('shaka.media.TextEngine');
goog.require('shaka.util.Error');
goog.require('shaka.util.TextParser');


/**
Expand All @@ -44,8 +43,12 @@ shaka.media.TtmlTextParser = function(data) {
}

if (xml) {
// Try to get the framerate if applicable
// Try to get the framerate, subFrameRate and frameRateMultiplier
// if applicable
var frameRate = null;
var subFrameRate = null;
var frameRateMultiplier = null;
var tickRate = null;
var tts = xml.getElementsByTagName('tt');
var tt = tts[0];
// TTML should always have tt element
Expand All @@ -55,8 +58,14 @@ shaka.media.TtmlTextParser = function(data) {
shaka.util.Error.Code.INVALID_TTML);
} else {
frameRate = tt.getAttribute('ttp:frameRate');
subFrameRate = tt.getAttribute('ttp:subFrameRate');
frameRateMultiplier = tt.getAttribute('ttp:frameRateMultiplier');
tickRate = tt.getAttribute('ttp:tickRate');
}

var rateInfo = new shaka.media.TtmlTextParser.RateInfo_(
frameRate, subFrameRate, frameRateMultiplier, tickRate);

var styles = shaka.media.TtmlTextParser.getLeafNodes_(
tt.getElementsByTagName('styling')[0]);
var regions = shaka.media.TtmlTextParser.getLeafNodes_(
Expand All @@ -66,7 +75,7 @@ shaka.media.TtmlTextParser = function(data) {

for (var i = 0; i < textNodes.length; i++) {
var cue = shaka.media.TtmlTextParser.parseCue_(
textNodes[i], frameRate, styles, regions);
textNodes[i], rateInfo, styles, regions);
if (cue) {
ret.push(cue);
}
Expand All @@ -77,6 +86,58 @@ shaka.media.TtmlTextParser = function(data) {
};


/**
 * Matches a clock time that carries a frame count and an optional sub-frame
 * count: hours:minutes:seconds:frames[.subframes].
 * Capture groups: 1 = hours, 2 = minutes, 3 = seconds, 4 = frames,
 * 5 = sub-frames (undefined when absent).
 *
 * Sub-frame digits are only accepted after an explicit '.', and the frame
 * count may have more than two digits, so '00:00:40:100' is read as
 * 100 frames rather than 10 frames plus 0 sub-frames.
 *
 * @const
 * @private {!RegExp}
 * @example 00:00:40:07 (7 frames) or 00:00:40:07.1 (7 frames, 1 subframe)
 */
shaka.media.TtmlTextParser.timeColonFormatFrames_ =
    /^(\d{2,}):(\d{2}):(\d{2}):(\d{2,})(?:\.(\d+))?$/;


/**
 * Matches a clock time of the form [hours:]minutes:seconds with no
 * fractional or frame component.
 * Capture groups: 1 = hours (optional, two or more digits), 2 = minutes
 * (two digits), 3 = seconds (two digits).
 *
 * @const
 * @private {!RegExp}
 * @example 00:00:40 or 00:40
 */
shaka.media.TtmlTextParser.timeColonFormat_ =
/^(?:(\d{2,}):)?(\d{2}):(\d{2})$/;


/**
 * Matches a clock time of the form [hours:]minutes:seconds.fraction.
 * Capture groups: 1 = hours (optional, two or more digits), 2 = minutes,
 * 3 = seconds including the fractional part (e.g. '43.0345555').
 *
 * The fraction may be any number of digits (at least one), so a value such
 * as '00:00:40.5' is accepted.
 *
 * @const
 * @private {!RegExp}
 * @example 01:02:43.0345555 or 02:43.03
 */
shaka.media.TtmlTextParser.timeColonFormatMilliseconds_ =
    /^(?:(\d{2,}):)?(\d{2}):(\d{2}\.\d+)$/;


/**
 * Matches an offset time expressed as a count of frames.
 * Capture group 1 is the (possibly fractional) frame count.
 *
 * The count must contain at least one digit ('75', '75.', '75.5' or '.5'),
 * so a bare 'f' or '.f' does not match instead of yielding 0 or NaN.
 *
 * @const
 * @private {!RegExp}
 * @example 75f or 75.5f
 */
shaka.media.TtmlTextParser.timeFramesFormat_ = /^(\d+(?:\.\d*)?|\.\d+)f$/;


/**
 * Matches an offset time expressed as a count of ticks.
 * Capture group 1 is the (possibly fractional) tick count.
 *
 * The count must contain at least one digit ('50', '50.', '50.5' or '.5'),
 * so a bare 't' or '.t' does not match instead of yielding 0 or NaN.
 *
 * @const
 * @private {!RegExp}
 * @example 50t or 50.5t
 */
shaka.media.TtmlTextParser.timeTickFormat_ = /^(\d+(?:\.\d*)?|\.\d+)t$/;


/**
 * Matches an offset time built from optional hour, minute, second and
 * millisecond components, in that order.
 * Capture groups: 1 = hours, 2 = minutes, 3 = seconds, 4 = milliseconds;
 * a component that is absent leaves its group undefined.
 *
 * Every component that is present must carry a number with at least one
 * digit. If empty numbers were allowed, '5ms' would match as '5m' followed
 * by an empty 's' component and be read as five minutes rather than five
 * milliseconds.
 *
 * Note that the empty string still matches (all components are optional).
 *
 * @const
 * @private {!RegExp}
 * @example 3.45h, 3m or 4.20s
 */
shaka.media.TtmlTextParser.timeHMSFormat_ = (function() {
  // A non-empty decimal number: '3', '3.', '3.45' or '.45'.
  var num = '(\\d+(?:\\.\\d*)?|\\.\\d+)';
  return new RegExp(
      '^(?:' + num + 'h)?' +
      '(?:' + num + 'm)?' +
      '(?:' + num + 's)?' +
      '(?:' + num + 'ms)?$');
})();


/**
* Gets leaf nodes of the xml node tree. Ignores the text, br elements
* and the spans positioned inside paragraphs
Expand Down Expand Up @@ -121,22 +182,27 @@ shaka.media.TtmlTextParser.getLeafNodes_ = function(element) {
* Parses an xml Element node into a Cue Element.
*
* @param {Node|Element} element
* @param {?string} frameRate
* @param {!shaka.media.TtmlTextParser.RateInfo_} rateInfo
* @param {Array.<Element>} styles
* @param {Array.<Element>} regions
* @return {TextTrackCue} ret
* @private
*/
shaka.media.TtmlTextParser.parseCue_ = function(
element, frameRate, styles, regions) {
element, rateInfo, styles, regions) {

// Get time
var start = shaka.media.TtmlTextParser.parseTime_(
element.getAttribute('begin'), frameRate);
element.getAttribute('begin'), rateInfo);
var end = shaka.media.TtmlTextParser.parseTime_(
element.getAttribute('end'), frameRate);
element.getAttribute('end'), rateInfo);
var duration = shaka.media.TtmlTextParser.parseTime_(
element.getAttribute('dur'), rateInfo);
var payload = element.textContent;

if (end == null && duration != null)
end = start + duration;

if (start == null || end == null) {
throw new shaka.util.Error(
shaka.util.Error.Category.TEXT,
Expand Down Expand Up @@ -252,72 +318,171 @@ shaka.media.TtmlTextParser.getInheritedAttribute_ = function(
* Parses a TTML time from the given word.
*
* @param {string} text
* @param {?string} frameRate
* @param {!shaka.media.TtmlTextParser.RateInfo_} rateInfo
* @return {?number} ret
* @private
*/
shaka.media.TtmlTextParser.parseTime_ = function(text, frameRate) {
shaka.media.TtmlTextParser.parseTime_ = function(text, rateInfo) {
var ret = null;
var parser = new shaka.util.TextParser(text);
var TtmlTextParser = shaka.media.TtmlTextParser;

if (TtmlTextParser.timeColonFormatFrames_.test(text)) {
ret = TtmlTextParser.parseColonTimeWithFrames_(rateInfo, text);
} else if (TtmlTextParser.timeColonFormat_.test(text)) {
ret = TtmlTextParser.parseTimeFromRegex_(
TtmlTextParser.timeColonFormat_, text);
} else if (TtmlTextParser.timeColonFormatMilliseconds_.test(text)) {
ret = TtmlTextParser.parseTimeFromRegex_(
TtmlTextParser.timeColonFormatMilliseconds_, text);
} else if (TtmlTextParser.timeFramesFormat_.test(text)) {
ret = TtmlTextParser.parseFramesTime_(rateInfo, text);
} else if (TtmlTextParser.timeTickFormat_.test(text)) {
ret = TtmlTextParser.parseTickTime_(rateInfo, text);
} else if (TtmlTextParser.timeHMSFormat_.test(text)) {
ret = TtmlTextParser.parseTimeFromRegex_(
TtmlTextParser.timeHMSFormat_, text);
}

// 01:02:43:07 or 01:02:43:07.1
var timeColonFormatFrames = /^(\d{2,}):(\d{2}):(\d{2}):(\d{2}(\.\d+)?)$/g;
return ret;
};

// 00:00:40 or 00:40
var timeColonFormat = /(?:(\d{2,}):)?(\d{2}):(\d{2})$/g;

// 01:02:43.0345555 or 02:43.03
var timeColonFormatMilliseconds = /(?:(\d{2,}):)?(\d{2}):(\d{2}\.\d{2,})/g;
/**
* Parses a TTML time in frame format
*
* @param {!shaka.media.TtmlTextParser.RateInfo_} rateInfo
* @param {string} text
* @return {?number}
* @private
*/
shaka.media.TtmlTextParser.parseFramesTime_ = function(rateInfo, text) {

// 3.45h, 3m or 4.20s
var timeHMSFormat =
/(?:([0-9]*\.*[0-9]*)h)?(?:([0-9]*\.*[0-9]*)m)?(?:([0-9.]*\.*[0-9]*)s)?$/g;
// 75f or 75.5f
var results = shaka.media.TtmlTextParser.timeFramesFormat_.exec(text);
var frames = Number(results[1]);

if (timeColonFormatFrames.test(text)) {
ret = shaka.media.TtmlTextParser.parseTimeWithFrames_(parser, frameRate);
} else if (timeColonFormat.test(text)) {
ret = parser.parseTime(timeColonFormat);
} else if (timeColonFormatMilliseconds.test(text)) {
ret = parser.parseTime(timeColonFormatMilliseconds);
} else if (timeHMSFormat.test(text)) {
ret = parser.parseTime(timeHMSFormat);
}
return frames / rateInfo.frameRate;
};

return ret;

/**
 * Converts a TTML time given as a tick count (e.g. '50t' or '50.5t') into a
 * time value by dividing the count by rateInfo.tickRate.
 *
 * The text is expected to match timeTickFormat_ already; no check is made
 * here for a failed match.
 *
 * @param {!shaka.media.TtmlTextParser.RateInfo_} rateInfo
 * @param {string} text
 * @return {?number}
 * @private
 */
shaka.media.TtmlTextParser.parseTickTime_ = function(rateInfo, text) {
  // The first capture group holds the numeric tick count.
  var match = shaka.media.TtmlTextParser.timeTickFormat_.exec(text);
  var tickCount = Number(match[1]);
  var ticksPerUnit = rateInfo.tickRate;

  return tickCount / ticksPerUnit;
};


/**
* Parses a TTML time containing frames
* Parses a TTML colon formatted time containing frames
*
* @param {!shaka.util.TextParser} parser
* @param {?string} frameRate
* @param {!shaka.media.TtmlTextParser.RateInfo_} rateInfo
* @param {string} text
* @return {?number}
* @private
*/
shaka.media.TtmlTextParser.parseTimeWithFrames_ = function(
parser, frameRate) {
if (!frameRate)
return null;
shaka.media.TtmlTextParser.parseColonTimeWithFrames_ = function(
rateInfo, text) {

var frameRateNum = Number(frameRate);
// 01:02:43:07 ('07' is frames) or 01:02:43:07.1 (subframes)
var results = parser.readRegex(/^(\d{2,}):(\d{2}):(\d{2}):(\d{2}(\.\d+)?)$/g);
if (results == null)
return null;
var results = shaka.media.TtmlTextParser.timeColonFormatFrames_.exec(text);

var hours = Number(results[1]);
var minutes = Number(results[2]);
var seconds = Number(results[3]);
var frames = Number(results[4]);
var subframes = Number(results[5]) || 0;

frames += subframes / rateInfo.subFrameRate;
seconds += frames / rateInfo.frameRate;

return seconds + (minutes * 60) + (hours * 3600);
};


var miliseconds = frames * frameRateNum;
if (minutes > 59 || seconds > 59)
/**
 * Runs a time-matching regex against the text and converts the captured
 * hours, minutes, seconds and milliseconds (capture groups 1 to 4, in that
 * order) into a number of seconds.
 *
 * @param {!RegExp} regex
 * @param {string} text
 * @return {?number} The time in seconds, or null when the regex does not
 *   match or matches only the empty string.
 * @private
 */
shaka.media.TtmlTextParser.parseTimeFromRegex_ = function(regex, text) {
  var match = regex.exec(text);
  if (match == null || match[0] == '') {
    return null;
  }

  // Optional captures are still present in the match array as undefined,
  // and a regex may define fewer than four groups; both cases count as 0.
  var component = function(index) {
    return Number(match[index]) || 0;
  };
  var hours = component(1);
  var minutes = component(2);
  var seconds = component(3);
  var milliseconds = component(4);

  return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
};



/**
 * Contains the frame rate, sub-frame rate and tick rate used to convert
 * frame-based and tick-based TTML time expressions,
 * ex. 01:02:03:04 (4 frames), 01:02:03:04.1 (4 frames, 1 subframe) or 50t.
 *
 * Each argument is the raw attribute string, or null when the attribute is
 * absent. Values that are missing, non-numeric, zero or negative fall back
 * to a default so that none of the stored rates is ever 0 or NaN:
 *   - frameRate defaults to 30;
 *   - subFrameRate defaults to 1;
 *   - tickRate defaults to (effective frame rate * sub-frame rate) when a
 *     frameRate attribute was given, and to 1 otherwise.
 *
 * frameRateMultiplier is expected as 'numerator denominator' (two positive
 * integers separated by one space); anything else is ignored. When valid,
 * the frame rate is scaled by numerator / denominator before the default
 * tick rate is derived from it.
 *
 * @param {?string} frameRate
 * @param {?string} subFrameRate
 * @param {?string} frameRateMultiplier
 * @param {?string} tickRate
 * @constructor
 * @struct
 * @private
 */
shaka.media.TtmlTextParser.RateInfo_ = function(
    frameRate, subFrameRate, frameRateMultiplier, tickRate) {

  // Converts an attribute value to a finite, positive number; returns the
  // fallback for null, '', non-numeric, zero or negative input.
  var positiveOr = function(value, fallback) {
    var num = Number(value);
    return (isFinite(num) && num > 0) ? num : fallback;
  };

  /**
   * @type {number}
   */
  this.frameRate = positiveOr(frameRate, 30);

  /**
   * @type {number}
   */
  this.subFrameRate = positiveOr(subFrameRate, 1);

  // Apply the multiplier first so that the default tick rate below is based
  // on the effective frame rate.
  if (frameRateMultiplier) {
    var multiplierResults = /^(\d+) (\d+)$/.exec(frameRateMultiplier);
    if (multiplierResults) {
      var numerator = Number(multiplierResults[1]);
      var denominator = Number(multiplierResults[2]);
      // Skip a zero numerator or denominator; either would make the frame
      // rate 0, Infinity or NaN and break every frame-based time.
      if (numerator > 0 && denominator > 0) {
        this.frameRate *= numerator / denominator;
      }
    }
  }

  /**
   * @type {number}
   */
  this.tickRate = positiveOr(
      tickRate, frameRate ? this.frameRate * this.subFrameRate : 1);
};


// Register this parser with the TextEngine as the handler for the
// 'application/ttml+xml' MIME type.
shaka.media.TextEngine.registerParser(
'application/ttml+xml', shaka.media.TtmlTextParser);
29 changes: 27 additions & 2 deletions lib/media/vtt_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ shaka.media.VttTextParser.parseCue_ = function(text) {

// Parse the times.
var parser = new shaka.util.TextParser(text[0]);
var start = parser.parseTime(/(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{2,})/g);
var start = shaka.media.VttTextParser.parseTime_(parser);
var expect = parser.readRegex(/[ \t]+-->[ \t]+/g);
var end = parser.parseTime(/(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{2,})/g);
var end = shaka.media.VttTextParser.parseTime_(parser);


if (start == null || expect == null || end == null) {
Expand Down Expand Up @@ -150,4 +150,29 @@ shaka.media.VttTextParser.parseSetting_ = function(cue, word) {
return true;
};


/**
 * Reads a WebVTT timestamp from the parser's current position and converts
 * it to seconds.
 *
 * @param {!shaka.util.TextParser} parser
 * @return {?number} The time in seconds, or null when no timestamp could be
 *   read or its minutes or seconds field is greater than 59.
 * @private
 */
shaka.media.VttTextParser.parseTime_ = function(parser) {
  // 00:00.000 or 00:00:00.000
  var timestampPattern = /(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})/g;
  var match = parser.readRegex(timestampPattern);
  if (match == null) {
    return null;
  }

  var minutes = Number(match[2]);
  var seconds = Number(match[3]);
  if (minutes > 59 || seconds > 59) {
    return null;
  }

  // The hours capture is optional; when absent it is undefined in the match
  // array, so treat it as 0.
  var hours = Number(match[1]) || 0;
  var milliseconds = Number(match[4]);

  return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
};

// Register this parser with the TextEngine as the handler for the
// 'text/vtt' MIME type.
shaka.media.TextEngine.registerParser('text/vtt', shaka.media.VttTextParser);
Loading

0 comments on commit 134119e

Please sign in to comment.