Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing for timestamps with commas as decimal separators #545

Merged
merged 1 commit into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/services/transcript-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ const TRANSCRIPT_MIME_TYPES = {
docx: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document']
};

// Matches a WebVTT-style timestamp anchored at the start of a string: an
// optional `hh:` prefix, then `mm:ss`, then a required fractional part
// introduced by a dot (e.g. `00:07:53.900` or `07:53.900`).
// NOTE(review): both regexes below are shared module-level objects carrying the
// `g` flag, so `RegExp.prototype.test()`/`exec()` keep state in `lastIndex`
// between calls. Confirm that callers only use `String.prototype.match()`/
// `matchAll()`, or reset `lastIndex` before reusing them.
export const VTT_TIMESTAMP_REGEX = /^(?:\d{2}:)?\d{2}:\d{2}(?:\.\d+)/g;
// SRT allows using comma for milliseconds while WebVTT does not, so this
// variant accepts either `.` or `,` before the fractional digits.
export const SRT_TIMESTAMP_REGEX = /^(?:\d{2}:)?\d{2}:\d{2}(?:[.,]\d+)/g;

const TRANSCRIPT_MIME_EXTENSIONS = [
{ type: TRANSCRIPT_MIME_TYPES.json, ext: 'json' },
{ type: TRANSCRIPT_MIME_TYPES.webvtt, ext: 'vtt' },
Expand Down Expand Up @@ -732,9 +736,9 @@ function parseTimedTextLine({ times, line, tag }, isSRT) {
let timestampRegex;
if (isSRT) {
// SRT allows using comma for milliseconds while WebVTT does not
timestampRegex = /^(?:\d{2}:)?\d{2}:\d{2}(?:[.,]\d+)/g;
timestampRegex = SRT_TIMESTAMP_REGEX;
} else {
timestampRegex = /^(?:\d{2}:)?\d{2}:\d{2}(?:\.\d+)/;
timestampRegex = VTT_TIMESTAMP_REGEX;
}

switch (tag) {
Expand Down
14 changes: 12 additions & 2 deletions src/services/utility-helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -253,11 +253,21 @@ export function getMediaFragment(uri, duration = 0) {
if (uri !== undefined) {
const fragment = uri.split('#t=')[1];
if (fragment !== undefined) {
let [start, end] = fragment.split(',');
let start, end;
/**
* If the times are in a string format (hh:mm:ss) check for comma separated decimals.
* Some SRT captions use comma to separate milliseconds.
*/
const timestampRegex = /([0-9]*:){1,2}([0-9]{2})(?:((\.|\,)[0-9]{2,3})?)/g;
if (fragment.includes(':') && [...fragment.matchAll(/\,/g)]?.length > 1) {
const times = [...fragment.matchAll(timestampRegex)];
[start, end] = times?.length == 2 ? [times[0][0], times[1][0]] : [0, 0];
} else {
[start, end] = fragment.split(',');
}
if (end === undefined) {
end = duration.toString();
}
let timestampRegex = /([0-9]*:){1,2}([0-9]{2})(?:(\.[0-9]{2,3})*)/g;
return {
start: start.match(timestampRegex) ? timeToS(start) : Number(start),
end: end.match(timestampRegex) ? timeToS(end) : Number(end)
Expand Down
32 changes: 32 additions & 0 deletions src/services/utility-helpers.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,45 @@ describe('util helper', () => {
});
});

it('returns time in seconds when hh:mm:ss,ms format time string is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=00:07:53,900,00:07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});

it('returns time in seconds when hh:mm:ss format with mixed decimal formating is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=00:07:53.900,00:07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});

it('returns time in seconds when hh:mm:ss format time string is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=00:07:53,00:07:56'
)).toEqual({
start: 473, end: 476
});
});

it('returns time in seconds when mm:ss,ms format time string is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=07:53,900,07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});

it('returns time in seconds when mm:ss,ms format with mixed decimal formatting is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=07:53.900,07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});
});

describe('getResourceItems()', () => {
Expand Down