Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SRT/VTT timestamp validation to deny single digit hour in hh:mm... #546

Merged
merged 2 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions src/components/Transcript/Transcript.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import './Transcript.scss';
const NO_TRANSCRIPTS_MSG = 'No valid Transcript(s) found, please check again.';
const INVALID_URL_MSG = 'Invalid URL for transcript, please check again.';
const INVALID_VTT = 'Invalid WebVTT file, please check again.';
const INVALID_TIMESTAMP = 'Invalid timestamp format in cue(s), please check again.';
const NO_SUPPORT = 'Transcript format is not supported, please check again.';

const buildSpeakerText = (item) => {
Expand Down Expand Up @@ -431,14 +432,24 @@ const Transcript = ({ playerID, manifestUrl, showNotes = false, search = {}, tra
if (value != null) {
const { tData, tUrl, tType, tFileExt } = value;
let newError = '';
if (tType === TRANSCRIPT_TYPES.invalid) {
newError = INVALID_URL_MSG;
} else if (tType === TRANSCRIPT_TYPES.noTranscript) {
newError = NO_TRANSCRIPTS_MSG;
} else if (tType === TRANSCRIPT_TYPES.noSupport) {
newError = NO_SUPPORT;
} else if (tType === TRANSCRIPT_TYPES.invalidTimedText) {
newError = INVALID_VTT;
switch (tType) {
case TRANSCRIPT_TYPES.invalid:
newError = INVALID_URL_MSG;
break;
case TRANSCRIPT_TYPES.noTranscript:
newError = NO_TRANSCRIPTS_MSG;
break;
case TRANSCRIPT_TYPES.noSupport:
newError = NO_SUPPORT;
break;
case TRANSCRIPT_TYPES.invalidVTT:
newError = INVALID_VTT;
break;
case TRANSCRIPT_TYPES.invalidTimestamp:
newError = INVALID_TIMESTAMP;
break;
default:
break;
}
setTranscript(tData);
setTranscriptInfo({ title, filename, id, isMachineGen, tType, tUrl, tFileExt, tError: newError });
Expand Down
53 changes: 51 additions & 2 deletions src/components/Transcript/Transcript.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ describe('Transcript component', () => {
initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 },
initialPlayerState: {},
...props,
showNotes: true,
showNotes: true,
});

render(
Expand Down Expand Up @@ -782,7 +782,7 @@ describe('Transcript component', () => {
.mockReturnValue({
tData: [],
tUrl: 'https://example.com/lunchroom_manners.vtt',
tType: transcriptParser.TRANSCRIPT_TYPES.invalidTimedText,
tType: transcriptParser.TRANSCRIPT_TYPES.invalidVTT,
});

const TranscriptWithState = withManifestAndPlayerProvider(Transcript, {
Expand All @@ -809,6 +809,55 @@ describe('Transcript component', () => {
);
});
});

test('WebVTT file with invalid timestamps', async () => {
const props = {
playerID: 'player-id',
transcripts: [
{
canvasId: 0,
items: [
{
title: 'WebVTT Transcript',
url: 'https://example.com/lunchroom_manners.vtt',
},
],
},
],
};

const parseTranscriptMock = jest
.spyOn(transcriptParser, 'parseTranscriptData')
.mockReturnValue({
tData: [],
tUrl: 'https://example.com/lunchroom_manners.vtt',
tType: transcriptParser.TRANSCRIPT_TYPES.invalidTimestamp,
});

const TranscriptWithState = withManifestAndPlayerProvider(Transcript, {
initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 },
initialPlayerState: {},
...props
});

render(
<React.Fragment>
<video id="player-id" />
<TranscriptWithState />
</React.Fragment>
);

await act(() => Promise.resolve());

await waitFor(() => {
expect(parseTranscriptMock).toHaveBeenCalled();
expect(screen.queryByTestId('transcript_content_-4')).toBeInTheDocument();
expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
expect(screen.getByTestId('no-transcript')).toHaveTextContent(
'Invalid timestamp format in cue(s), please check again.'
);
});
});
});

describe('with props', () => {
Expand Down
19 changes: 14 additions & 5 deletions src/services/transcript-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ const TRANSCRIPT_MIME_EXTENSIONS = [

// ENum for describing transcript types include invalid and no transcript info
export const TRANSCRIPT_TYPES = {
invalidTimedText: -3,
invalidTimestamp: -4,
invalidVTT: -3,
noSupport: -2,
invalid: -1,
noTranscript: 0,
Expand Down Expand Up @@ -557,7 +558,7 @@ export function parseTimedText(fileData, isSRT = false) {
const { valid, cue_lines, notes } = validateWebVTT(lines);
if (!valid) {
console.error('Invalid WebVTT file');
return { tData: [], tType: TRANSCRIPT_TYPES.invalidTimedText };
return { tData: [], tType: TRANSCRIPT_TYPES.invalidVTT };
}
cueLines = cue_lines;
noteLines = notes;
Expand All @@ -566,14 +567,22 @@ export function parseTimedText(fileData, isSRT = false) {
const groups = groupTimedTextLines(cueLines);
// Add back the NOTE(s) in the header block
groups.unshift(...noteLines);
let hasInvalidTimestamp = false;
groups.map((t) => {
let line = parseTimedTextLine(t, isSRT);
if (line) {
tData.push(line);
} else {
hasInvalidTimestamp ||= true;
Dananji marked this conversation as resolved.
Show resolved Hide resolved
}
});

return { tData, tType: TRANSCRIPT_TYPES.timedText };
return {
tData: hasInvalidTimestamp ? null : tData,
tType: hasInvalidTimestamp
? TRANSCRIPT_TYPES.invalidTimestamp
: TRANSCRIPT_TYPES.timedText
};
}

/**
Expand Down Expand Up @@ -719,9 +728,9 @@ function parseTimedTextLine({ times, line, tag }, isSRT) {
let timestampRegex;
if (isSRT) {
// SRT allows using comma for milliseconds while WebVTT does not
timestampRegex = /([0-9]*:){1,2}([0-9]{2})(\.|\,)[0-9]{2,3}/g;
timestampRegex = /^(?:\d{2}:)?\d{2}:\d{2}(?:[.,]\d+)/g;
} else {
timestampRegex = /([0-9]*:){1,2}([0-9]{2})\.[0-9]{2,3}/g;
timestampRegex = /^(?:\d{2}:)?\d{2}:\d{2}(?:\.\d+)/;
}

switch (tag) {
Expand Down
27 changes: 22 additions & 5 deletions src/services/transcript-parser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ describe('transcript-parser', () => {
expect(tData).toHaveLength(0);
expect(console.error).toHaveBeenCalledTimes(1);
expect(console.error).toHaveBeenCalledWith('Invalid WebVTT file');
expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.invalidTimedText);
expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.invalidVTT);
});

test('with random text in the header', () => {
Expand All @@ -866,24 +866,41 @@ describe('transcript-parser', () => {
expect(tData).toHaveLength(0);
expect(console.error).toHaveBeenCalledTimes(1);
expect(console.error).toHaveBeenCalledWith('Invalid WebVTT file');
expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.invalidTimedText);
expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.invalidVTT);
});

test('with incorrect timestamp', () => {
test('with incorrect timestamp with missing seconds group in hh:mm:ss format', () => {
// mock console.error
console.error = jest.fn();
const mockResponse =
'WEBVTT\r\n\r\n1\r\n00:00:01.200 --> 00:00:.000\n[music]\n\r\n2\r\n00:00:22.200 --> 00:00:26.600\nJust before lunch one day, a puppet show \nwas put on at school.\n\r\n3\r\n00:00:26.700 --> 00:00:31.500\nIt was called "Mister Bungle Goes to Lunch".\n\r\n4\r\n00:00:31.600 --> 00:00:34.500\nIt was fun to watch.\r\n\r\n5\r\n00:00:36.100 --> 00:00:41.300\nIn the puppet show, Mr. Bungle came to the \nboys\' room on his way to lunch.\n';

const { tData, tType } = transcriptParser.parseTimedText(mockResponse);

expect(tData).toHaveLength(4);
expect(tData).toBeNull();
expect(console.error).toHaveBeenCalledTimes(1);
expect(console.error).toHaveBeenCalledWith(
'Invalid timestamp in line with text; ',
'[music]'
);
expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.timedText);
expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.invalidTimestamp);
});

test('with incorrect timestamp with a single digit for hour in hh:mm:ss format', () => {
// mock console.error
console.error = jest.fn();
const mockResponse =
'WEBVTT\r\n\r\n1\r\n0:00:01.200 --> 0:00:00.000\n[music]\n\r\n2\r\n0:00:22.200 --> 0:00:26.600\nJust before lunch one day, a puppet show \nwas put on at school.\n\r\n3\r\n0:00:26.700 --> 0:00:31.500\nIt was called "Mister Bungle Goes to Lunch".\n\r\n4\r\n0:00:31.600 --> 0:00:34.500\nIt was fun to watch.\r\n\r\n5\r\n0:00:36.100 --> 0:00:41.300\nIn the puppet show, Mr. Bungle came to the \nboys\' room on his way to lunch.\n';

const { tData, tType } = transcriptParser.parseTimedText(mockResponse);

expect(tData).toBeNull();
expect(console.error).toHaveBeenCalledTimes(5);
expect(console.error).toHaveBeenCalledWith(
'Invalid timestamp in line with text; ',
'[music]'
);
expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.invalidTimestamp);
});
});
});
Expand Down