Skip to content

Commit

Permalink
Merge pull request #4582 from szaboa/feature/4306_srt_position_tags
Browse files Browse the repository at this point in the history
#4306 - Extract tags from SubRip subtitles, add support for alignment
  • Loading branch information
ojw28 authored Oct 3, 2018
2 parents e91065c + 56c7e1f commit 16fe67b
Show file tree
Hide file tree
Showing 3 changed files with 300 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,19 @@
*/
package com.google.android.exoplayer2.text.subrip;

import android.support.annotation.StringDef;
import android.text.Html;
import android.text.Layout;
import android.text.Spanned;
import android.text.TextUtils;
import com.google.android.exoplayer2.text.Cue;
import com.google.android.exoplayer2.text.SimpleSubtitleDecoder;
import com.google.android.exoplayer2.util.Log;
import com.google.android.exoplayer2.util.LongArray;
import com.google.android.exoplayer2.util.ParsableByteArray;

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -38,6 +43,33 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
private static final Pattern SUBRIP_TIMING_LINE =
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")?\\s*");

// Matches any brace-enclosed override tag that starts with a backslash, i.e. "{\...}". The match
// is reluctant, so it ends at the first closing brace.
private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}");
// Regular expression source (not precompiled) matching exactly one alignment tag, "{\an1}" to
// "{\an9}".
private static final String SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}";

// Fractions used as the line and position of a cue that carries an alignment tag.
private static final float DEFAULT_START_FRACTION = 0.08f;
// Mirror of the start fraction, measured from the opposite edge.
private static final float DEFAULT_END_FRACTION = 1 - DEFAULT_START_FRACTION;
private static final float DEFAULT_MID_FRACTION = 0.5f;

// Source-retained annotation enumerating the alignment tag strings declared below.
@Retention(RetentionPolicy.SOURCE)
@StringDef({
ALIGN_BOTTOM_LEFT, ALIGN_BOTTOM_MID, ALIGN_BOTTOM_RIGHT,
ALIGN_MID_LEFT, ALIGN_MID_MID, ALIGN_MID_RIGHT,
ALIGN_TOP_LEFT, ALIGN_TOP_MID, ALIGN_TOP_RIGHT
})

private @interface SubRipTag {}

// Possible valid alignment tags based on SSA v4+ specs
// The digit follows the numpad layout: 1-3 bottom row, 4-6 middle row, 7-9 top row, each row
// running left, middle, right.
private static final String ALIGN_BOTTOM_LEFT = "{\\an1}";
private static final String ALIGN_BOTTOM_MID = "{\\an2}";
private static final String ALIGN_BOTTOM_RIGHT = "{\\an3}";
private static final String ALIGN_MID_LEFT = "{\\an4}";
private static final String ALIGN_MID_MID = "{\\an5}";
private static final String ALIGN_MID_RIGHT = "{\\an6}";
private static final String ALIGN_TOP_LEFT = "{\\an7}";
private static final String ALIGN_TOP_MID = "{\\an8}";
private static final String ALIGN_TOP_RIGHT = "{\\an9}";

private final StringBuilder textBuilder;

public SubripDecoder() {
Expand Down Expand Up @@ -87,16 +119,32 @@ protected SubripSubtitle decode(byte[] bytes, int length, boolean reset) {
}

// Read and parse the text.
ArrayList<String> tags = new ArrayList<>();
textBuilder.setLength(0);
while (!TextUtils.isEmpty(currentLine = subripData.readLine())) {
if (textBuilder.length() > 0) {
textBuilder.append("<br>");
}
textBuilder.append(currentLine.trim());
textBuilder.append(processLine(currentLine, tags));
}

Spanned text = Html.fromHtml(textBuilder.toString());
cues.add(new Cue(text));
Cue cue = null;

// At the end of this loop the cue must be created with the applied tags
for (String tag : tags) {

// Check if the tag is an alignment tag
if (tag.matches(SUBRIP_ALIGNMENT_TAG)) {
cue = buildCue(text, tag);

// Based on the specs, in case of alignment tags only the first appearance counts, so break
break;
}
}

cues.add(cue == null ? new Cue(text) : cue);

if (haveEndTimecode) {
cues.add(null);
}
Expand All @@ -108,12 +156,116 @@ protected SubripSubtitle decode(byte[] bytes, int length, boolean reset) {
return new SubripSubtitle(cuesArray, cueTimesUsArray);
}

/**
 * Trims the given line and strips every SSA-style override tag from it.
 *
 * <p>A tag is any span matched by {@code SUBRIP_TAG_PATTERN}, i.e. text of the form
 * <code>{\...}</code>: an opening brace immediately followed by a backslash, up to the first
 * closing brace. Each tag found is appended to {@code tags} in order of appearance.
 *
 * @param currentLine The raw subtitle line.
 * @param tags Receives the tags that were removed from the line, in order.
 * @return The trimmed line with all tags removed.
 */
private String processLine(String currentLine, ArrayList<String> tags) {
  String line = currentLine.trim();
  Matcher tagMatcher = SUBRIP_TAG_PATTERN.matcher(line);

  // Rebuild the line from the segments that lie between consecutive tags.
  StringBuilder withoutTags = new StringBuilder(line.length());
  int copyFrom = 0;
  while (tagMatcher.find()) {
    tags.add(tagMatcher.group());
    withoutTags.append(line, copyFrom, tagMatcher.start());
    copyFrom = tagMatcher.end();
  }
  // Text after the last tag (or the whole line when no tag was found).
  withoutTags.append(line, copyFrom, line.length());

  return withoutTags.toString();
}

/**
 * Creates a {@link Cue} for the given text, positioned according to an alignment tag.
 *
 * <p>Per the SSA v4+ specs an alignment tag has the form <code>{\an[1-9]}</code>, where the digit
 * selects one of nine screen regions following the numpad layout. The tag determines both the
 * vertical placement (line and line anchor) and the horizontal placement (position and position
 * anchor) of the cue. A tag that is not one of the nine known values is placed like
 * <code>{\an2}</code>: bottom, horizontally centered.
 *
 * <p>Legacy tags of the form <code>{\a[digits]}</code> follow different direction rules and are
 * not supported.
 *
 * @param text The cue text.
 * @param alignmentTag The alignment tag to apply.
 * @return The built cue.
 */
private Cue buildCue(Spanned text, String alignmentTag) {
  // Vertical alignment: which row of the numpad the tag belongs to.
  float verticalFraction;
  @Cue.AnchorType int verticalAnchor;
  switch (alignmentTag) {
    case ALIGN_TOP_LEFT:
    case ALIGN_TOP_MID:
    case ALIGN_TOP_RIGHT:
      verticalFraction = DEFAULT_START_FRACTION;
      verticalAnchor = Cue.ANCHOR_TYPE_START;
      break;
    case ALIGN_MID_LEFT:
    case ALIGN_MID_MID:
    case ALIGN_MID_RIGHT:
      verticalFraction = DEFAULT_MID_FRACTION;
      verticalAnchor = Cue.ANCHOR_TYPE_MIDDLE;
      break;
    case ALIGN_BOTTOM_LEFT:
    case ALIGN_BOTTOM_MID:
    case ALIGN_BOTTOM_RIGHT:
    default:
      // Unknown tags are placed at the bottom.
      verticalFraction = DEFAULT_END_FRACTION;
      verticalAnchor = Cue.ANCHOR_TYPE_END;
      break;
  }

  // Horizontal alignment: which column of the numpad the tag belongs to.
  float horizontalFraction;
  @Cue.AnchorType int horizontalAnchor;
  switch (alignmentTag) {
    case ALIGN_BOTTOM_LEFT:
    case ALIGN_MID_LEFT:
    case ALIGN_TOP_LEFT:
      horizontalFraction = DEFAULT_START_FRACTION;
      horizontalAnchor = Cue.ANCHOR_TYPE_START;
      break;
    case ALIGN_BOTTOM_RIGHT:
    case ALIGN_MID_RIGHT:
    case ALIGN_TOP_RIGHT:
      horizontalFraction = DEFAULT_END_FRACTION;
      horizontalAnchor = Cue.ANCHOR_TYPE_END;
      break;
    case ALIGN_BOTTOM_MID:
    case ALIGN_MID_MID:
    case ALIGN_TOP_MID:
    default:
      // Unknown tags are centered horizontally.
      horizontalFraction = DEFAULT_MID_FRACTION;
      horizontalAnchor = Cue.ANCHOR_TYPE_MIDDLE;
      break;
  }

  return new Cue(
      text,
      null,
      verticalFraction,
      Cue.LINE_TYPE_FRACTION,
      verticalAnchor,
      horizontalFraction,
      horizontalAnchor,
      Cue.DIMEN_UNSET);
}

/**
 * Converts a matched timecode into microseconds.
 *
 * <p>The four capture groups starting at {@code groupOffset + 1} are read, in order, as hours,
 * minutes, seconds and milliseconds.
 *
 * @param matcher A matcher that has already matched a timecode.
 * @param groupOffset Index of the group immediately preceding the hours group.
 * @return The timecode in microseconds.
 * @throws NumberFormatException If one of the groups is not a parsable {@code long}.
 */
private static long parseTimecode(Matcher matcher, int groupOffset) {
  long hours = Long.parseLong(matcher.group(groupOffset + 1));
  long minutes = Long.parseLong(matcher.group(groupOffset + 2));
  long seconds = Long.parseLong(matcher.group(groupOffset + 3));
  long millis = Long.parseLong(matcher.group(groupOffset + 4));

  long totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis;
  // Milliseconds to microseconds.
  return totalMs * 1000;
}

}
56 changes: 56 additions & 0 deletions library/core/src/test/assets/subrip/typical_with_tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
1
00:00:00,000 --> 00:00:01,234
This is {\an1} the first subtitle.

2
00:00:02,345 --> 00:00:03,456
This is the second subtitle.
Second {\ an 2} subtitle with second line.

3
00:00:04,567 --> 00:00:08,901
This {\an2} is the third {\ tag} subtitle.

4
00:00:09,567 --> 00:00:12,901
This { \an2} is the fourth subtitle.

5
00:00:013,567 --> 00:00:14,901
This {\an2} is the fifth subtitle with multiple {\xyz} valid {\qwe} tags.

6
00:00:015,567 --> 00:00:15,901
This {\an1} is a lines.

7
00:00:016,567 --> 00:00:16,901
This {\an2} is a line.

8
00:00:017,567 --> 00:00:17,901
This {\an3} is a line.

9
00:00:018,567 --> 00:00:18,901
This {\an4} is a line.

10
00:00:019,567 --> 00:00:19,901
This {\an5} is a line.

11
00:00:020,567 --> 00:00:20,901
This {\an6} is a line.

12
00:00:021,567 --> 00:00:22,901
This {\an7} is a line.

13
00:00:023,567 --> 00:00:23,901
This {\an8} is a line.

14
00:00:024,567 --> 00:00:24,901
This {\an9} is a line.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import static com.google.common.truth.Truth.assertThat;

import com.google.android.exoplayer2.testutil.TestUtil;
import com.google.android.exoplayer2.text.Cue;

import java.io.IOException;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand All @@ -36,6 +38,7 @@ public final class SubripDecoderTest {
private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence";
private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps";
private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end";
private static final String TYPICAL_WITH_TAGS = "subrip/typical_with_tags";
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";

@Test
Expand Down Expand Up @@ -154,6 +157,92 @@ public void testDecodeNoEndTimecodes() throws IOException {
.isEqualTo("Or to the end of the media.");
}

/**
 * Decodes the tagged SubRip asset and checks that override tags are stripped from the cue text
 * and that each alignment tag from <code>{\an1}</code> to <code>{\an9}</code> yields the expected
 * anchors.
 *
 * <p>Cues are looked up at even event indices only; presumably the odd indices are the cue end
 * times (to be confirmed against the decoder).
 */
@Test
public void testDecodeCueWithTag() throws IOException {
  SubripDecoder decoder = new SubripDecoder();
  byte[] bytes = TestUtil.getByteArray(RuntimeEnvironment.application, TYPICAL_WITH_TAGS);
  SubripSubtitle subtitle = decoder.decode(bytes, bytes.length, false);

  // Tags are removed from the text.
  assertThat(cueTextAtEvent(subtitle, 0)).isEqualTo("This is the first subtitle.");
  assertThat(cueTextAtEvent(subtitle, 2))
      .isEqualTo("This is the second subtitle.\nSecond subtitle with second line.");
  assertThat(cueTextAtEvent(subtitle, 4)).isEqualTo("This is the third subtitle.");

  // Per the SSA v4+ specs the opening brace must be directly followed by a backslash, so
  // "{ \an2}" is not a tag and stays in the text.
  assertThat(cueTextAtEvent(subtitle, 6)).isEqualTo("This { \\an2} is the fourth subtitle.");

  assertThat(cueTextAtEvent(subtitle, 8))
      .isEqualTo("This is the fifth subtitle with multiple valid tags.");

  // Alignment tags: bottom row {\an1} {\an2} {\an3}.
  assertCueAnchors(subtitle, 10, Cue.ANCHOR_TYPE_START, Cue.ANCHOR_TYPE_END);
  assertCueAnchors(subtitle, 12, Cue.ANCHOR_TYPE_MIDDLE, Cue.ANCHOR_TYPE_END);
  assertCueAnchors(subtitle, 14, Cue.ANCHOR_TYPE_END, Cue.ANCHOR_TYPE_END);

  // Middle row {\an4} {\an5} {\an6}.
  assertCueAnchors(subtitle, 16, Cue.ANCHOR_TYPE_START, Cue.ANCHOR_TYPE_MIDDLE);
  assertCueAnchors(subtitle, 18, Cue.ANCHOR_TYPE_MIDDLE, Cue.ANCHOR_TYPE_MIDDLE);
  assertCueAnchors(subtitle, 20, Cue.ANCHOR_TYPE_END, Cue.ANCHOR_TYPE_MIDDLE);

  // Top row {\an7} {\an8} {\an9}.
  assertCueAnchors(subtitle, 22, Cue.ANCHOR_TYPE_START, Cue.ANCHOR_TYPE_START);
  assertCueAnchors(subtitle, 24, Cue.ANCHOR_TYPE_MIDDLE, Cue.ANCHOR_TYPE_START);
  assertCueAnchors(subtitle, 26, Cue.ANCHOR_TYPE_END, Cue.ANCHOR_TYPE_START);
}

/** Returns the text of the first cue active at the time of the given event, as a string. */
private static String cueTextAtEvent(SubripSubtitle subtitle, int eventIndex) {
  return subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString();
}

/**
 * Asserts the position anchor, then the line anchor, of the first cue active at the time of the
 * given event.
 */
private static void assertCueAnchors(
    SubripSubtitle subtitle,
    int eventIndex,
    int expectedPositionAnchor,
    int expectedLineAnchor) {
  assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).positionAnchor)
      .isEqualTo(expectedPositionAnchor);
  assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).lineAnchor)
      .isEqualTo(expectedLineAnchor);
}

private static void assertTypicalCue1(SubripSubtitle subtitle, int eventIndex) {
assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())
Expand Down

0 comments on commit 16fe67b

Please sign in to comment.