Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YouTube] Fix parsing short relative date formats (English only) #1068

Merged
merged 2 commits into from
Jun 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ private boolean textualDateMatches(final String textualDate, final String agoPhr
final String escapedSeparator = patternsHolder.wordSeparator().equals(" ")
// From JDK8 → \h - Treat horizontal spaces as a normal one
// (non-breaking space, thin space, etc.)
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"
AudricV marked this conversation as resolved.
Show resolved Hide resolved
// Also split the string on numbers to be able to parse strings like "2wk"
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\d]"
: Pattern.quote(patternsHolder.wordSeparator());

// (^|separator)pattern($|separator)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package org.schabi.newpipe.extractor.utils;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;

import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Tests parsing of English relative ("time ago") dates, in both their long form
 * (e.g. {@code "3 weeks ago"}) and their short form (e.g. {@code "3 wk ago"}).
 *
 * <p>Two precision regimes are asserted, mirroring the expectations encoded below:</p>
 * <ul>
 *     <li>seconds, minutes and hours must be within one second of the expected instant;</li>
 *     <li>days, weeks, months and years must equal the expected instant truncated to the hour;
 *     for years the expected instant is additionally shifted one day further into the past.</li>
 * </ul>
 */
public class TimeagoTest {
    /** Parser for {@link Localization#DEFAULT}, shared by every test in this class. */
    private static TimeAgoParser parser;
    /** Reference instant (UTC) captured right after the parser has been obtained. */
    private static OffsetDateTime now;

    @BeforeAll
    public static void setUp() {
        parser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
        now = OffsetDateTime.now(ZoneOffset.UTC);
    }

    /** Long unit names: second(s), minute(s), hour(s), day(s), week(s), month(s), year(s). */
    @Test
    void parseTimeago() throws ParsingException {
        assertParsedWithin1s(1, ChronoUnit.SECONDS, "1 second ago");
        assertParsedWithin1s(12, ChronoUnit.SECONDS, "12 second ago");
        assertParsedWithin1s(1, ChronoUnit.MINUTES, "1 minute ago");
        assertParsedWithin1s(23, ChronoUnit.MINUTES, "23 minutes ago");
        assertParsedWithin1s(1, ChronoUnit.HOURS, "1 hour ago");
        assertParsedWithin1s(8, ChronoUnit.HOURS, "8 hours ago");

        assertParsedToTheHour(now.minus(1, ChronoUnit.DAYS), "1 day ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.DAYS), "3 days ago");
        assertParsedToTheHour(now.minus(1, ChronoUnit.WEEKS), "1 week ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.WEEKS), "3 weeks ago");
        assertParsedToTheHour(now.minus(1, ChronoUnit.MONTHS), "1 month ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.MONTHS), "3 months ago");
        // Year-based dates are expected one extra day in the past.
        assertParsedToTheHour(now.minus(1, ChronoUnit.YEARS).minusDays(1), "1 year ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.YEARS).minusDays(1), "3 years ago");
    }

    /** Abbreviated unit names: sec, min, hr, wk, mo, yr (days have no abbreviation here). */
    @Test
    void parseTimeagoShort() throws ParsingException {
        assertParsedWithin1s(1, ChronoUnit.SECONDS, "1 sec ago");
        assertParsedWithin1s(12, ChronoUnit.SECONDS, "12 sec ago");
        assertParsedWithin1s(1, ChronoUnit.MINUTES, "1 min ago");
        assertParsedWithin1s(23, ChronoUnit.MINUTES, "23 min ago");
        assertParsedWithin1s(1, ChronoUnit.HOURS, "1 hr ago");
        assertParsedWithin1s(8, ChronoUnit.HOURS, "8 hr ago");

        assertParsedToTheHour(now.minus(1, ChronoUnit.DAYS), "1 day ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.DAYS), "3 days ago");
        assertParsedToTheHour(now.minus(1, ChronoUnit.WEEKS), "1 wk ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.WEEKS), "3 wk ago");
        assertParsedToTheHour(now.minus(1, ChronoUnit.MONTHS), "1 mo ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.MONTHS), "3 mo ago");
        // Year-based dates are expected one extra day in the past.
        assertParsedToTheHour(now.minus(1, ChronoUnit.YEARS).minusDays(1), "1 yr ago");
        assertParsedToTheHour(now.minus(3, ChronoUnit.YEARS).minusDays(1), "3 yr ago");
    }

    /**
     * Parses {@code textualDate} and asserts that the result is within one second of
     * {@link #now} minus {@code amount} of {@code unit}.
     *
     * @param amount      how many units in the past the date is expected to be
     * @param unit        the unit of {@code amount}
     * @param textualDate the relative date to parse
     * @throws ParsingException if the parser rejects {@code textualDate}
     */
    private void assertParsedWithin1s(final long amount,
                                      final ChronoUnit unit,
                                      final String textualDate) throws ParsingException {
        assertTimeWithin1s(
                now.minus(amount, unit),
                parser.parse(textualDate).offsetDateTime()
        );
    }

    /**
     * Parses {@code textualDate} and asserts that the result equals {@code expected}
     * truncated to the hour.
     *
     * @param expected    the expected instant, before truncation to the hour
     * @param textualDate the relative date to parse
     * @throws ParsingException if the parser rejects {@code textualDate}
     */
    private void assertParsedToTheHour(final OffsetDateTime expected,
                                       final String textualDate) throws ParsingException {
        assertEquals(
                expected.truncatedTo(ChronoUnit.HOURS),
                parser.parse(textualDate).offsetDateTime()
        );
    }

    /**
     * Asserts that two instants are at most one second apart, comparing their epoch seconds.
     *
     * @param expected the expected instant
     * @param actual   the instant produced by the code under test
     */
    void assertTimeWithin1s(final OffsetDateTime expected, final OffsetDateTime actual) {
        final long delta = Math.abs(expected.toEpochSecond() - actual.toEpochSecond());
        // Supplier overload: the failure message is only formatted when the assertion fails.
        assertTrue(delta <= 1,
                () -> String.format("Expected: %s\nActual: %s", expected, actual));
    }
}
39 changes: 26 additions & 13 deletions timeago-parser/raw/unique_patterns.json
Original file line number Diff line number Diff line change
Expand Up @@ -415,62 +415,75 @@
"word_separator": " ",
"seconds": [
"second",
"seconds"
"seconds",
"sec"
],
"minutes": [
"minute",
"minutes"
"minutes",
"min"
],
"hours": [
"hour",
"hours"
"hours",
"h"
],
"days": [
"day",
"days"
"days",
"d"
],
"weeks": [
"week",
"weeks"
"weeks",
"w"
],
"months": [
"month",
"months"
"months",
"mo"
],
"years": [
"year",
"years"
"years",
"y"
]
},
"en-GB": {
"word_separator": " ",
"seconds": [
"second",
"seconds"
"seconds",
"sec"
],
"minutes": [
"minute",
"minutes"
"minutes",
"min"
],
"hours": [
"hour",
"hours"
"hours",
"hr"
],
"days": [
"day",
"days"
],
"weeks": [
"week",
"weeks"
"weeks",
"wk"
],
"months": [
"month",
"months"
"months",
"mo"
],
"years": [
"year",
"years"
"years",
"yr"
]
},
"es": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
public class en extends PatternsHolder {
private static final String WORD_SEPARATOR = " ";
private static final String[]
SECONDS /**/ = {"second", "seconds"},
MINUTES /**/ = {"minute", "minutes"},
HOURS /**/ = {"hour", "hours"},
DAYS /**/ = {"day", "days"},
WEEKS /**/ = {"week", "weeks"},
MONTHS /**/ = {"month", "months"},
YEARS /**/ = {"year", "years"};
SECONDS /**/ = {"second", "seconds", "sec"},
MINUTES /**/ = {"minute", "minutes", "min"},
HOURS /**/ = {"hour", "hours", "h"},
DAYS /**/ = {"day", "days", "d"},
WEEKS /**/ = {"week", "weeks", "w"},
MONTHS /**/ = {"month", "months", "mo"},
YEARS /**/ = {"year", "years", "y"};

private static final en INSTANCE = new en();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
public class en_GB extends PatternsHolder {
private static final String WORD_SEPARATOR = " ";
private static final String[]
SECONDS /**/ = {"second", "seconds"},
MINUTES /**/ = {"minute", "minutes"},
HOURS /**/ = {"hour", "hours"},
SECONDS /**/ = {"second", "seconds", "sec"},
MINUTES /**/ = {"minute", "minutes", "min"},
HOURS /**/ = {"hour", "hours", "hr"},
DAYS /**/ = {"day", "days"},
WEEKS /**/ = {"week", "weeks"},
MONTHS /**/ = {"month", "months"},
YEARS /**/ = {"year", "years"};
WEEKS /**/ = {"week", "weeks", "wk"},
MONTHS /**/ = {"month", "months", "mo"},
YEARS /**/ = {"year", "years", "yr"};

private static final en_GB INSTANCE = new en_GB();

Expand Down