Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: strip prefixes and suffixes in links #5486

Merged
merged 7 commits into from
Jul 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
- Minor: Improve appearance of reply button. (#5491)
- Minor: Introduce HTTP API for plugins. (#5383, #5492, #5494)
- Minor: Support more Firefox variants for incognito link opening. (#5503)
- Minor: Links can now have prefixes and suffixes such as parentheses. (#5486)
- Bugfix: Fixed tab move animation occasionally failing to start after closing a tab. (#5426)
- Bugfix: If a network request errors with 200 OK, Qt's error code is now reported instead of the HTTP status. (#5378)
- Bugfix: Fixed restricted users usernames not being clickable. (#5405)
Expand Down
15 changes: 7 additions & 8 deletions benchmarks/src/LinkParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,16 @@
#include <QString>
#include <QStringList>

#include <optional>

using namespace chatterino;

const QString INPUT = QStringLiteral(
"If your Chatterino isn't loading FFZ emotes, update to the latest nightly "
"(or 2.4.2 if its out) "
"https://github.com/Chatterino/chatterino2/releases/tag/nightly-build "
"AlienPls https://www.youtube.com/watch?v=ELBBiBDcWc0 "
"127.0.3 aaaa xd 256.256.256.256 AsdQwe xd 127.0.0.1 https://. https://.be "
"https://a http://a.b https://a.be ftp://xdd.com "
"127.0.3 aaaa xd 256.256.256.256 AsdQwe xd 127.0.0.1 https://. "
"*https://.be "
"https://a: http://a.b (https://a.be) ftp://xdd.com "
"this is a text lol . ://foo.com //aa.de :/foo.de xd.XDDDDDD ");

static void BM_LinkParsing(benchmark::State &state)
Expand All @@ -24,15 +23,15 @@ static void BM_LinkParsing(benchmark::State &state)

// Make sure the TLDs are loaded
{
benchmark::DoNotOptimize(LinkParser("xd.com").result());
benchmark::DoNotOptimize(linkparser::parse("xd.com"));
}

for (auto _ : state)
{
for (auto word : words)
for (const auto &word : words)
{
LinkParser parser(word);
benchmark::DoNotOptimize(parser.result());
auto parsed = linkparser::parse(word);
benchmark::DoNotOptimize(parsed);
}
}
}
Expand Down
78 changes: 58 additions & 20 deletions src/common/LinkParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,58 @@ bool startsWithPort(QStringView string)
return true;
}

/// @brief Trims ignorable leading and trailing characters from @a source
///
/// The ignored characters follow the GFM autolinks extension
/// (https://github.github.com/gfm/#autolinks-extension-):
///
/// - leading:  '<', '*', '_', '~', and '('
/// - trailing: '>', '?', '!', '.', ',', ':', '*', '~', and ')'
///
/// Unlike GFM, @a source isn't scanned for (balanced) parentheses and '_' is
/// not treated as a trailing character.
///
/// @param source The view to narrow in place. It may be empty afterwards.
void strip(QStringView &source)
{
    const auto isIgnoredPrefix = [](QChar c) {
        return c == u'<' || c == u'*' || c == u'_' || c == u'~' || c == u'(';
    };
    const auto isIgnoredSuffix = [](QChar c) {
        return c == u'>' || c == u'?' || c == u'!' || c == u'.' ||
               c == u',' || c == u':' || c == u'*' || c == u'~' || c == u')';
    };

    // The front is trimmed first, so a character valid on both sides
    // (e.g. '*') is consumed as a prefix if the whole input is ignorable.
    while (!source.isEmpty() && isIgnoredPrefix(source.first()))
    {
        source = source.mid(1);
    }

    while (!source.isEmpty() && isIgnoredSuffix(source.last()))
    {
        source.chop(1);
    }
}

} // namespace

namespace chatterino {
namespace chatterino::linkparser {

LinkParser::LinkParser(const QString &unparsedString)
std::optional<Parsed> parse(const QString &source) noexcept
{
ParsedLink result;
std::optional<Parsed> result;
// This is not implemented with a regex to increase performance.
QStringView remaining(unparsedString);
QStringView protocol(remaining);

QStringView link{source};
strip(link);

QStringView remaining = link;
QStringView protocol;

// Check protocol for https?://
if (remaining.startsWith(QStringLiteral("http"), Qt::CaseInsensitive) &&
if (remaining.startsWith(u"http", Qt::CaseInsensitive) &&
remaining.length() >= 4 + 3 + 1) // 'http' + '://' + [any]
{
// optimistic view assuming there's a protocol (http or https)
Expand All @@ -136,11 +175,11 @@ LinkParser::LinkParser(const QString &unparsedString)
withProto = withProto.mid(1);
}

if (withProto.startsWith(QStringLiteral("://")))
if (withProto.startsWith(u"://"))
{
// there's really a protocol => consume it
remaining = withProto.mid(3);
result.protocol = {protocol.begin(), remaining.begin()};
protocol = {link.begin(), remaining.begin()};
}
}

Expand All @@ -161,7 +200,7 @@ LinkParser::LinkParser(const QString &unparsedString)
{
if (lastWasDot) // no double dots ..
{
return;
return result;
}
lastDotPos = i;
lastWasDot = true;
Expand All @@ -181,7 +220,7 @@ LinkParser::LinkParser(const QString &unparsedString)

if (!startsWithPort(remaining))
{
return;
return result;
}

break;
Expand All @@ -198,23 +237,22 @@ LinkParser::LinkParser(const QString &unparsedString)

if (lastWasDot || lastDotPos <= 0)
{
return;
return result;
}

// check host/tld
if ((nDots == 3 && isValidIpv4(host)) ||
isValidTld(host.mid(lastDotPos + 1)))
{
result.host = host;
result.rest = rest;
result.source = unparsedString;
this->result_ = std::move(result);
result = Parsed{
.protocol = protocol,
.host = host,
.rest = rest,
.link = link,
};
}
}

const std::optional<ParsedLink> &LinkParser::result() const
{
return this->result_;
return result;
}

} // namespace chatterino
} // namespace chatterino::linkparser
119 changes: 101 additions & 18 deletions src/common/LinkParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,126 @@

#include <optional>

namespace chatterino {
namespace chatterino::linkparser {

struct ParsedLink {
/// @brief Represents a parsed link
///
/// A parsed link is represented as views over the source string for its
/// different segments. In this simplified model, a link consists of an optional
/// @a protocol, a mandatory @a host and an optional @a rest. These segments are
/// always next to each other in the input string, however together, they don't
/// span the whole input as it could contain prefixes or suffixes.
///
/// Prefixes and suffixes are almost identical to the ones in GitHub Flavored
/// Markdown (GFM - https://github.github.com/gfm/#autolinks-extension-).
/// The main differences are that '_' isn't a valid suffix and parentheses
/// aren't counted (e.g. "(a.com/(foo)!" would result in "a.com/(foo").
/// Matching is done case-insensitively (e.g. "HTTp://a.com" would be valid).
///
/// A @a protocol can either be empty, "http://", or "https://".
/// A @a host can either be an IPv4 address or a hostname. The hostname must end
/// in a valid top level domain. Otherwise, there are no restrictions on it.
/// The @a rest can start with an optional port followed by either a '/', '?',
/// or '#'.
///
/// @b Example
///
/// ```text
/// (https://wiki.chatterino.com/Help/#overview)
/// ▏▏proto ▕ host ▏ rest ▏▏
/// ▏▏ link ▏▏
/// ▏ source ▏
/// ```
struct Parsed {
/// The parsed protocol of the link. Can be empty.
///
/// ```text
/// https://www.forsen.tv/commands
/// ^------^
/// ▏╌╌╌╌╌╌▕
///
/// www.forsen.tv/commands
/// (empty)
/// ```
QStringView protocol;

/// The parsed host of the link. Can not be empty.
///
/// ```text
/// https://www.forsen.tv/commands
/// ^-----------^
/// ▏╌╌╌╌╌╌╌╌╌╌╌▕
/// ```
QStringView host;

/// The remainder of the link. Can be empty.
///
/// ```text
/// https://www.forsen.tv/commands
/// ^-------^
/// ▏╌╌╌╌╌╌╌▕
///
/// https://www.forsen.tv
/// (empty)
/// ```
QStringView rest;

/// The original unparsed link.
/// The matched link. Can not be empty.
///
/// https://www.forsen.tv/commands
/// ^----------------------------^
QString source;
};
/// ```text
/// (https://www.forsen.tv/commands)
/// ▏╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌▕
/// ```
QStringView link;

/// Checks whether the matched link starts after the beginning of @a source,
/// i.e. whether there are characters in front of it.
///
/// @a source is expected to be the string this link was parsed from.
bool hasPrefix(const QString &source) const noexcept
{
    const auto *sourceStart = source.begin();
    return sourceStart != this->link.begin();
}

class LinkParser
{
public:
explicit LinkParser(const QString &unparsedString);
/// The part of @a source in front of the matched link. May be empty.
///
/// @a source is expected to be the string this link was parsed from.
///
/// ```text
/// (https://www.forsen.tv/commands)
/// ^
///
/// https://www.forsen.tv/commands
/// (empty)
/// ```
QStringView prefix(const QString &source) const noexcept
{
    const auto *first = source.data();
    const auto *last = this->link.begin();
    return QStringView{first, last};
}

const std::optional<ParsedLink> &result() const;
/// Checks whether the matched link ends before the end of @a source,
/// i.e. whether there are characters after it.
///
/// @a source is expected to be the string this link was parsed from.
bool hasSuffix(const QString &source) const noexcept
{
    const auto *sourceEnd = source.end();
    return sourceEnd != this->link.end();
}

private:
std::optional<ParsedLink> result_{};
/// The part of @a source after the matched link. May be empty.
///
/// @a source is expected to be the string this link was parsed from.
///
/// ```text
/// (https://www.forsen.tv/commands)
///                                ^
///
/// https://www.forsen.tv/commands
/// (empty)
/// ```
QStringView suffix(const QString &source) const noexcept
{
    const auto *linkEnd = this->link.end();
    const auto *sourceEnd = source.data() + source.size();
    return QStringView{linkEnd, sourceEnd};
}
};

} // namespace chatterino
/// @brief Parses a link from @a source into its segments
///
/// If no link is contained in @a source, `std::nullopt` will be returned.
/// The returned value is valid as long as @a source exists, as it contains
/// views into @a source.
///
/// For the accepted links, see Parsed.
std::optional<Parsed> parse(const QString &source) noexcept;

} // namespace chatterino::linkparser
6 changes: 3 additions & 3 deletions src/controllers/commands/builtin/twitch/SendWhisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,10 @@ bool appendWhisperMessageWordsLocally(const QStringList &words)
void operator()(const QString &string,
MessageBuilder &b) const
{
LinkParser parser(string);
if (parser.result())
auto link = linkparser::parse(string);
if (link)
{
b.addLink(*parser.result());
b.addLink(*link, string);
}
else
{
Expand Down
Loading
Loading