Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion application/config.json.template
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@
"gradle.org",
"help.gradle.org",
"youtube.com",
"www.youtube.com"
"www.youtube.com",
"cdn.discordapp.com",
"media.discordapp.net"
],
"hostBlacklist": [
"bit.ly",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@
import org.togetherjava.tjbot.config.ScamBlockerConfig;
import org.togetherjava.tjbot.features.utils.StringDistances;

import javax.annotation.Nullable;

import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.StringJoiner;
import java.util.function.Predicate;
import java.util.regex.Pattern;
Expand All @@ -24,6 +30,8 @@
* {@link #isScam(CharSequence)}.
*/
public final class ScamDetector {
private static final Set<String> IMAGE_EXTENSIONS =
Set.of("jpg", "jpeg", "png", "gif", "webp", "tiff", "svg", "apng");
private static final Pattern TOKENIZER = Pattern.compile("[\\s,]");
private final ScamBlockerConfig config;
private final Predicate<String> isSuspiciousAttachmentName;
Expand Down Expand Up @@ -59,7 +67,8 @@ public boolean isScam(Message message) {
}

String content = message.getContentDisplay();
List<Message.Attachment> attachments = message.getAttachments();
List<Attachment> attachments =
message.getAttachments().stream().map(Attachment::fromDiscord).toList();

if (content.isBlank()) {
return areAttachmentsSuspicious(attachments);
Expand All @@ -76,21 +85,28 @@ public boolean isScam(Message message) {
*/
public boolean isScam(CharSequence message) {
    AnalyseResults analysis = new AnalyseResults();
    // Start optimistic: analyzeToken clears this flag when it reaches a token
    // that does not start with "http".
    analysis.onlyContainsUrls = true;

    // Tokens are separated by single whitespace characters or commas.
    List<String> tokens = TOKENIZER.splitAsStream(message).toList();
    for (String token : tokens) {
        analyzeToken(token, analysis);
    }

    return isScam(analysis);
}

private boolean isScam(AnalyseResults results) {
if (results.pingsEveryone && (results.containsSuspiciousKeyword || results.hasUrl
if (results.pingsEveryone && (results.containsSuspiciousKeyword || results.hasUrl()
|| results.containsDollarSign)) {
return true;
}

return Stream
.of(results.containsSuspiciousKeyword, results.hasSuspiciousUrl,
boolean hasTooManySuspiciousFlags = Stream
.of(results.containsSuspiciousKeyword, results.hasSuspiciousUrl(),
results.containsDollarSign)
.filter(flag -> flag)
.count() >= 2;
if (hasTooManySuspiciousFlags) {
return true;
}

return results.onlyContainsUrls && results.areAllUrlsWithAttachments()
&& areAttachmentsSuspicious(results.getUrlAttachments());
}

private void analyzeToken(String token, AnalyseResults results) {
Expand All @@ -113,13 +129,18 @@ private void analyzeToken(String token, AnalyseResults results) {

if (token.startsWith("http")) {
analyzeUrl(token, results);
} else {
results.onlyContainsUrls = false;
}
}

private void analyzeUrl(String url, AnalyseResults results) {
String host;
String path;
try {
host = URI.create(url).getHost();
URI uri = URI.create(url);
host = uri.getHost();
path = uri.getPath();
} catch (IllegalArgumentException _) {
// Invalid urls are not scam
return;
Expand All @@ -129,20 +150,25 @@ private void analyzeUrl(String url, AnalyseResults results) {
return;
}

results.hasUrl = true;
AnalyseUrlResult result = new AnalyseUrlResult();
results.urls.add(result);

if (path != null && path.startsWith("/attachments")) {
result.containedAttachment = Attachment.fromUrlPath(path);
}

if (config.getHostWhitelist().contains(host)) {
return;
}

if (config.getHostBlacklist().contains(host)) {
results.hasSuspiciousUrl = true;
result.isSuspicious = true;
return;
}

for (String keyword : config.getSuspiciousHostKeywords()) {
if (isHostSimilarToKeyword(host, keyword)) {
results.hasSuspiciousUrl = true;
result.isSuspicious = true;
break;
}
}
Expand Down Expand Up @@ -171,14 +197,14 @@ private boolean containsSuspiciousKeyword(String token) {
});
}

private boolean areAttachmentsSuspicious(Collection<? extends Message.Attachment> attachments) {
private boolean areAttachmentsSuspicious(Collection<Attachment> attachments) {
long suspiciousAttachments =
attachments.stream().filter(this::isAttachmentSuspicious).count();
return suspiciousAttachments >= config.getSuspiciousAttachmentsThreshold();
}

private boolean isAttachmentSuspicious(Message.Attachment attachment) {
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.getFileName());
private boolean isAttachmentSuspicious(Attachment attachment) {
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.fileName());
}

private boolean isHostSimilarToKeyword(String host, String keyword) {
Expand Down Expand Up @@ -212,21 +238,79 @@ private static boolean endsWith(CharSequence text, char suffixToTest) {
return !text.isEmpty() && text.charAt(text.length() - 1) == suffixToTest;
}

private static class AnalyseResults {
/**
 * Minimal view of an attachment that only carries its file name, so that real Discord
 * attachments and attachment links found in message text can be checked the same way.
 *
 * @param fileName the name of the attached file, including its extension if it has one
 */
private record Attachment(String fileName) {
    /**
     * Checks whether the file name ends with one of the known image extensions.
     * <p>
     * The extension is compared case-insensitively, so {@code photo.PNG} is treated exactly
     * like {@code photo.png}.
     *
     * @return {@code true} if the extension is listed in {@code IMAGE_EXTENSIONS}
     */
    boolean isImage() {
        return getFileExtension()
            // IMAGE_EXTENSIONS holds lower-case entries only; normalize before the lookup
            .map(extension -> extension.toLowerCase(Locale.ROOT))
            .map(IMAGE_EXTENSIONS::contains)
            .orElse(false);
    }

    /**
     * Extracts the text after the last {@code '.'} of the file name.
     *
     * @return the extension as written in the file name (possibly empty), or an empty
     *         optional if the file name contains no dot
     */
    private Optional<String> getFileExtension() {
        int dot = fileName.lastIndexOf('.');
        if (dot == -1) {
            return Optional.empty();
        }
        String extension = fileName.substring(dot + 1);
        return Optional.of(extension);
    }

    /**
     * Creates an instance from a Discord attachment, using its file name.
     *
     * @param attachment the Discord attachment to convert
     * @return an attachment with the same file name
     */
    static Attachment fromDiscord(Message.Attachment attachment) {
        return new Attachment(attachment.getFileName());
    }

    /**
     * Creates an instance from the path of a URL, using the text after the last {@code '/'}
     * as file name.
     *
     * @param urlPath the path component of a URL, without query or fragment
     * @return an attachment named after the last path segment; the name is empty if the path
     *         contains no {@code '/'} or ends with one
     */
    static Attachment fromUrlPath(String urlPath) {
        int fileNameStart = urlPath.lastIndexOf('/');
        String fileName = fileNameStart == -1 ? "" : urlPath.substring(fileNameStart + 1);
        return new Attachment(fileName);
    }
}

/**
 * Findings for a single URL of a message: whether its host was flagged as suspicious and,
 * if one was extracted from the URL path, the attachment it points to.
 */
private static final class AnalyseUrlResult {
    private boolean isSuspicious;
    @Nullable
    private Attachment containedAttachment;

    @Override
    public String toString() {
        String fields = String.join(", ", "isSuspicious=" + isSuspicious,
                "containedAttachment=" + containedAttachment);
        return AnalyseUrlResult.class.getSimpleName() + "[" + fields + "]";
    }
}

/**
 * Mutable state collected while the tokens of one message are analysed; the scam verdict is
 * derived from these flags and from the per-URL results in {@code urls}.
 */
private static final class AnalyseResults {
private boolean pingsEveryone;
private boolean containsSuspiciousKeyword;
private boolean containsDollarSign;
// NOTE(review): the two flags below duplicate what hasUrl() and hasSuspiciousUrl() derive
// from 'urls'; they look like removed lines left over from a diff view - confirm before use.
private boolean hasUrl;
private boolean hasSuspiciousUrl;
// Set to true before tokenizing in isScam(CharSequence); analyzeToken sets it to false
// when it reaches a token that does not start with "http".
private boolean onlyContainsUrls;
// One entry for every URL that analyzeUrl recorded.
private final Collection<AnalyseUrlResult> urls = new ArrayList<>();

// True if at least one URL result was recorded.
boolean hasUrl() {
return !urls.isEmpty();
}

// True if any recorded URL result is marked as suspicious.
boolean hasSuspiciousUrl() {
return urls.stream().anyMatch(url -> url.isSuspicious);
}

// True if every recorded URL has an attachment; also true when no URL was recorded,
// because allMatch on an empty stream yields true.
boolean areAllUrlsWithAttachments() {
return urls.stream().allMatch(url -> url.containedAttachment != null);
}

// Attachments of all recorded URLs, skipping URLs without one; the result is unmodifiable.
Collection<Attachment> getUrlAttachments() {
return urls.stream()
.map(url -> url.containedAttachment)
.filter(Objects::nonNull)
.toList();
}

@Override
public String toString() {
return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]")
.add("pingsEveryone=" + pingsEveryone)
.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword)
.add("containsDollarSign=" + containsDollarSign)
// NOTE(review): these two entries print the boolean fields, not the derived methods;
// presumably superseded by the "urls=" entry - confirm.
.add("hasUrl=" + hasUrl)
.add("hasSuspiciousUrl=" + hasSuspiciousUrl)
.add("onlyContainsUrls=" + onlyContainsUrls)
.add("urls=" + urls)
.toString();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.IntStream;

/**
* Provides means to create previews of links. See {@link LinkDetection#extractLinks(String, Set)}
* and {@link #createLinkPreviews(List)}.
*/
public final class LinkPreviews {
private static final Logger logger = LoggerFactory.getLogger(LinkPreviews.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

final class ScamDetectorTest {
private static final int SUSPICIOUS_ATTACHMENTS_THRESHOLD = 3;
private static final String SUSPICIOUS_ATTACHMENT_NAME = "scam.png";
private static final String SUSPICIOUS_ATTACHMENT_NAME = "image.png";

private ScamDetector scamDetector;

Expand All @@ -40,7 +40,8 @@ void setUp() {
"freenitro", "^earn$", "^earning", ".exe$", "mrbeast"));
when(scamConfig.getHostWhitelist()).thenReturn(Set.of("discord.com", "discord.media",
"discordapp.com", "discordapp.net", "discordstatus.com", "thehackernews.com",
"gradle.org", "help.gradle.org", "youtube.com", "www.youtube.com"));
"gradle.org", "help.gradle.org", "youtube.com", "www.youtube.com",
"cdn.discordapp.com", "media.discordapp.net"));
when(scamConfig.getHostBlacklist()).thenReturn(Set.of("bit.ly", "discord.gg", "teletype.in",
"t.me", "corematrix.us", "u.to", "steamcommunity.com", "goo.su", "telegra.ph",
"shorturl.at", "cheatings.xyz", "transfer.sh", "tobimoller.space"));
Expand All @@ -50,7 +51,7 @@ void setUp() {
when(scamConfig.getSuspiciousAttachmentsThreshold())
.thenReturn(SUSPICIOUS_ATTACHMENTS_THRESHOLD);
when(scamConfig.getSuspiciousAttachmentNamePattern())
.thenReturn(SUSPICIOUS_ATTACHMENT_NAME);
.thenReturn("(image|\\d{1,2})\\.[^.]{0,5}");

when(scamConfig.getTrustedUserRolePattern()).thenReturn("Moderator");

Expand Down Expand Up @@ -401,7 +402,16 @@ B2CWorkflow Builder (React Flow)
as a beginner from the digital market, DM me for expert guidance or contact me directly on telegram and start building your financial future.
Telegram username @JohnSmith123""",
"Grab it before it's deleted (available for Windows and macOS): https://www.reddit.com/r/TVBaFreeHub/comments/12345t/ninaatradercrackedfullpowertradingfreefor123/",
"Bro, claim 0.1 BTC now! Use promo code \"mrbeast\" at expmcoins.com screen @everyone");
"Bro, claim 0.1 BTC now! Use promo code \"mrbeast\" at expmcoins.com screen @everyone",
"""
https://cdn.discordapp.com/attachments/1234/5678/image.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
https://cdn.discordapp.com/attachments/1234/5678/image.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
https://cdn.discordapp.com/attachments/1234/5678/image.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&""",
"""
https://cdn.discordapp.com/attachments/1234/5678/1.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
https://cdn.discordapp.com/attachments/1234/5678/2.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
https://cdn.discordapp.com/attachments/1234/5678/3.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
https://cdn.discordapp.com/attachments/1234/5678/4.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&""");
}

private static List<String> provideRealFalsePositiveMessages() {
Expand Down
Loading