Skip to content

Commit

Permalink
'#1859 Continuation of DiscordParser refactoring.
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickdalla committed Nov 7, 2023
1 parent 588a598 commit 3380e78
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 83 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import iped.data.IItemReader;
import iped.parsers.discord.cache.CacheEntry;
import iped.parsers.discord.cache.Index;
import iped.parsers.util.MetadataUtil;
import iped.properties.BasicProps;
import iped.properties.ExtraProperties;
import iped.search.IItemSearcher;
Expand All @@ -39,7 +40,11 @@ public class CacheIndexParser extends AbstractParser {
private static Logger LOGGER = LoggerFactory.getLogger(CacheIndexParser.class);


public static final String IS_CACHE_INDEX_ENTRY = "isChromeCacheEntry";
public static final String METADATA_PREFIX = "chromeCache";
public static final String IS_CACHE_INDEX_ENTRY = METADATA_PREFIX + ":isChromeCacheEntry";
public static final String CACHE_URL = METADATA_PREFIX + ":chromeCacheUrl";

private static final String CACHE_ENTRY_NAME = METADATA_PREFIX + ":cacheEntryName";

@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
Expand Down Expand Up @@ -67,6 +72,8 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada
List<IItemReader> dataFiles = searcher.search(commonQuery + " AND " + BasicProps.NAME
+ ":(\"data_0\" OR \"data_1\" OR \"data_2\" OR \"data_3\" OR \"data_4\" OR \"data_5\")");

MetadataUtil.addCustomMetadataPrefix("chromeCache");

Index index;
try {
index = new Index(indexFile, item.getPath(), dataFiles, externalFiles);
Expand All @@ -80,6 +87,10 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada

try {

if (ce.getRequestURL().contains("unknown")) {
System.out.println();
}

String contentEncoding = httpResponse.get("content-encoding");


Expand All @@ -95,7 +106,10 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada
ce.getRequestURL().substring(ce.getRequestURL().lastIndexOf('/') + 1));
entryMeta.set(BasicProps.HASCHILD, Boolean.TRUE.toString());
entryMeta.set(ExtraProperties.DECODED_DATA, Boolean.TRUE.toString());
entryMeta.set(CacheIndexParser.IS_CACHE_INDEX_ENTRY, Boolean.TRUE.toString());

entryMeta.set(IS_CACHE_INDEX_ENTRY, Boolean.TRUE.toString());
entryMeta.set(CACHE_ENTRY_NAME, ce.getName());
entryMeta.set(CACHE_URL, ce.getRequestURL());

for (Map.Entry<String, String> entry : httpResponse.entrySet()) {
entryMeta.set(entry.getKey(), entry.getValue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
Expand Down Expand Up @@ -48,6 +50,7 @@
/***
*
* @author PCF Campanini
* @author PCF Patrick Dalla Bernardina
*
*/
public class DiscordParser extends AbstractParser {
Expand Down Expand Up @@ -87,106 +90,98 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada
IItemSearcher searcher = context.get(IItemSearcher.class);
IItemReader item = context.get(IItemReader.class);

if (searcher != null && item != null) {
String commonQuery = BasicProps.EVIDENCE_UUID + ":" + item.getDataSource().getUUID() + " AND "
+ BasicProps.PARENTID + ":" + item.getParentId() + " AND NOT " + BasicProps.LENGTH + ":0 AND "
+ CacheIndexParser.IS_CACHE_INDEX_ENTRY.replace(":", "\\:") + ":true";

String commonQuery = BasicProps.EVIDENCE_UUID + ":" + item.getDataSource().getUUID() + " AND "
+ BasicProps.PARENTID + ":" + item.getId() + " AND NOT " + BasicProps.LENGTH + ":0 AND "
+ CacheIndexParser.IS_CACHE_INDEX_ENTRY + ":true";
try (InputStream is = TikaInputStream.get(indexFile, new TemporaryResources())) {

List<IItemReader> entries = searcher
.search(commonQuery + " AND " + BasicProps.NAME + ":\"messages\\?limit\\=50\"");
ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

int chatVirtualId = 0;

for (IItemReader reader : entries) {
try (InputStream is = reader.getBufferedInputStream()) {

ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

List<DiscordRoot> discordRoot = null;
try {
discordRoot = mapper.readValue(is, new TypeReference<List<DiscordRoot>>() {
});
} catch (JsonProcessingException ex) {
LOGGER.error("Invalid JSON inside cache entry " + reader.getPath());
ex.printStackTrace();
}
List<DiscordRoot> discordRoot = null;
try {
discordRoot = mapper.readValue(is, new TypeReference<List<DiscordRoot>>() {
});
} catch (JsonProcessingException ex) {
LOGGER.error("Invalid JSON inside cache entry " + item.getPath());
ex.printStackTrace();
}

if (!discordRoot.isEmpty()) {
HashMap<String, byte[]> avatarCache = new HashMap<>();
// Checking if the image file is cached, to do so, iterates through all authors
// and attachments to check if they are in the case, comparing their attributes
for (DiscordRoot dr : discordRoot) {
if (dr.getAuthor().getAvatar() != null) {
byte[] avatar = avatarCache.get(dr.getAuthor().getAvatar());
if (avatar != null) {
dr.getAuthor().setAvatarBytes(avatar);
break;
} else {
List<IItemReader> avatars = searcher.search(
commonQuery + " AND " + BasicProps.NAME + ":" + dr.getAuthor().getAvatar()
+ " AND " + CacheIndexParser.IS_CACHE_INDEX_ENTRY + ":true");
for (IItemReader avatarItem : avatars) {
try (InputStream is2 = avatarItem.getBufferedInputStream()) {
BufferedImage img = ImageUtil.getSubSampledImage(is2, 64, 64);
img = ImageUtil.getOpaqueImage(img);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ImageIO.write(img, "jpg", baos);
avatar = baos.toByteArray();
dr.getAuthor().setAvatarBytes(avatar);
avatarCache.put(dr.getAuthor().getAvatar(), avatar);
break;
} catch (InputStreamNotAvailable e) {
// ignore
} catch (Exception e) {
LOGGER.warn("Exception decoding Discord avatar", e);
}
if (!discordRoot.isEmpty()) {
metadata.set(BasicProps.HASCHILD, Boolean.TRUE.toString());

HashMap<String, byte[]> avatarCache = new HashMap<>();
// Checking if the image file is cached, to do so, iterates through all authors
// and attachments to check if they are in the case, comparing their attributes
for (DiscordRoot dr : discordRoot) {
if (dr.getAuthor().getAvatar() != null) {
byte[] avatar = avatarCache.get(dr.getAuthor().getAvatar());
if (avatar != null) {
dr.getAuthor().setAvatarBytes(avatar);
break;
} else {
try {
List<IItemReader> avatars = searcher.search(commonQuery + " AND " + BasicProps.NAME
+ ":" + dr.getAuthor().getAvatar() + "*");
for (IItemReader avatarItem : avatars) {
try (InputStream is2 = avatarItem.getBufferedInputStream()) {
BufferedImage img = ImageUtil.getSubSampledImage(is2, 64, 64);
img = ImageUtil.getOpaqueImage(img);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ImageIO.write(img, "jpg", baos);
avatar = baos.toByteArray();
dr.getAuthor().setAvatarBytes(avatar);
avatarCache.put(dr.getAuthor().getAvatar(), avatar);
break;
} catch (InputStreamNotAvailable e) {
// ignore
}
}
} catch (Exception e) {
LOGGER.warn("Exception decoding Discord avatar", e);
}

/*
* for (DiscordAttachment att : dr.getAttachments()) { List<IItemReader> avatars
* = searcher.search( commonQuery + " AND " + ":" + dr.getAuthor().getAvatar());
* String[] parts =
* att.getUrl().split("https://cdn.discordapp.com/attachments/"); if
* (parts.length > 1 && ce2.getRequestURL().contains(parts[1])) { for
* (IItemReader ib : externalFiles) { if (ib.getName() != null &&
* ib.getName().equals(ce2.getName())) { att.setMediaHash(ib.getHash());
* att.setContent_type(ib.getMediaType().toString()); break; } } } }
*/
}
}
}
}

String chatName = "DiscordChat id(" + discordRoot.get(0).getId() + ")";
String chatName = "DiscordChat id(" + discordRoot.get(0).getId() + ")";

Metadata chatMeta = new Metadata();
chatMeta.set("URL", reader.getName());
chatMeta.set(TikaCoreProperties.TITLE, chatName);
chatMeta.set(StandardParser.INDEXER_CONTENT_TYPE, CHAT_MIME_TYPE);
chatMeta.set(ExtraProperties.ITEM_VIRTUAL_ID, Integer.toString(chatVirtualId));
chatMeta.set(BasicProps.HASCHILD, Boolean.TRUE.toString());
chatMeta.set(ExtraProperties.DECODED_DATA, Boolean.TRUE.toString());
metadata.set("URL", item.getName());
metadata.set(TikaCoreProperties.TITLE, chatName);
metadata.set(StandardParser.INDEXER_CONTENT_TYPE, CHAT_MIME_TYPE);

for (DiscordRoot dr : discordRoot) {
for (DiscordAttachment da : dr.getAttachments()) {
if (da.getMediaHash() != null) {
chatMeta.add(ExtraProperties.LINKED_ITEMS, BasicProps.HASH + ":" + da.getMediaHash());
for (DiscordRoot dr : discordRoot) {
for (DiscordAttachment da : dr.getAttachments()) {
try {
String[] parts = da.getUrl().split("https://cdn.discordapp.com/attachments/");

if (parts.length > 1) {
List<IItemReader> atts = searcher.search(commonQuery + " AND "
+ CacheIndexParser.CACHE_URL.replace(":", "\\:") + ":" + parts[1]);
for (IItemReader attsItem : atts) {
da.setMediaHash(attsItem.getHash());
da.setContent_type(attsItem.getMediaType().toString());
}
}
} catch (Exception e) {
LOGGER.warn("Exception decoding Discord attachment", e);
}

// Sort messages by timestamp in ascending order
Collections.sort(discordRoot);
if (da.getMediaHash() != null) {
metadata.add(ExtraProperties.LINKED_ITEMS, BasicProps.HASH + ":" + da.getMediaHash());
}
}
}

XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, chatMeta);
new DiscordHTMLReport().printHTML(discordRoot, xhtml, searcher);
// Sort messages by timestamp in ascending order
Collections.sort(discordRoot);

extractMessages(chatName, discordRoot, handler, extractor, chatVirtualId);
}
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
new DiscordHTMLReport().printHTML(discordRoot, xhtml, searcher);

}
extractMessages(chatName, discordRoot, handler, extractor, 0);
}
}

Expand Down

0 comments on commit 3380e78

Please sign in to comment.