From 3380e78b0896669d5f73f6da7414fc158ba9ef3f Mon Sep 17 00:00:00 2001 From: "patrick.pdb" Date: Tue, 7 Nov 2023 15:38:47 -0300 Subject: [PATCH] '#1859 Continuation of DiscordParser refactoring. --- .../browsers/chrome/CacheIndexParser.java | 18 +- .../iped/parsers/discord/DiscordParser.java | 157 +++++++++--------- 2 files changed, 92 insertions(+), 83 deletions(-) diff --git a/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/browsers/chrome/CacheIndexParser.java b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/browsers/chrome/CacheIndexParser.java index 8d4296ab4a..7951ee0cfb 100644 --- a/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/browsers/chrome/CacheIndexParser.java +++ b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/browsers/chrome/CacheIndexParser.java @@ -24,6 +24,7 @@ import iped.data.IItemReader; import iped.parsers.discord.cache.CacheEntry; import iped.parsers.discord.cache.Index; +import iped.parsers.util.MetadataUtil; import iped.properties.BasicProps; import iped.properties.ExtraProperties; import iped.search.IItemSearcher; @@ -39,7 +40,11 @@ public class CacheIndexParser extends AbstractParser { private static Logger LOGGER = LoggerFactory.getLogger(CacheIndexParser.class); - public static final String IS_CACHE_INDEX_ENTRY = "isChromeCacheEntry"; + public static final String METADATA_PREFIX = "chromeCache"; + public static final String IS_CACHE_INDEX_ENTRY = METADATA_PREFIX + ":isChromeCacheEntry"; + public static final String CACHE_URL = METADATA_PREFIX + ":chromeCacheUrl"; + + private static final String CACHE_ENTRY_NAME = METADATA_PREFIX + ":cacheEntryName"; @Override public Set getSupportedTypes(ParseContext context) { @@ -67,6 +72,8 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada List dataFiles = searcher.search(commonQuery + " AND " + BasicProps.NAME + ":(\"data_0\" OR \"data_1\" OR \"data_2\" OR \"data_3\" OR \"data_4\" OR \"data_5\")"); + MetadataUtil.addCustomMetadataPrefix("chromeCache"); + Index index; try { index = new Index(indexFile, item.getPath(), dataFiles, externalFiles); @@ -80,6 +87,10 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada try { + if (ce.getRequestURL().contains("unknown")) { + System.out.println(); + } + String contentEncoding = httpResponse.get("content-encoding"); @@ -95,7 +106,10 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada ce.getRequestURL().substring(ce.getRequestURL().lastIndexOf('/') + 1)); entryMeta.set(BasicProps.HASCHILD, Boolean.TRUE.toString()); entryMeta.set(ExtraProperties.DECODED_DATA, Boolean.TRUE.toString()); - entryMeta.set(CacheIndexParser.IS_CACHE_INDEX_ENTRY, Boolean.TRUE.toString()); + + entryMeta.set(IS_CACHE_INDEX_ENTRY, Boolean.TRUE.toString()); + entryMeta.set(CACHE_ENTRY_NAME, ce.getName()); + entryMeta.set(CACHE_URL, ce.getRequestURL()); for (Map.Entry entry : httpResponse.entrySet()) { entryMeta.set(entry.getKey(), entry.getValue()); diff --git a/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/discord/DiscordParser.java b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/discord/DiscordParser.java index 95019ae4f4..7bfaf567d5 100644 --- a/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/discord/DiscordParser.java +++ b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/discord/DiscordParser.java @@ -15,6 +15,8 @@ import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; +import org.apache.tika.io.TemporaryResources; +import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; @@ -48,6 +50,7 @@ /*** * * @author PCF Campanini + * @author PCF Patrick Dalla Bernardina * */ public class DiscordParser extends AbstractParser { @@ -87,106 +90,98 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada IItemSearcher searcher = context.get(IItemSearcher.class); IItemReader item = context.get(IItemReader.class); - if (searcher != null && item != null) { + String commonQuery = BasicProps.EVIDENCE_UUID + ":" + item.getDataSource().getUUID() + " AND " + + BasicProps.PARENTID + ":" + item.getParentId() + " AND NOT " + BasicProps.LENGTH + ":0 AND " + + CacheIndexParser.IS_CACHE_INDEX_ENTRY.replace(":", "\\:") + ":true"; - String commonQuery = BasicProps.EVIDENCE_UUID + ":" + item.getDataSource().getUUID() + " AND " - + BasicProps.PARENTID + ":" + item.getId() + " AND NOT " + BasicProps.LENGTH + ":0 AND " - + CacheIndexParser.IS_CACHE_INDEX_ENTRY + ":true"; + try (InputStream is = TikaInputStream.get(indexFile, new TemporaryResources())) { - List entries = searcher - .search(commonQuery + " AND " + BasicProps.NAME + ":\"messages\\?limit\\=50\""); + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - int chatVirtualId = 0; - - for (IItemReader reader : entries) { - try (InputStream is = reader.getBufferedInputStream()) { - - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - - List discordRoot = null; - try { - discordRoot = mapper.readValue(is, new TypeReference>() { - }); - } catch (JsonProcessingException ex) { - LOGGER.error("Invalid JSON inside cache entry " + reader.getPath()); - ex.printStackTrace(); - } + List discordRoot = null; + try { + discordRoot = mapper.readValue(is, new TypeReference>() { + }); + } catch (JsonProcessingException ex) { + LOGGER.error("Invalid JSON inside cache entry " + item.getPath()); + ex.printStackTrace(); + } - if (!discordRoot.isEmpty()) { - HashMap avatarCache = new HashMap<>(); - // Checking if the image file is cached, to do so, iterates through all authors - // and attachments to check if they are in the case, comparing their attributes - for (DiscordRoot dr : discordRoot) { - if (dr.getAuthor().getAvatar() != null) { - byte[] avatar = avatarCache.get(dr.getAuthor().getAvatar()); - if (avatar != null) { - dr.getAuthor().setAvatarBytes(avatar); - break; - } else { - List avatars = searcher.search( - commonQuery + " AND " + BasicProps.NAME + ":" + dr.getAuthor().getAvatar() - + " AND " + CacheIndexParser.IS_CACHE_INDEX_ENTRY + ":true"); - for (IItemReader avatarItem : avatars) { - try (InputStream is2 = avatarItem.getBufferedInputStream()) { - BufferedImage img = ImageUtil.getSubSampledImage(is2, 64, 64); - img = ImageUtil.getOpaqueImage(img); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ImageIO.write(img, "jpg", baos); - avatar = baos.toByteArray(); - dr.getAuthor().setAvatarBytes(avatar); - avatarCache.put(dr.getAuthor().getAvatar(), avatar); - break; - } catch (InputStreamNotAvailable e) { - // ignore - } catch (Exception e) { - LOGGER.warn("Exception decoding Discord avatar", e); - } + if (!discordRoot.isEmpty()) { + metadata.set(BasicProps.HASCHILD, Boolean.TRUE.toString()); + + HashMap avatarCache = new HashMap<>(); + // Checking if the image file is cached, to do so, iterates through all authors + // and attachments to check if they are in the case, comparing their attributes + for (DiscordRoot dr : discordRoot) { + if (dr.getAuthor().getAvatar() != null) { + byte[] avatar = avatarCache.get(dr.getAuthor().getAvatar()); + if (avatar != null) { + dr.getAuthor().setAvatarBytes(avatar); + break; + } else { + try { + List avatars = searcher.search(commonQuery + " AND " + BasicProps.NAME + + ":" + dr.getAuthor().getAvatar() + "*"); + for (IItemReader avatarItem : avatars) { + try (InputStream is2 = avatarItem.getBufferedInputStream()) { + BufferedImage img = ImageUtil.getSubSampledImage(is2, 64, 64); + img = ImageUtil.getOpaqueImage(img); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ImageIO.write(img, "jpg", baos); + avatar = baos.toByteArray(); + dr.getAuthor().setAvatarBytes(avatar); + avatarCache.put(dr.getAuthor().getAvatar(), avatar); + break; + } catch (InputStreamNotAvailable e) { + // ignore } } + } catch (Exception e) { + LOGGER.warn("Exception decoding Discord avatar", e); } - - /* - * for (DiscordAttachment att : dr.getAttachments()) { List avatars - * = searcher.search( commonQuery + " AND " + ":" + dr.getAuthor().getAvatar()); - * String[] parts = - * att.getUrl().split("https://cdn.discordapp.com/attachments/"); if - * (parts.length > 1 && ce2.getRequestURL().contains(parts[1])) { for - * (IItemReader ib : externalFiles) { if (ib.getName() != null && - * ib.getName().equals(ce2.getName())) { att.setMediaHash(ib.getHash()); - * att.setContent_type(ib.getMediaType().toString()); break; } } } } - */ } } + } + } - String chatName = "DiscordChat id(" + discordRoot.get(0).getId() + ")"; + String chatName = "DiscordChat id(" + discordRoot.get(0).getId() + ")"; - Metadata chatMeta = new Metadata(); - chatMeta.set("URL", reader.getName()); - chatMeta.set(TikaCoreProperties.TITLE, chatName); - chatMeta.set(StandardParser.INDEXER_CONTENT_TYPE, CHAT_MIME_TYPE); - chatMeta.set(ExtraProperties.ITEM_VIRTUAL_ID, Integer.toString(chatVirtualId)); - chatMeta.set(BasicProps.HASCHILD, Boolean.TRUE.toString()); - chatMeta.set(ExtraProperties.DECODED_DATA, Boolean.TRUE.toString()); + metadata.set("URL", item.getName()); + metadata.set(TikaCoreProperties.TITLE, chatName); + metadata.set(StandardParser.INDEXER_CONTENT_TYPE, CHAT_MIME_TYPE); - for (DiscordRoot dr : discordRoot) { - for (DiscordAttachment da : dr.getAttachments()) { - if (da.getMediaHash() != null) { - chatMeta.add(ExtraProperties.LINKED_ITEMS, BasicProps.HASH + ":" + da.getMediaHash()); + for (DiscordRoot dr : discordRoot) { + for (DiscordAttachment da : dr.getAttachments()) { + try { + String[] parts = da.getUrl().split("https://cdn.discordapp.com/attachments/"); + + if (parts.length > 1) { + List atts = searcher.search(commonQuery + " AND " + + CacheIndexParser.CACHE_URL.replace(":", "\\:") + ":" + parts[1]); + for (IItemReader attsItem : atts) { + da.setMediaHash(attsItem.getHash()); + da.setContent_type(attsItem.getMediaType().toString()); } } + } catch (Exception e) { + LOGGER.warn("Exception decoding Discord attachment", e); } - // Sort messages by timestamp in ascending order - Collections.sort(discordRoot); + if (da.getMediaHash() != null) { + metadata.add(ExtraProperties.LINKED_ITEMS, BasicProps.HASH + ":" + da.getMediaHash()); + } + } + } - XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, chatMeta); - new DiscordHTMLReport().printHTML(discordRoot, xhtml, searcher); + // Sort messages by timestamp in ascending order + Collections.sort(discordRoot); - extractMessages(chatName, discordRoot, handler, extractor, chatVirtualId); - } + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + new DiscordHTMLReport().printHTML(discordRoot, xhtml, searcher); - } + extractMessages(chatName, discordRoot, handler, extractor, 0); } }