Skip to content

Commit

Permalink
#1950: Simplify times per parser control.
Browse files Browse the repository at this point in the history
  • Loading branch information
wladimirleite committed Oct 29, 2023
1 parent ad61a6f commit f6a3291
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 38 deletions.
16 changes: 8 additions & 8 deletions iped-app/src/main/java/iped/app/processing/ui/ProgressFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicLong;
import java.util.SortedMap;
import java.util.TreeMap;

import javax.swing.BorderFactory;
import javax.swing.BoxLayout;
Expand Down Expand Up @@ -93,6 +93,7 @@ public class ProgressFrame extends JFrame implements PropertyChangeListener, Act
private boolean paused = false;
private String decodingDir = null;
private long physicalMemory;
private static final SortedMap<String, Long> timesPerParser = new TreeMap<String, Long>();

private static class RestrictedSizeLabel extends JLabel {

Expand Down Expand Up @@ -377,7 +378,8 @@ private String getTaskTimes() {
}

private String getParserTimes() {
if (ParsingTask.times.isEmpty())
ParsingTask.copyTimesPerParser(timesPerParser);
if (timesPerParser.isEmpty())
return "";
StringBuilder msg = new StringBuilder();
startTable(msg);
Expand All @@ -391,14 +393,12 @@ private String getParserTimes() {
if (totalTime < 1)
totalTime = 1;

for (Object o : ParsingTask.times.entrySet().toArray()) {
@SuppressWarnings("unchecked")
Entry<String, AtomicLong> e = (Entry<String, AtomicLong>) o;
long time = e.getValue().get();
for (String parserName : timesPerParser.keySet()) {
long time = timesPerParser.get(parserName);
long sec = time / (1000000 * workers.length);
int pct = (int) ((100 * time) / totalTime);

startRow(msg, e.getKey(), pct);
startRow(msg, parserName, pct);
addCell(msg, nf.format(sec) + "s", Align.RIGHT);
finishRow(msg, pct + "%", Align.RIGHT);
}
Expand Down
51 changes: 21 additions & 30 deletions iped-engine/src/main/java/iped/engine/task/ParsingTask.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright 2012-2014, Luis Filipe da Cruz Nassif
*
* This file is part of Indexador e Processador de Evidências Digitais (IPED).
* This file is part of Indexador e Processador de Evidências Digitais (IPED).
*
* IPED is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -23,13 +23,11 @@
import java.io.InputStream;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
Expand Down Expand Up @@ -155,7 +153,7 @@ public class ParsingTask extends ThumbTask implements EmbeddedDocumentExtractor
private static int max_expanding_containers;

public static AtomicLong totalText = new AtomicLong();
public static Map<String, AtomicLong> times = Collections.synchronizedMap(new TreeMap<String, AtomicLong>());
private static final Map<String, Long> timesPerParser = new HashMap<String, Long>();

private static Map<Integer, ZipBombStats> zipBombStatsMap = new ConcurrentHashMap<>();
private static final Set<MediaType> typesToCheckZipBomb = getTypesToCheckZipbomb();
Expand All @@ -170,8 +168,7 @@ public class ParsingTask extends ThumbTask implements EmbeddedDocumentExtractor
private boolean extractEmbedded;
private volatile ParsingReader reader;
private String firstParentPath = null;
private Map<Integer, Long> timeInDepth = new ConcurrentHashMap<>();
private volatile int depth = 0;
private volatile long subitemsTime;
private Map<Object, ParentInfo> idToItemMap = new HashMap<>();
private int numSubitems = 0;
private StandardParser autoParser;
Expand Down Expand Up @@ -307,34 +304,26 @@ public void process(IItem evidence) throws Exception {
return;
}

String parserName = getParserName(parser, evidence.getMetadata().get(Metadata.CONTENT_TYPE));
AtomicLong time = times.get(parserName);
if (time == null) {
time = new AtomicLong();
times.put(parserName, time);
}

SplitLargeBinaryConfig splitConfig = ConfigurationManager.get()
.findObject(SplitLargeBinaryConfig.class);
if (((Item) evidence).getTextCache() == null
&& ((evidence.getLength() == null || evidence.getLength() < splitConfig.getMinItemSizeToFragment())
|| StandardParser.isSpecificParser(parser))) {

ParsingTask task = null;
try {
depth++;
ParsingTask task = new ParsingTask(worker, autoParser);
task = new ParsingTask(worker, autoParser);
task.parsingConfig = this.parsingConfig;
task.expandConfig = this.expandConfig;
task.depth = depth;
task.timeInDepth = timeInDepth;
task.safeProcess(evidence);

} finally {
depth--;
String parserName = getParserName(parser, evidence.getMetadata().get(Metadata.CONTENT_TYPE));
long st = task == null ? 0 : task.subitemsTime;
long diff = System.nanoTime() / 1000 - start;
Long subitemsTime = timeInDepth.remove(depth + 1);
if (subitemsTime == null)
subitemsTime = 0L;
time.addAndGet(diff - subitemsTime);
synchronized (timesPerParser) {
timesPerParser.merge(parserName, diff - st, Long::sum);
}
}

}
Expand Down Expand Up @@ -694,22 +683,18 @@ public void parseEmbedded(InputStream inputStream, ContentHandler handler, Metad

// pausa contagem de timeout do pai antes de extrair e processar subitem
if (reader.setTimeoutPaused(true)) {
long start = System.nanoTime() / 1000;
try {
long start = System.nanoTime() / 1000;

ProcessTime time = ProcessTime.AUTO;

worker.processNewItem(subItem, time);
Statistics.get().incSubitemsDiscovered();
numSubitems++;

long diff = (System.nanoTime() / 1000) - start;
Long prevTime = timeInDepth.get(depth);
if (prevTime == null)
prevTime = 0L;
timeInDepth.put(depth, prevTime + diff);

} finally {
// Store time spent on subitems processing
subitemsTime += System.nanoTime() / 1000 - start;

// despausa contador de timeout do pai somente após processar subitem
reader.setTimeoutPaused(false);

Expand Down Expand Up @@ -875,4 +860,10 @@ public void finish() throws Exception {
totalText = null;
}

/**
 * Replaces the contents of {@code dest} with a snapshot of the time
 * accumulated so far for each parser name.
 *
 * <p>{@code dest} is emptied first; the shared map is then read while
 * holding its monitor, the same monitor used when times are merged in,
 * so the copy never observes a partially applied update.
 *
 * @param dest map that receives the parser name to accumulated time
 *             entries; any previous content is discarded
 */
public static void copyTimesPerParser(Map<String, Long> dest) {
    dest.clear();
    synchronized (timesPerParser) {
        for (Map.Entry<String, Long> parserTime : timesPerParser.entrySet()) {
            dest.put(parserTime.getKey(), parserTime.getValue());
        }
    }
}
}

0 comments on commit f6a3291

Please sign in to comment.