From ed3bd4ef75447e17ff487b16858b7e748042e109 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Thu, 4 Nov 2021 12:20:05 -0600
Subject: [PATCH 01/19] main: add timing ctx manager

---
 capa/main.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/capa/main.py b/capa/main.py
index d610f3838..9bf45ecc8 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -10,6 +10,7 @@
 """
 import os
 import sys
+import time
 import hashlib
 import logging
 import os.path
@@ -17,6 +18,7 @@
 import datetime
 import textwrap
 import itertools
+import contextlib
 import collections
 from typing import Any, Dict, List, Tuple
 
@@ -65,6 +67,14 @@
 logger = logging.getLogger("capa")
 
 
+@contextlib.contextmanager
+def timing(msg: str):
+    t0 = time.time()
+    yield
+    t1 = time.time()
+    logger.debug("perf: %s: %0.2fs", msg, t1 - t0)
+
+
 def set_vivisect_log_level(level):
     logging.getLogger("vivisect").setLevel(level)
     logging.getLogger("vivisect.base").setLevel(level)

From f98236046bda6c47739214cb0d9d9806129dd128 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Thu, 4 Nov 2021 12:38:35 -0600
Subject: [PATCH 02/19] main: add coarse timing measurements

---
 capa/main.py | 57 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/capa/main.py b/capa/main.py
index 9bf45ecc8..4b4248b6c 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -168,27 +168,28 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
     n_funcs = len(functions)
 
     pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions")
-    for f in pb:
-        function_address = int(f)
-
-        if extractor.is_library_function(function_address):
-            function_name = extractor.get_function_name(function_address)
-            logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
-            meta["library_functions"][function_address] = function_name
-            n_libs = len(meta["library_functions"])
-            percentage = 100 * (n_libs / n_funcs)
-            if isinstance(pb, tqdm.tqdm):
-                pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage))
-            continue
+    with timing("match functions"):
+        for f in pb:
+            function_address = int(f)
+
+            if extractor.is_library_function(function_address):
+                function_name = extractor.get_function_name(function_address)
+                logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
+                meta["library_functions"][function_address] = function_name
+                n_libs = len(meta["library_functions"])
+                percentage = 100 * (n_libs / n_funcs)
+                if isinstance(pb, tqdm.tqdm):
+                    pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage))
+                continue
 
-        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
-        meta["feature_counts"]["functions"][function_address] = feature_count
-        logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)
+            function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
+            meta["feature_counts"]["functions"][function_address] = feature_count
+            logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)
 
-        for rule_name, res in function_matches.items():
-            all_function_matches[rule_name].extend(res)
-        for rule_name, res in bb_matches.items():
-            all_bb_matches[rule_name].extend(res)
+            for rule_name, res in function_matches.items():
+                all_function_matches[rule_name].extend(res)
+            for rule_name, res in bb_matches.items():
+                all_bb_matches[rule_name].extend(res)
 
     # collection of features that captures the rule matches within function and BB scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.
@@ -198,7 +199,8 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
         rule = ruleset[rule_name]
         capa.engine.index_rule_matches(function_and_lower_features, rule, locations)
 
-    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
+    with timing("match file"):
+        all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
     meta["feature_counts"]["file"] = feature_count
 
     matches = {
@@ -410,9 +412,11 @@ def get_workspace(path, format, sigpaths):
     else:
         raise ValueError("unexpected format: " + format)
 
-    viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
+    with timing("load FLIRT"):
+        viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
 
-    vw.analyze()
+    with timing("viv analyze"):
+        vw.analyze()
 
     logger.debug("%s", get_meta_str(vw))
     return vw
@@ -900,8 +904,10 @@ def main(argv=None):
         return E_MISSING_FILE
 
     try:
-        rules = get_rules(args.rules, disable_progress=args.quiet)
-        rules = capa.rules.RuleSet(rules)
+        with timing("load rules"):
+            rules = get_rules(args.rules, disable_progress=args.quiet)
+            rules = capa.rules.RuleSet(rules)
+
         logger.debug(
             "successfully loaded %s rules",
             # during the load of the RuleSet, we extract subscope statements into their own rules
@@ -1013,7 +1019,8 @@ def main(argv=None):
 
     meta = collect_metadata(argv, args.sample, args.rules, extractor)
 
-    capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
+    with timing("find capabilities"):
+        capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
     meta["analysis"].update(counts)
     meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
 

From 3d068fe3cdbcb9c155d7d23ac10197fd2598d0b2 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Thu, 4 Nov 2021 13:17:38 -0600
Subject: [PATCH 03/19] scripts: add utilities for collecting profile traces

---
 .gitignore                     |  2 ++
 scripts/profile-time.sh        | 62 ++++++++++++++++++++++++++++++++++
 scripts/render-time-profile.sh | 24 +++++++++++++
 3 files changed, 88 insertions(+)
 create mode 100644 scripts/profile-time.sh
 create mode 100644 scripts/render-time-profile.sh

diff --git a/.gitignore b/.gitignore
index 4a731c3cc..29161c055 100644
--- a/.gitignore
+++ b/.gitignore
@@ -115,3 +115,5 @@ isort-output.log
 black-output.log
 rule-linter-output.log
 .vscode
+scripts/perf/*.txt
+scripts/perf/*.svg
diff --git a/scripts/profile-time.sh b/scripts/profile-time.sh
new file mode 100644
index 000000000..d09eabd3f
--- /dev/null
+++ b/scripts/profile-time.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# unset variables are errors
+set -o nounset;
+# any failed commands are errors
+set -o errexit;
+
+# current_directory is the path to the directory containing this script.
+# ref: https://stackoverflow.com/a/4774063/87207
+readonly CD="$( cd "$(dirname "$0")" ; pwd -P )"
+
+panic() {
+	echo "[erro]: $@" >&2;
+	exit 1;
+}
+
+info() {
+	echo "[info]: $@" >&2;
+}
+
+verbose=false;
+debug() {
+	if "$verbose"; then
+		echo "[debu]: $@" >&2;
+	fi
+}
+
+if [ "$(git status | grep "modified: " | grep -v "rules" | grep -v "tests/data")" ]; then
+    panic "modified content";
+fi
+
+rev=$(git rev-parse --short HEAD);
+info "rev: $rev";
+
+mkdir -p "$CD/perf/";
+
+info "analyzing PMA 01-01.dll...";
+
+pma_out=$(
+    py-spy record \
+    -o "$CD/perf/capa-$rev-PMA0101.svg" \
+    -- python -m capa.main \
+       -d \
+       "$CD/../tests/data/Practical Malware Analysis Lab 01-01.dll_" \
+       2>&1 || true);
+
+echo "$pma_out" | grep "perf:" | sed -e "s/^.*perf: /perf: /g" | tee "$CD/perf/capa-$rev-PMA0101.txt";
+ 
+info "analyzing kernel32.dll...";
+k32_out=$(
+    py-spy record \
+    -o "$CD/perf/capa-$rev-k32.svg" \
+    -- python -m capa.main \
+       -d \
+       "$CD/../tests/data/kernel32.dll_" \
+       2>&1 || true);
+
+echo "$k32_out" | grep "perf:" | sed -e "s/^.*perf: /perf: /g" | tee "$CD/perf/capa-$rev-k32.txt";
+
+bash "$CD/render-time-profile.sh" "$rev";
+
+info "done.";
diff --git a/scripts/render-time-profile.sh b/scripts/render-time-profile.sh
new file mode 100644
index 000000000..5232a2659
--- /dev/null
+++ b/scripts/render-time-profile.sh
@@ -0,0 +1,24 @@
+fields=("load FLIRT" "viv analyze" "match functions" "match file" "find capabilities");
+
+echo -n "|         |";
+for T in "${fields[@]}"; do
+    printf ' %-17s |' "$T";
+done
+echo "";
+ 
+echo -n "|---------|";
+for T in "${fields[@]}"; do
+    echo -n '-------------------|';
+done
+echo "";
+
+for rev in "$@"; do
+
+    echo -n "| $rev |";
+    for T in "${fields[@]}"; do
+        V1=$(cat scripts/perf/capa-$rev-PMA0101.txt | grep "$T" | sed -e "s/^.*$T: //g");
+        V2=$(cat scripts/perf/capa-$rev-k32.txt | grep "$T" | sed -e "s/^.*$T: //g");
+        printf ' %-17s |' "$V1/$V2";
+    done
+    echo "";
+done

From 86cab26a69b35e8d131db6ebf4784b07bbad50df Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 14:59:22 -0600
Subject: [PATCH 04/19] add perf counters in module capa.perf

---
 capa/engine.py          | 16 ++++++++++++++++
 capa/features/common.py | 12 ++++++++++++
 capa/main.py            |  4 ++++
 capa/perf.py            |  3 +++
 capa/rules.py           |  3 +++
 5 files changed, 38 insertions(+)
 create mode 100644 capa/perf.py

diff --git a/capa/engine.py b/capa/engine.py
index 5a295b0db..7a2dea311 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -10,6 +10,7 @@
 import collections
 from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable
 
+import capa.perf
 import capa.rules
 import capa.features.common
 from capa.features.common import Feature
@@ -125,6 +126,9 @@ def __init__(self, children, description=None):
         self.children = children
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.and"] += 1  
+
         results = [child.evaluate(ctx) for child in self.children]
         success = all(results)
         return Result(success, self, results)
@@ -138,6 +142,9 @@ def __init__(self, children, description=None):
         self.children = children
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.or"] += 1  
+ 
         results = [child.evaluate(ctx) for child in self.children]
         success = any(results)
         return Result(success, self, results)
@@ -151,6 +158,9 @@ def __init__(self, child, description=None):
         self.child = child
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.not"] += 1  
+  
         results = [self.child.evaluate(ctx)]
         success = not results[0]
         return Result(success, self, results)
@@ -165,6 +175,9 @@ def __init__(self, count, children, description=None):
         self.children = children
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.some"] += 1  
+  
         results = [child.evaluate(ctx) for child in self.children]
         # note that here we cast the child result as a bool
         # because we've overridden `__bool__` above.
@@ -184,6 +197,9 @@ def __init__(self, child, min=None, max=None, description=None):
         self.max = max if max is not None else (1 << 64 - 1)
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.range"] += 1  
+  
         count = len(ctx.get(self.child, []))
         if self.min == 0 and count == 0:
             return Result(True, self, [])
diff --git a/capa/features/common.py b/capa/features/common.py
index 8a3d0ec23..9fa5d8bf7 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -12,6 +12,7 @@
 import collections
 from typing import Set, Dict, Union
 
+import capa.perf
 import capa.engine
 import capa.features
 import capa.features.extractors.elf
@@ -97,6 +98,8 @@ def __repr__(self):
         return str(self)
 
     def evaluate(self, ctx: Dict["Feature", Set[int]]) -> "capa.engine.Result":
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature." + self.name] += 1
         return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, []))
 
     def freeze_serialize(self):
@@ -141,6 +144,9 @@ def __init__(self, value: str, description=None):
         self.value = value
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.substring"] += 1 
+
         # mapping from string value to list of locations.
         # will unique the locations later on.
         matches = collections.defaultdict(list)
@@ -226,6 +232,9 @@ def __init__(self, value: str, description=None):
             )
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.regex"] += 1 
+                                                        
         # mapping from string value to list of locations.
         # will unique the locations later on.
         matches = collections.defaultdict(list)
@@ -309,6 +318,9 @@ def __init__(self, value: bytes, description=None):
         self.value = value
 
     def evaluate(self, ctx):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.bytes"] += 1 
+                                                        
         for feature, locations in ctx.items():
             if not isinstance(feature, (Bytes,)):
                 continue
diff --git a/capa/main.py b/capa/main.py
index 4b4248b6c..a830b2acd 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -28,6 +28,7 @@
 from pefile import PEFormatError
 from elftools.common.exceptions import ELFError
 
+import capa.perf
 import capa.rules
 import capa.engine
 import capa.version
@@ -1024,6 +1025,9 @@ def main(argv=None):
     meta["analysis"].update(counts)
     meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
 
+    for (counter, count) in capa.perf.counters.most_common():
+        logger.debug("perf: counter: %s: %d", counter, count)
+
     if has_file_limitation(rules, capabilities):
         # bail if capa encountered file limitation e.g. a packed binary
         # do show the output in verbose mode, though.
diff --git a/capa/perf.py b/capa/perf.py
new file mode 100644
index 000000000..d1e4083d4
--- /dev/null
+++ b/capa/perf.py
@@ -0,0 +1,3 @@
+import collections
+
+counters = collections.Counter()
diff --git a/capa/rules.py b/capa/rules.py
index a8e65b22b..6960e02bf 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -27,6 +27,7 @@
 import yaml
 import ruamel.yaml
 
+import capa.perf
 import capa.engine as ceng
 import capa.features
 import capa.features.file
@@ -620,6 +621,8 @@ def extract_subscope_rules(self):
             yield new_rule
 
     def evaluate(self, features: FeatureSet):
+        capa.perf.counters["evaluate.feature"] += 1
+        capa.perf.counters["evaluate.feature.rule"] += 1 
         return self.statement.evaluate(features)
 
     @classmethod

From 6524449ad1500a2496b35542d8ca68c43a52b20a Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 15:23:22 -0600
Subject: [PATCH 05/19] main: perf: human format the numbers

---
 capa/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/main.py b/capa/main.py
index a830b2acd..5aaa97d65 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -1026,7 +1026,7 @@ def main(argv=None):
     meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
 
     for (counter, count) in capa.perf.counters.most_common():
-        logger.debug("perf: counter: %s: %d", counter, count)
+        logger.debug("perf: counter: {:}: {:,}".format(counter, count))
 
     if has_file_limitation(rules, capabilities):
         # bail if capa encountered file limitation e.g. a packed binary

From 3a12472be8ff39b33048a34a23f29522496cd9b3 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 15:23:34 -0600
Subject: [PATCH 06/19] perf: render: show evaluate.feature counter

---
 scripts/render-time-profile.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/render-time-profile.sh b/scripts/render-time-profile.sh
index 5232a2659..1b5627dce 100644
--- a/scripts/render-time-profile.sh
+++ b/scripts/render-time-profile.sh
@@ -1,4 +1,4 @@
-fields=("load FLIRT" "viv analyze" "match functions" "match file" "find capabilities");
+fields=("load FLIRT" "viv analyze" "match functions" "match file" "find capabilities" "evaluate.feature");
 
 echo -n "|         |";
 for T in "${fields[@]}"; do
@@ -16,8 +16,8 @@ for rev in "$@"; do
 
     echo -n "| $rev |";
     for T in "${fields[@]}"; do
-        V1=$(cat scripts/perf/capa-$rev-PMA0101.txt | grep "$T" | sed -e "s/^.*$T: //g");
-        V2=$(cat scripts/perf/capa-$rev-k32.txt | grep "$T" | sed -e "s/^.*$T: //g");
+        V1=$(cat scripts/perf/capa-$rev-PMA0101.txt | grep "$T: " | sed -e "s/^.*$T: //g");
+        V2=$(cat scripts/perf/capa-$rev-k32.txt | grep "$T: " | sed -e "s/^.*$T: //g");
         printf ' %-17s |' "$V1/$V2";
     done
     echo "";

From 702d00da9137aba8e48f3c091a38117587b84fdc Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 15:24:24 -0600
Subject: [PATCH 07/19] gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 29161c055..cc331b545 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,3 +117,4 @@ rule-linter-output.log
 .vscode
 scripts/perf/*.txt
 scripts/perf/*.svg
+scripts/perf/*.zip

From 0629c584e123ca35499fef0c5d80b350bd7d5815 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 11:50:16 -0700
Subject: [PATCH 08/19] common: move Result to capa.common from capa.engine

fixes circular import error in capa.features.freeze
---
 capa/engine.py          | 50 ++-----------------------------
 capa/features/common.py | 65 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 57 insertions(+), 58 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index 7a2dea311..d6138c9c9 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -13,7 +13,7 @@
 import capa.perf
 import capa.rules
 import capa.features.common
-from capa.features.common import Feature
+from capa.features.common import Result, Feature
 
 # a collection of features and the locations at which they are found.
 #
@@ -46,15 +46,9 @@ def __str__(self):
     def __repr__(self):
         return str(self)
 
-    def evaluate(self, features: FeatureSet) -> "Result":
+    def evaluate(self, features: FeatureSet) -> Result:
         """
         classes that inherit `Statement` must implement `evaluate`
-
-        args:
-          ctx (defaultdict[Feature, set[VA]])
-
-        returns:
-          Result
         """
         raise NotImplementedError()
 
@@ -78,46 +72,6 @@ def replace_child(self, existing, new):
                     children[i] = new
 
 
-class Result:
-    """
-    represents the results of an evaluation of statements against features.
-
-    instances of this class should behave like a bool,
-    e.g. `assert Result(True, ...) == True`
-
-    instances track additional metadata about evaluation results.
-    they contain references to the statement node (e.g. an And statement),
-     as well as the children Result instances.
-
-    we need this so that we can render the tree of expressions and their results.
-    """
-
-    def __init__(self, success: bool, statement: Union[Statement, Feature], children: List["Result"], locations=None):
-        """
-        args:
-          success (bool)
-          statement (capa.engine.Statement or capa.features.Feature)
-          children (list[Result])
-          locations (iterable[VA])
-        """
-        super(Result, self).__init__()
-        self.success = success
-        self.statement = statement
-        self.children = children
-        self.locations = locations if locations is not None else ()
-
-    def __eq__(self, other):
-        if isinstance(other, bool):
-            return self.success == other
-        return False
-
-    def __bool__(self):
-        return self.success
-
-    def __nonzero__(self):
-        return self.success
-
-
 class And(Statement):
     """match if all of the children evaluate to True."""
 
diff --git a/capa/features/common.py b/capa/features/common.py
index 9fa5d8bf7..0f01ef523 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -10,10 +10,9 @@
 import codecs
 import logging
 import collections
-from typing import Set, Dict, Union
+from typing import Set, Dict, List, Union
 
 import capa.perf
-import capa.engine
 import capa.features
 import capa.features.extractors.elf
 
@@ -47,6 +46,52 @@ def escape_string(s: str) -> str:
     return s
 
 
+class Result:
+    """
+    represents the results of an evaluation of statements against features.
+
+    instances of this class should behave like a bool,
+    e.g. `assert Result(True, ...) == True`
+
+    instances track additional metadata about evaluation results.
+    they contain references to the statement node (e.g. an And statement),
+     as well as the children Result instances.
+
+    we need this so that we can render the tree of expressions and their results.
+    """
+
+    def __init__(
+        self,
+        success: bool,
+        statement: Union["capa.engine.Statement", "Feature"],
+        children: List["Result"],
+        locations=None,
+    ):
+        """
+        args:
+          success (bool)
+          statement (capa.engine.Statement or capa.features.Feature)
+          children (list[Result])
+          locations (iterable[VA])
+        """
+        super(Result, self).__init__()
+        self.success = success
+        self.statement = statement
+        self.children = children
+        self.locations = locations if locations is not None else ()
+
+    def __eq__(self, other):
+        if isinstance(other, bool):
+            return self.success == other
+        return False
+
+    def __bool__(self):
+        return self.success
+
+    def __nonzero__(self):
+        return self.success
+
+
 class Feature:
     def __init__(self, value: Union[str, int, bytes], bitness=None, description=None):
         """
@@ -97,10 +142,10 @@ def __str__(self):
     def __repr__(self):
         return str(self)
 
-    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> "capa.engine.Result":
+    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> "Result":
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature." + self.name] += 1
-        return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, []))
+        return Result(self in ctx, self, [], locations=ctx.get(self, []))
 
     def freeze_serialize(self):
         if self.bitness is not None:
@@ -176,9 +221,9 @@ def evaluate(self, ctx):
             # unlike other features, we cannot return put a reference to `self` directly in a `Result`.
             # this is because `self` may match on many strings, so we can't stuff the matched value into it.
             # instead, return a new instance that has a reference to both the substring and the matched values.
-            return capa.engine.Result(True, _MatchedSubstring(self, matches), [], locations=locations)
+            return Result(True, _MatchedSubstring(self, matches), [], locations=locations)
         else:
-            return capa.engine.Result(False, _MatchedSubstring(self, None), [])
+            return Result(False, _MatchedSubstring(self, None), [])
 
     def __str__(self):
         return "substring(%s)" % self.value
@@ -269,9 +314,9 @@ def evaluate(self, ctx):
             # this is because `self` may match on many strings, so we can't stuff the matched value into it.
             # instead, return a new instance that has a reference to both the regex and the matched values.
             # see #262.
-            return capa.engine.Result(True, _MatchedRegex(self, matches), [], locations=locations)
+            return Result(True, _MatchedRegex(self, matches), [], locations=locations)
         else:
-            return capa.engine.Result(False, _MatchedRegex(self, None), [])
+            return Result(False, _MatchedRegex(self, None), [])
 
     def __str__(self):
         return "regex(string =~ %s)" % self.value
@@ -326,9 +371,9 @@ def evaluate(self, ctx):
                 continue
 
             if feature.value.startswith(self.value):
-                return capa.engine.Result(True, self, [], locations=locations)
+                return Result(True, self, [], locations=locations)
 
-        return capa.engine.Result(False, self, [])
+        return Result(False, self, [])
 
     def get_value_str(self):
         return hex_string(bytes_to_str(self.value))

From 5770d0c12d19dd40a8e95cc2c15032f8977d6449 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 11:50:49 -0700
Subject: [PATCH 09/19] perf: add reset routine

---
 capa/perf.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/capa/perf.py b/capa/perf.py
index d1e4083d4..c7b416c0b 100644
--- a/capa/perf.py
+++ b/capa/perf.py
@@ -1,3 +1,8 @@
 import collections
 
 counters = collections.Counter()
+
+
+def reset():
+    global counters
+    counters = collections.Counter()

From a35be4a666c26ec13d61ee30639d9c3894d129e1 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 11:51:09 -0700
Subject: [PATCH 10/19] scripts: add py script for profiling time

---
 scripts/profile-time.py | 115 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 scripts/profile-time.py

diff --git a/scripts/profile-time.py b/scripts/profile-time.py
new file mode 100644
index 000000000..3c47b67b4
--- /dev/null
+++ b/scripts/profile-time.py
@@ -0,0 +1,115 @@
+import sys
+import timeit
+import logging
+import argparse
+import subprocess
+
+import tqdm
+import tabulate
+
+import capa.main
+import capa.perf
+import capa.rules
+import capa.engine
+import capa.helpers
+import capa.features
+import capa.features.common
+import capa.features.freeze
+
+logger = logging.getLogger("capa.profile")
+
+
+def main(argv=None):
+    if argv is None:
+        argv = sys.argv[1:]
+
+    label = subprocess.run(
+        "git show --pretty=oneline --abbrev-commit | head -n 1", shell=True, capture_output=True, text=True
+    ).stdout.strip()
+    is_dirty = (
+        subprocess.run(
+            "git status | grep 'modified: ' | grep -v 'rules' | grep -v 'tests/data'",
+            shell=True,
+            capture_output=True,
+            text=True,
+        ).stdout
+        != ""
+    )
+
+    if is_dirty:
+        label += " (dirty)"
+
+    parser = argparse.ArgumentParser(description="Profile capa performance")
+    capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "rules"})
+
+    parser.add_argument("--number", type=int, default=3, help="batch size of profile collection")
+    parser.add_argument("--repeat", type=int, default=30, help="batch count of profile collection")
+    parser.add_argument("--label", type=str, default=label, help="description of the profile collection")
+
+    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)
+
+    try:
+        taste = capa.helpers.get_file_taste(args.sample)
+    except IOError as e:
+        logger.error("%s", str(e))
+        return -1
+
+    try:
+        with capa.main.timing("load rules"):
+            rules = capa.rules.RuleSet(capa.main.get_rules(args.rules, disable_progress=True))
+    except (IOError) as e:
+        logger.error("%s", str(e))
+        return -1
+
+    try:
+        sig_paths = capa.main.get_signatures(args.signatures)
+    except (IOError) as e:
+        logger.error("%s", str(e))
+        return -1
+
+    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
+        with open(args.sample, "rb") as f:
+            extractor = capa.features.freeze.load(f.read())
+    else:
+        extractor = capa.main.get_extractor(
+            args.sample, args.format, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False
+        )
+
+    with tqdm.tqdm(total=args.number * args.repeat) as pbar:
+
+        def do_iteration():
+            capa.perf.reset()
+            capa.main.find_capabilities(rules, extractor, disable_progress=True)
+            pbar.update(1)
+
+        samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)
+
+    logger.debug("perf: find capabilities: min: %0.2fs" % (min(samples) / float(args.number)))
+    logger.debug("perf: find capabilities: avg: %0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)))
+    logger.debug("perf: find capabilities: max: %0.2fs" % (max(samples) / float(args.number)))
+
+    for (counter, count) in capa.perf.counters.most_common():
+        logger.debug("perf: counter: {:}: {:,}".format(counter, count))
+
+    print(
+        tabulate.tabulate(
+            [
+                (
+                    args.label,
+                    "{:,}".format(capa.perf.counters["evaluate.feature"]),
+                    "%0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)),
+                    "%0.2fs" % (min(samples) / float(args.number)),
+                    "%0.2fs" % (max(samples) / float(args.number)),
+                )
+            ],
+            headers=["label", "count(evaluations)", "avg(time)", "min(time)", "max(time)"],
+            tablefmt="github",
+        )
+    )
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())

From 70f007525d82df0b3980e954b2917aba9c781a4a Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 12:11:01 -0700
Subject: [PATCH 11/19] pep8

---
 capa/engine.py          | 18 +++++++++---------
 capa/features/common.py | 10 +++++-----
 capa/rules.py           |  2 +-
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index d6138c9c9..97f6d2958 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -81,7 +81,7 @@ def __init__(self, children, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.and"] += 1  
+        capa.perf.counters["evaluate.feature.and"] += 1
 
         results = [child.evaluate(ctx) for child in self.children]
         success = all(results)
@@ -97,8 +97,8 @@ def __init__(self, children, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.or"] += 1  
- 
+        capa.perf.counters["evaluate.feature.or"] += 1
+
         results = [child.evaluate(ctx) for child in self.children]
         success = any(results)
         return Result(success, self, results)
@@ -113,8 +113,8 @@ def __init__(self, child, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.not"] += 1  
-  
+        capa.perf.counters["evaluate.feature.not"] += 1
+
         results = [self.child.evaluate(ctx)]
         success = not results[0]
         return Result(success, self, results)
@@ -130,8 +130,8 @@ def __init__(self, count, children, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.some"] += 1  
-  
+        capa.perf.counters["evaluate.feature.some"] += 1
+
         results = [child.evaluate(ctx) for child in self.children]
         # note that here we cast the child result as a bool
         # because we've overridden `__bool__` above.
@@ -152,8 +152,8 @@ def __init__(self, child, min=None, max=None, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.range"] += 1  
-  
+        capa.perf.counters["evaluate.feature.range"] += 1
+
         count = len(ctx.get(self.child, []))
         if self.min == 0 and count == 0:
             return Result(True, self, [])
diff --git a/capa/features/common.py b/capa/features/common.py
index 0f01ef523..a40201e3a 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -190,7 +190,7 @@ def __init__(self, value: str, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.substring"] += 1 
+        capa.perf.counters["evaluate.feature.substring"] += 1
 
         # mapping from string value to list of locations.
         # will unique the locations later on.
@@ -278,8 +278,8 @@ def __init__(self, value: str, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.regex"] += 1 
-                                                        
+        capa.perf.counters["evaluate.feature.regex"] += 1
+
         # mapping from string value to list of locations.
         # will unique the locations later on.
         matches = collections.defaultdict(list)
@@ -364,8 +364,8 @@ def __init__(self, value: bytes, description=None):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.bytes"] += 1 
-                                                        
+        capa.perf.counters["evaluate.feature.bytes"] += 1
+
         for feature, locations in ctx.items():
             if not isinstance(feature, (Bytes,)):
                 continue
diff --git a/capa/rules.py b/capa/rules.py
index 6960e02bf..2753f19dc 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -622,7 +622,7 @@ def extract_subscope_rules(self):
 
     def evaluate(self, features: FeatureSet):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.rule"] += 1 
+        capa.perf.counters["evaluate.feature.rule"] += 1
         return self.statement.evaluate(features)
 
     @classmethod

From b621205a06a70822e1081d5341c59f09dd2f6133 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 14:24:13 -0700
Subject: [PATCH 12/19] mypy

---
 capa/features/common.py | 6 +++++-
 capa/perf.py            | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/capa/features/common.py b/capa/features/common.py
index a40201e3a..ac589593b 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -10,7 +10,11 @@
 import codecs
 import logging
 import collections
-from typing import Set, Dict, List, Union
+from typing import TYPE_CHECKING, Set, Dict, List, Union
+
+if TYPE_CHECKING:
+    # circular import, otherwise
+    import capa.engine
 
 import capa.perf
 import capa.features
diff --git a/capa/perf.py b/capa/perf.py
index c7b416c0b..067c4f37a 100644
--- a/capa/perf.py
+++ b/capa/perf.py
@@ -1,6 +1,7 @@
 import collections
+from typing import Dict
 
-counters = collections.Counter()
+counters: Dict[str, int] = collections.Counter()
 
 
 def reset():

From f598acb8fc78e80063bb4ccff1910c6cc5ef1503 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 14:24:48 -0700
Subject: [PATCH 13/19] scripts: remove old profiling scripts

---
 scripts/profile-time.sh        | 62 ----------------------------------
 scripts/render-time-profile.sh | 24 -------------
 2 files changed, 86 deletions(-)
 delete mode 100644 scripts/profile-time.sh
 delete mode 100644 scripts/render-time-profile.sh

diff --git a/scripts/profile-time.sh b/scripts/profile-time.sh
deleted file mode 100644
index d09eabd3f..000000000
--- a/scripts/profile-time.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-
-# unset variables are errors
-set -o nounset;
-# any failed commands are errors
-set -o errexit;
-
-# current_directory is the path to the directory containing this script.
-# ref: https://stackoverflow.com/a/4774063/87207
-readonly CD="$( cd "$(dirname "$0")" ; pwd -P )"
-
-panic() {
-	echo "[erro]: $@" >&2;
-	exit 1;
-}
-
-info() {
-	echo "[info]: $@" >&2;
-}
-
-verbose=false;
-debug() {
-	if "$verbose"; then
-		echo "[debu]: $@" >&2;
-	fi
-}
-
-if [ "$(git status | grep "modified: " | grep -v "rules" | grep -v "tests/data")" ]; then
-    panic "modified content";
-fi
-
-rev=$(git rev-parse --short HEAD);
-info "rev: $rev";
-
-mkdir -p "$CD/perf/";
-
-info "analyzing PMA 01-01.dll...";
-
-pma_out=$(
-    py-spy record \
-    -o "$CD/perf/capa-$rev-PMA0101.svg" \
-    -- python -m capa.main \
-       -d \
-       "$CD/../tests/data/Practical Malware Analysis Lab 01-01.dll_" \
-       2>&1 || true);
-
-echo "$pma_out" | grep "perf:" | sed -e "s/^.*perf: /perf: /g" | tee "$CD/perf/capa-$rev-PMA0101.txt";
- 
-info "analyzing kernel32.dll...";
-k32_out=$(
-    py-spy record \
-    -o "$CD/perf/capa-$rev-k32.svg" \
-    -- python -m capa.main \
-       -d \
-       "$CD/../tests/data/kernel32.dll_" \
-       2>&1 || true);
-
-echo "$k32_out" | grep "perf:" | sed -e "s/^.*perf: /perf: /g" | tee "$CD/perf/capa-$rev-k32.txt";
-
-bash "$CD/render-time-profile.sh" "$rev";
-
-info "done.";
diff --git a/scripts/render-time-profile.sh b/scripts/render-time-profile.sh
deleted file mode 100644
index 1b5627dce..000000000
--- a/scripts/render-time-profile.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-fields=("load FLIRT" "viv analyze" "match functions" "match file" "find capabilities" "evaluate.feature");
-
-echo -n "|         |";
-for T in "${fields[@]}"; do
-    printf ' %-17s |' "$T";
-done
-echo "";
- 
-echo -n "|---------|";
-for T in "${fields[@]}"; do
-    echo -n '-------------------|';
-done
-echo "";
-
-for rev in "$@"; do
-
-    echo -n "| $rev |";
-    for T in "${fields[@]}"; do
-        V1=$(cat scripts/perf/capa-$rev-PMA0101.txt | grep "$T: " | sed -e "s/^.*$T: //g");
-        V2=$(cat scripts/perf/capa-$rev-k32.txt | grep "$T: " | sed -e "s/^.*$T: //g");
-        printf ' %-17s |' "$V1/$V2";
-    done
-    echo "";
-done

From 6c8d246af91e1f0e0daa22b5f511ea0acbd40cf0 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 14:31:43 -0700
Subject: [PATCH 14/19] fix bad merge

---
 capa/features/common.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/capa/features/common.py b/capa/features/common.py
index 75ed12ba5..3a4e71e94 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -146,13 +146,9 @@ def __str__(self):
     def __repr__(self):
         return str(self)
 
-<<<<<<< HEAD
-    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> "Result":
+    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> Result:
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature." + self.name] += 1
-=======
-    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> Result:
->>>>>>> master
         return Result(self in ctx, self, [], locations=ctx.get(self, []))
 
     def freeze_serialize(self):

From 334425a08f024be847a8ac827e10c2187e8b804b Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 15:16:08 -0700
Subject: [PATCH 15/19] changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cc4dac015..5c7c3ac6b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,8 @@
 
 ### Development
 
+- add profiling infrastructure #828 @williballenthin
+
 ### Raw diffs
 - [capa <release>...master](https://github.com/mandiant/capa/compare/v3.0.3...master)
 - [capa-rules <release>...master](https://github.com/mandiant/capa-rules/compare/v3.0.3...master)

From 6f6831f8124771aec22597a42fa2929213971016 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 15:20:11 -0700
Subject: [PATCH 16/19] perf: document that counters is unstable

---
 capa/perf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/capa/perf.py b/capa/perf.py
index 067c4f37a..cb0e89eca 100644
--- a/capa/perf.py
+++ b/capa/perf.py
@@ -1,6 +1,7 @@
 import collections
 from typing import Dict
 
+# this structure is unstable and may change before the next major release.
 counters: Dict[str, int] = collections.Counter()
 
 

From 2abebfbce72842cc5ab8fe1b6dbc196d386aecda Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 15:22:01 -0700
Subject: [PATCH 17/19] main: remove perf messages

---
 capa/main.py | 56 +++++++++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 31 deletions(-)

diff --git a/capa/main.py b/capa/main.py
index 5aaa97d65..52c28d272 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -169,28 +169,27 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
     n_funcs = len(functions)
 
     pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions")
-    with timing("match functions"):
-        for f in pb:
-            function_address = int(f)
-
-            if extractor.is_library_function(function_address):
-                function_name = extractor.get_function_name(function_address)
-                logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
-                meta["library_functions"][function_address] = function_name
-                n_libs = len(meta["library_functions"])
-                percentage = 100 * (n_libs / n_funcs)
-                if isinstance(pb, tqdm.tqdm):
-                    pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage))
-                continue
+    for f in pb:
+        function_address = int(f)
+
+        if extractor.is_library_function(function_address):
+            function_name = extractor.get_function_name(function_address)
+            logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
+            meta["library_functions"][function_address] = function_name
+            n_libs = len(meta["library_functions"])
+            percentage = 100 * (n_libs / n_funcs)
+            if isinstance(pb, tqdm.tqdm):
+                pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage))
+            continue
 
-            function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
-            meta["feature_counts"]["functions"][function_address] = feature_count
-            logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)
+        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
+        meta["feature_counts"]["functions"][function_address] = feature_count
+        logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)
 
-            for rule_name, res in function_matches.items():
-                all_function_matches[rule_name].extend(res)
-            for rule_name, res in bb_matches.items():
-                all_bb_matches[rule_name].extend(res)
+        for rule_name, res in function_matches.items():
+            all_function_matches[rule_name].extend(res)
+        for rule_name, res in bb_matches.items():
+            all_bb_matches[rule_name].extend(res)
 
     # collection of features that captures the rule matches within function and BB scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.
@@ -200,8 +199,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
         rule = ruleset[rule_name]
         capa.engine.index_rule_matches(function_and_lower_features, rule, locations)
 
-    with timing("match file"):
-        all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
+    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
     meta["feature_counts"]["file"] = feature_count
 
     matches = {
@@ -413,11 +411,9 @@ def get_workspace(path, format, sigpaths):
     else:
         raise ValueError("unexpected format: " + format)
 
-    with timing("load FLIRT"):
-        viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
+    viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
 
-    with timing("viv analyze"):
-        vw.analyze()
+    vw.analyze()
 
     logger.debug("%s", get_meta_str(vw))
     return vw
@@ -905,9 +901,8 @@ def main(argv=None):
         return E_MISSING_FILE
 
     try:
-        with timing("load rules"):
-            rules = get_rules(args.rules, disable_progress=args.quiet)
-            rules = capa.rules.RuleSet(rules)
+        rules = get_rules(args.rules, disable_progress=args.quiet)
+        rules = capa.rules.RuleSet(rules)
 
         logger.debug(
             "successfully loaded %s rules",
@@ -1020,8 +1015,7 @@ def main(argv=None):
 
     meta = collect_metadata(argv, args.sample, args.rules, extractor)
 
-    with timing("find capabilities"):
-        capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
+    capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
     meta["analysis"].update(counts)
     meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
 

From 18c30e4f12fbb3d639d59bff97bd3f88d76a236b Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 15:24:43 -0700
Subject: [PATCH 18/19] main: remove perf debug msgs

---
 capa/main.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/capa/main.py b/capa/main.py
index 52c28d272..6fe59a510 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -1019,9 +1019,6 @@ def main(argv=None):
     meta["analysis"].update(counts)
     meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
 
-    for (counter, count) in capa.perf.counters.most_common():
-        logger.debug("perf: counter: {:}: {:,}".format(counter, count))
-
     if has_file_limitation(rules, capabilities):
         # bail if capa encountered file limitation e.g. a packed binary
         # do show the output in verbose mode, though.

From 7a4aee592be171c7ba7fde03623a1596db831b9e Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Tue, 9 Nov 2021 16:08:39 -0700
Subject: [PATCH 19/19] profile-time: add doc

---
 scripts/profile-time.py | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/scripts/profile-time.py b/scripts/profile-time.py
index 3c47b67b4..3d6b5e070 100644
--- a/scripts/profile-time.py
+++ b/scripts/profile-time.py
@@ -1,3 +1,34 @@
+"""
+Invoke capa multiple times and record profiling informations.
+Use the --number and --repeat options to change the number of iterations.
+By default, the script will emit a markdown table with a label pulled from git.
+
+Note: you can run this script against pre-generated .frz files to reduce the startup time.
+
+usage:
+
+    usage: profile-time.py [--number NUMBER] [--repeat REPEAT] [--label LABEL] sample
+
+    Profile capa performance
+
+    positional arguments:
+      sample                path to sample to analyze
+
+    optional arguments:
+      --number NUMBER       batch size of profile collection
+      --repeat REPEAT       batch count of profile collection
+      --label LABEL         description of the profile collection
+
+example:
+
+    $ python profile-time.py ./tests/data/kernel32.dll_.frz --number 1 --repeat 2
+
+    | label                                | count(evaluations)   | avg(time)   | min(time)   | max(time)   |
+    |--------------------------------------|----------------------|-------------|-------------|-------------|
+    | 18c30e4 main: remove perf debug msgs | 66,561,622           | 132.13s     | 125.14s     | 139.12s     |
+
+      ^^^ --label or git hash               
+"""
 import sys
 import timeit
 import logging
@@ -98,12 +129,16 @@ def do_iteration():
                 (
                     args.label,
                     "{:,}".format(capa.perf.counters["evaluate.feature"]),
-                    "%0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)),
+                    # python documentation indicates that min(samples) should be preferred,
+                    # so lets put that first.
+                    #
+                    # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
                     "%0.2fs" % (min(samples) / float(args.number)),
+                    "%0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)),
                     "%0.2fs" % (max(samples) / float(args.number)),
                 )
             ],
-            headers=["label", "count(evaluations)", "avg(time)", "min(time)", "max(time)"],
+            headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
             tablefmt="github",
         )
     )