jonhoo · jasonrhansen · Mar 16, 2019 · Mar 14, 2019 · Mar 15, 2019 · Mar 15, 2019
diff --git a/Cargo.toml b/Cargo.toml
@@ -67,6 +67,11 @@ name = "inferno-flamegraph"
 path = "src/bin/flamegraph.rs"
 required-features = ["cli"]
 
+[[bin]]
+name = "inferno-diff-folded"
+path = "src/bin/diff-folded.rs"
+required-features = ["cli"]
+
 [[bench]]
 name = "collapse"
 harness = false

diff --git a/src/bin/diff-folded.rs b/src/bin/diff-folded.rs
@@ -0,0 +1,83 @@
+use env_logger::Env;
+use std::io;
+use std::path::PathBuf;
+use structopt::StructOpt;
+
+use inferno::differential::{self, Options};
+
+#[derive(Debug, StructOpt)]
+#[structopt(
+    name = "inferno-diff-folded",
+    author = "",
+    after_help = "\
+Creates a differential between two folded stack profiles that can be passed
+to inferno-flamegraph to generate a differential flame graph.
+
+$ inferno-diff-folded folded1 folded2 | inferno-flamegraph > diff2.svg
+
+The flamegraph will be colored based on higher samples (red) and smaller
+samples (blue). The frame widths will be based on the 2nd folded profile.
+This might be confusing if stack frames disappear entirely; it will make
+the most sense to ALSO create a differential based on the 1st profile widths,
+while switching the hues. To do this, reverse the order of the folded files
+and pass the --negate flag to inferno-flamegraph like this:
+
+$ inferno-diff-folded folded2 folded1 | inferno-flamegraph --negate > diff1.svg
+
+You can use the inferno-collapse-* tools to generate the folded files."
+)]
+struct Opt {
+    /// Normalize sample counts
+    #[structopt(short = "n", long = "normalize")]
+    normalize: bool,
+
+    /// Strip hex numbers (addresses)
+    #[structopt(short = "s", long = "strip-hex")]
+    strip_hex: bool,
+
+    /// Silence all log output
+    #[structopt(short = "q", long = "quiet")]
+    quiet: bool,
+
+    /// Verbose logging mode (-v, -vv, -vvv)
+    #[structopt(short = "v", long = "verbose", parse(from_occurrences))]
+    verbose: usize,
+
+    /// Folded stack profile 1
+    infile1: PathBuf,
+
+    /// Folded stack profile 2
+    infile2: PathBuf,
+}
+
+impl Opt {
+    // Split the parsed arguments into the two input paths and the library options.
+    fn into_parts(self) -> (PathBuf, PathBuf, Options) {
+        let Opt {
+            normalize,
+            strip_hex,
+            ..
+        } = self;
+        let options = Options { normalize, strip_hex };
+        (self.infile1, self.infile2, options)
+    }
+}
+
+fn main() -> io::Result<()> {
+    let args = Opt::from_args();
+    // Install the logger unless --quiet was given; each -v raises the default filter level.
+    if !args.quiet {
+        let default_level = match args.verbose {
+            0 => "warn",
+            1 => "info",
+            2 => "debug",
+            _ => "trace",
+        };
+        env_logger::Builder::from_env(Env::default().default_filter_or(default_level))
+            .default_format_timestamp(false)
+            .init();
+    }
+
+    let (before, after, options) = args.into_parts();
+    differential::from_files(options, before, after, io::stdout().lock())
+}
diff --git a/src/bin/flamegraph.rs b/src/bin/flamegraph.rs
@@ -112,6 +112,12 @@ struct Opt {
     #[structopt(long = "pretty-xml")]
     pretty_xml: bool,
 
+    /// Don't sort the input lines.
+    /// If you set this flag you need to be sure your
+    /// input stack lines are already sorted.
+    #[structopt(long = "no-sort")]
+    pub no_sort: bool,
+
     /// Don't include static JavaScript in flame graph.
     /// This flag is hidden since it's only meant to be used in
     /// tests so we don't have to include the same static
@@ -143,6 +149,7 @@ impl<'a> Opt {
         options.negate_differentials = self.negate;
         options.factor = self.factor;
         options.pretty_xml = self.pretty_xml;
+        options.no_sort = self.no_sort;
         options.no_javascript = self.no_javascript;
 
         // set style options

diff --git a/src/differential/mod.rs b/src/differential/mod.rs
@@ -0,0 +1,186 @@
+use hashbrown::HashMap;
+use std::fs::File;
+use std::io;
+use std::io::prelude::*;
+use std::path::Path;
+
+const READER_CAPACITY: usize = 128 * 1024;
+
+#[derive(Debug, Clone, Copy, Default)]
+struct Counts {
+    first: usize, // samples accumulated for this stack from the first (before) profile
+    second: usize, // samples accumulated for this stack from the second (after) profile
+}
+
+/// Configure the generated output.
+///
+/// All options default to off.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct Options {
+    /// Scale the first profile's sample counts so their total matches the second profile's.
+    ///
+    /// This can help in scenarios where you take profiles at different times, under varying
+    /// load. If you generate a differential flame graph without setting this flag, everything
+    /// will look red if the load increased, or blue if it decreased. If this flag is set,
+    /// the first profile is balanced so you get the full red/blue spectrum.
+    pub normalize: bool,
+
+    /// Strip hex numbers (addresses) of the form "0x45ef2173" and replace them with "0x...".
+    pub strip_hex: bool,
+}
+
+/// Produce an output that can be used to generate a differential flame graph.
+///
+/// `before` and `after` are expected to contain folded stack lines, each made of two
+/// space-separated fields:
+///
+///  - A semicolon-separated list of frame names (e.g., `main;foo;bar;baz`).
+///  - A sample count for the given stack.
+///
+/// Every line written to `writer` holds a stack and its sample count in each profile.
+pub fn from_readers<R1, R2, W>(opt: Options, before: R1, after: R2, writer: W) -> io::Result<()>
+where
+    R1: BufRead,
+    R2: BufRead,
+    W: Write,
+{
+    let mut stack_counts = HashMap::new();
+    let total1 = parse_stack_counts(opt, &mut stack_counts, before, true)?;
+    let total2 = parse_stack_counts(opt, &mut stack_counts, after, false)?;
+    // An empty first profile has nothing to rescale; skipping it avoids casting 0.0 / 0.0 (NaN).
+    if opt.normalize && total1 != 0 && total1 != total2 {
+        for counts in stack_counts.values_mut() {
+            counts.first = (counts.first as f64 * total2 as f64 / total1 as f64) as usize;
+        }
+    }
+    write_stacks(&stack_counts, writer)
+}
+
+/// Produce a differential flame graph input from two folded stack files.
+///
+/// Both files are opened and wrapped in buffered readers, then handed to
+/// [`from_readers`]; see that function for the input and output formats.
+pub fn from_files<P1, P2, W>(
+    opt: Options,
+    file_before: P1,
+    file_after: P2,
+    writer: W,
+) -> io::Result<()>
+where
+    P1: AsRef<Path>,
+    P2: AsRef<Path>,
+    W: Write,
+{
+    // Each `?` returns the open error right away, so a missing first file is
+    // reported before the second one is touched.
+    let before = io::BufReader::with_capacity(READER_CAPACITY, File::open(file_before)?);
+    let after = io::BufReader::with_capacity(READER_CAPACITY, File::open(file_after)?);
+    from_readers(opt, before, after, writer)
+}
+
+// Populates `stack_counts` from the folded lines in `reader` and returns the sum of the
+// sample counts it parsed. `is_first` selects which profile's counter gets incremented.
+fn parse_stack_counts<R>(
+    opt: Options,
+    stack_counts: &mut HashMap<String, Counts>,
+    mut reader: R,
+    is_first: bool,
+) -> io::Result<usize>
+where
+    R: BufRead,
+{
+    let mut total = 0;
+    let mut line = String::new();
+    // Shared across lines so the fractional-sample warning fires at most once per reader.
+    let mut stripped_fractional_samples = false;
+
+    // One buffer is reused for every line; `read_line` reports 0 bytes only at end of input.
+    while reader.read_line(&mut line)? != 0 {
+        match parse_line(&line, opt.strip_hex, &mut stripped_fractional_samples) {
+            Some((stack, count)) => {
+                let counts = stack_counts.entry(stack).or_default();
+                if is_first {
+                    counts.first += count;
+                } else {
+                    counts.second += count;
+                }
+                total += count;
+            }
+            // Malformed lines are skipped; trimming keeps the line's own newline out of
+            // the log message.
+            None => warn!("Unable to parse line: {}", line.trim_end()),
+        }
+        line.clear();
+    }
+
+    Ok(total)
+}
+
+// Emits one line per stack: the folded stack followed by its sample count in the first
+// and in the second profile. Lines come out in the map's iteration order.
+fn write_stacks<W>(stack_counts: &HashMap<String, Counts>, mut writer: W) -> io::Result<()>
+where
+    W: Write,
+{
+    // `try_for_each` stops at, and returns, the first write error.
+    stack_counts
+        .iter()
+        .try_for_each(|(stack, c)| writeln!(writer, "{} {} {}", stack, c.first, c.second))
+}
+
+// Split a folded line into its stack and its (integral) sample count.
+// Returns `None` when the line has no space or the count is not a valid number.
+fn parse_line(
+    line: &str,
+    strip_hex: bool,
+    stripped_fractional_samples: &mut bool,
+) -> Option<(String, usize)> {
+    let split_at = line.rfind(' ')?;
+    let stack = line[..split_at].trim_end();
+    let count_field = line[split_at + 1..].trim_end();
+
+    // Drop the fractional part (if any), e.g. "foobar 1.5" counts as 1 sample.
+    // The Perl version keeps the fractional part but inferno
+    // strips them in its flamegraph implementation anyway.
+    let whole = match count_field.find('.') {
+        None => count_field,
+        Some(doti) => {
+            let (int_part, frac_part) = (&count_field[..doti], &count_field[doti + 1..]);
+            let all_digits = |s: &str| s.chars().all(|c| c.is_digit(10));
+            if !(all_digits(int_part) && all_digits(frac_part)) {
+                return None;
+            }
+            // Warn if we're stripping a non-zero fractional part, but only the first time.
+            if !*stripped_fractional_samples && frac_part.chars().any(|c| c != '0') {
+                *stripped_fractional_samples = true;
+                warn!("The input data has fractional sample counts that will be truncated to integers");
+            }
+            int_part
+        }
+    };
+    let nsamples = whole.parse::<usize>().ok()?;
+    let stack = if strip_hex {
+        strip_hex_address(stack)
+    } else {
+        stack.to_string()
+    };
+    Some((stack, nsamples))
+}
+
+// Rewrites every "0x" followed by hex digits (e.g. "0x45ef2173") as "0x...".
+// A bare "0x" with no digits after it is copied through unchanged.
+fn strip_hex_address(mut stack: &str) -> String {
+    let mut out = String::with_capacity(stack.len());
+    while let Some(start) = stack.find("0x") {
+        let (head, tail) = stack.split_at(start + 2);
+        out.push_str(head);
+        // Hex digits are single-byte ASCII, so this byte offset equals the digit count.
+        let end = tail.find(|c: char| !c.is_digit(16)).unwrap_or(tail.len());
+        if end > 0 {
+            out.push_str("...");
+        }
+        stack = &tail[end..];
+    }
+    out.push_str(stack);
+    out
+}
diff --git a/src/flamegraph/merge.rs b/src/flamegraph/merge.rs
@@ -1,4 +1,5 @@
 use std::collections::HashMap;
+use std::io;
 use std::iter;
 
 #[derive(Debug, PartialEq, Eq, Hash)]
@@ -103,7 +104,9 @@ fn flow<'a, LI, TI>(
     }
 }
 
-pub(super) fn frames<'a, I>(lines: I) -> (Vec<TimedFrame<'a>>, usize, usize, usize)
+pub(super) fn frames<'a, I>(
+    lines: I,
+) -> quick_xml::Result<(Vec<TimedFrame<'a>>, usize, usize, usize)>
 where
     I: IntoIterator<Item = &'a str>,
 {
@@ -115,12 +118,22 @@ where
     let mut delta = None;
     let mut delta_max = 1;
     let mut stripped_fractional_samples = false;
+    let mut prev_line = None;
     for line in lines {
         let mut line = line.trim();
         if line.is_empty() {
             continue;
         }
 
+        if let Some(prev_line) = prev_line {
+            if prev_line > line {
+                return Err(quick_xml::Error::Io(io::Error::new(
+                    io::ErrorKind::InvalidData,
+                    "unsorted input lines detected",
+                )));
+            }
+        }
+
         // Parse the number of samples for the purpose of computing overall time passed.
         // Usually there will only be one samples column at the end of a line,
         // but for differentials there will be two. When there are two we compute the
@@ -166,6 +179,7 @@ where
 
         last = stack;
         time += nsamples;
+        prev_line = Some(line);
     }
 
     if !last.is_empty() {
@@ -180,7 +194,7 @@ where
         );
     }
 
-    (frames, time, ignored, delta_max)
+    Ok((frames, time, ignored, delta_max))
 }
 
 // Parse and remove the number of samples from the end of a line.