Skip to content

Commit

Permalink
Parallelize dwarfdump per-unit
Browse files Browse the repository at this point in the history
Before:

time ( target/release/examples/dwarfdump -i ~/mozilla-central/obj-ff-opt/dist/bin/libxul.so >& /dev/null )
real    1m39.153s
user    1m37.714s
sys     0m1.320s

After:

time ( target/release/examples/dwarfdump -i ~/mozilla-central/obj-ff-opt/dist/bin/libxul.so >& /dev/null )
real	0m25.641s
user	2m3.328s
sys	0m1.087s

This increases memory usage. We buffer the output of each compilation unit, so the maximum memory usage grows
by roughly the combined size of the N largest per-unit outputs, where N is the min of 16 and num_cpus::get().
The larger compilation units in Firefox's libxul.so produce tens to hundreds of megabytes of output each.
Then again, the speedup when processing such large files is important.
  • Loading branch information
rocallahan committed Mar 12, 2018
1 parent 71a1138 commit fef4e3d
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 33 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ byteorder = { version = "1.0", default-features = false }
fallible-iterator = { version = "0.1.4", default-features = false }

[dev-dependencies]
crossbeam = "0.3.2"
getopts = "0.2"
memmap = "0.6"
num_cpus = "1"
object = "0.7"
test-assembler = "0.1.3"

Expand Down
137 changes: 104 additions & 33 deletions examples/dwarfdump.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
// Allow clippy lints when building without clippy.
#![allow(unknown_lints)]

extern crate crossbeam;
extern crate fallible_iterator;
extern crate gimli;
extern crate getopts;
extern crate memmap;
extern crate num_cpus;
extern crate object;

use fallible_iterator::FallibleIterator;
use gimli::{UnitOffset, UnwindSection};
use gimli::{UnitOffset, CompilationUnitHeader, UnwindSection};
use object::Object;
use std::cmp::min;
use std::collections::HashMap;
use std::env;
use std::io;
use std::io::{BufWriter, Write};
use std::fs;
use std::iter::Iterator;
use std::process;
use std::error;
use std::mem;
use std::result;
use std::fmt::{self, Debug};
use std::sync::{Mutex, Condvar};

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
Expand Down Expand Up @@ -65,12 +71,77 @@ impl From<io::Error> for Error {

pub type Result<T> = result::Result<T, Error>;

trait Reader: gimli::Reader<Offset = usize> {}
/// Runs `f` over every item of `iter` on a pool of worker threads and writes
/// each item's buffered output to stdout in the iterator's original order.
///
/// Workers take turns in round-robin order. On its turn a worker pulls the
/// next item from the shared iterator and hands the turn to the next worker,
/// then runs `f` on the item without holding the lock. When its turn comes
/// round again it writes its buffer to stdout. Because items are both pulled
/// and written in turn order, the output order matches the input order.
///
/// * `max_workers` - upper bound on the number of worker threads. The actual
///   count is the smaller of this and `num_cpus::get()`, but never less than
///   one, so the items are always processed.
/// * `iter` - the items to process.
/// * `f` - called as `f(item, buf)`; it must write the output for `item` into
///   `buf`, which is cleared before each call.
///
/// Returns the first error in output order, either the one returned by `f` or
/// an I/O error from writing to stdout. Whatever `f` put in the buffer before
/// failing is still written. After an error has been recorded no further
/// items are pulled and no further output is written.
///
/// NOTE(review): a panic inside `f` is not caught here; the panicking worker
/// never takes its next turn, so `f` is expected not to panic.
fn parallel_output<II, F>(max_workers: usize, iter: II, f: F) -> Result<()>
where
    F: Sync + Fn(II::Item, &mut Vec<u8>) -> Result<()>,
    II: IntoIterator,
    II::IntoIter: Send,
{
    /// State shared by all workers, guarded by a single mutex.
    struct ParallelOutputState<I: Iterator> {
        /// Remaining items; fused so it can be polled again after finishing.
        iterator: I,
        /// Index of the worker whose turn it currently is.
        current_worker: usize,
        /// First error seen so far, or `Ok(())`.
        result: Result<()>,
    }

    let state = Mutex::new(ParallelOutputState {
        iterator: iter.into_iter().fuse(),
        current_worker: 0,
        result: Ok(()),
    });
    // Always run at least one worker: with zero workers nothing would be
    // processed and the function would still report success.
    let workers = min(max_workers, num_cpus::get()).max(1);
    // One condition variable per worker, so a turn hand-off wakes only the
    // worker that is next in line.
    let condvars: Vec<Condvar> = (0..workers).map(|_| Condvar::new()).collect();
    {
        let state_ref = &state;
        let f_ref = &f;
        let condvars_ref = &condvars;
        crossbeam::scope(|scope| {
            for i in 0..workers {
                scope.spawn(move || {
                    // Per-worker output buffer, reused across items.
                    let mut v = Vec::new();
                    let mut lock = state_ref.lock().unwrap();
                    // Wait for this worker's first turn.
                    while lock.current_worker != i {
                        lock = condvars_ref[i].wait(lock).unwrap();
                    }
                    loop {
                        // Stop pulling items as soon as an error is recorded.
                        let item = if lock.result.is_ok() {
                            lock.iterator.next()
                        } else {
                            None
                        };
                        // Hand the turn on before doing the expensive work,
                        // so the next worker can start its own item.
                        lock.current_worker = (i + 1) % workers;
                        condvars_ref[lock.current_worker].notify_one();
                        mem::drop(lock);

                        let ret = if let Some(item) = item {
                            v.clear();
                            f_ref(item, &mut v)
                        } else {
                            // Nothing left to do; the turn is already passed on.
                            return;
                        };

                        // Wait for the turn to come back before writing, which
                        // keeps the output in input order.
                        lock = state_ref.lock().unwrap();
                        while lock.current_worker != i {
                            lock = condvars_ref[i].wait(lock).unwrap();
                        }
                        if lock.result.is_ok() {
                            let out = io::stdout();
                            // Report a failed write (e.g. a closed pipe) as an
                            // error rather than panicking while the mutex is
                            // held, which would poison it for other workers.
                            lock.result = match out.lock().write_all(&v) {
                                Ok(()) => ret,
                                Err(err) => Err(err.into()),
                            };
                        }
                    }
                });
            }
        });
    }
    state.into_inner().unwrap().result
}

/// A `gimli::Reader` with `usize` offsets that is also `Send + Sync`.
///
/// NOTE(review): presumably the thread-safety bounds exist so that readers
/// can be used from the worker threads spawned by `parallel_output` — confirm
/// against the callers.
trait Reader: gimli::Reader<Offset = usize> + Send + Sync {
// An endianity type that is itself `Send + Sync`.
type SyncSendEndian: gimli::Endianity + Send + Sync;
}

/// `gimli::EndianBuf` is a `Reader` whenever its endianity type is
/// `Send + Sync`.
impl<'input, Endian> Reader for gimli::EndianBuf<'input, Endian>
where
Endian: gimli::Endianity,
Endian: gimli::Endianity + Send + Sync,
{
// The buffer's own endianity parameter already satisfies the required bounds.
type SyncSendEndian = Endian;
}

#[derive(Default)]
Expand Down Expand Up @@ -194,11 +265,7 @@ fn main() {
} else {
gimli::RunTimeEndian::Big
};
let ret = {
let stdout = io::stdout();
let mut writer = BufWriter::new(stdout.lock());
dump_file(&mut writer, &file, endian, &flags)
};
let ret = dump_file(&file, endian, &flags);
match ret {
Ok(_) => (),
Err(err) => println!(
Expand All @@ -210,17 +277,17 @@ fn main() {
}
}

fn dump_file<Endian, W: Write>(w: &mut W, file: &object::File, endian: Endian, flags: &Flags) -> Result<()>
fn dump_file<Endian>(file: &object::File, endian: Endian, flags: &Flags) -> Result<()>
where
Endian: gimli::Endianity,
Endian: gimli::Endianity + Send + Sync,
{
fn load_section<'input, 'file, S, Endian>(
file: &'file object::File<'input>,
endian: Endian,
) -> S
where
S: gimli::Section<gimli::EndianBuf<'input, Endian>>,
Endian: gimli::Endianity,
Endian: gimli::Endianity + Send + Sync,
'file: 'input,
{
let data = file.section_data_by_name(S::section_name()).unwrap_or(&[]);
Expand All @@ -247,12 +314,12 @@ where
let debug_rnglists = load_section(file, endian);
let rnglists = &gimli::RangeLists::new(debug_ranges, debug_rnglists)?;

let out = io::stdout();
if flags.eh_frame {
dump_eh_frame(w, eh_frame)?;
dump_eh_frame(&mut BufWriter::new(out.lock()), eh_frame)?;
}
if flags.info {
dump_info(
w,
debug_info,
debug_abbrev,
debug_line,
Expand All @@ -263,7 +330,7 @@ where
flags,
)?;
dump_types(
w,
&mut BufWriter::new(out.lock()),
debug_types,
debug_abbrev,
debug_line,
Expand All @@ -273,8 +340,9 @@ where
endian,
flags,
)?;
writeln!(w)?;
writeln!(&mut out.lock())?;
}
let w = &mut BufWriter::new(out.lock());
if flags.line {
dump_line(w, debug_line, debug_info, debug_abbrev, debug_str)?;
}
Expand Down Expand Up @@ -504,34 +572,34 @@ fn dump_cfi_instructions<R: Reader, W: Write>(
}

#[allow(too_many_arguments)]
fn dump_info<R: Reader, W: Write>(
w: &mut W,
fn dump_info<R: Reader>(
debug_info: &gimli::DebugInfo<R>,
debug_abbrev: &gimli::DebugAbbrev<R>,
debug_line: &gimli::DebugLine<R>,
debug_str: &gimli::DebugStr<R>,
loclists: &gimli::LocationLists<R>,
rnglists: &gimli::RangeLists<R>,
endian: R::Endian,
endian: R::SyncSendEndian,
flags: &Flags,
) -> Result<()> {
writeln!(w, "\n.debug_info")?;
let out = io::stdout();
writeln!(&mut BufWriter::new(out.lock()), "\n.debug_info")?;

let mut iter = debug_info.units();
while let Some(unit) = iter.next()? {
let units = debug_info.units().collect::<Vec<_>>().unwrap();
let process_unit = |unit: CompilationUnitHeader<R, R::Offset>, buf: &mut Vec<u8>| -> Result<()> {
let abbrevs = match unit.abbreviations(debug_abbrev) {
Ok(abbrevs) => abbrevs,
Err(err) => {
writeln!(w,
writeln!(buf,
"Failed to parse abbreviations: {}",
error::Error::description(&err)
)?;
continue;
return Ok(());
}
};

let entries_result = dump_entries(
w,
buf,
unit.offset().0,
unit.entries(&abbrevs),
unit.address_size(),
Expand All @@ -545,13 +613,16 @@ fn dump_info<R: Reader, W: Write>(
flags,
);
if let Err(err) = entries_result {
writeln!(w,
writeln!(buf,
"Failed to dump entries: {}",
error::Error::description(&err)
)?;
};
}
Ok(())
}
Ok(())
};
// Don't use more than 16 cores even if available. No point in soaking hundreds
// of cores if you happen to have them.
parallel_output(16, units, process_unit)
}

#[allow(too_many_arguments)]
Expand All @@ -563,7 +634,7 @@ fn dump_types<R: Reader, W: Write>(
debug_str: &gimli::DebugStr<R>,
loclists: &gimli::LocationLists<R>,
rnglists: &gimli::RangeLists<R>,
endian: R::Endian,
endian: R::SyncSendEndian,
flags: &Flags,
) -> Result<()> {
writeln!(w, "\n.debug_types")?;
Expand Down Expand Up @@ -617,7 +688,7 @@ fn dump_types<R: Reader, W: Write>(

// TODO: most of this should be moved to the main library.
struct Unit<R: Reader> {
endian: R::Endian,
endian: R::SyncSendEndian,
format: gimli::Format,
address_size: u8,
version: u16,
Expand Down Expand Up @@ -646,7 +717,7 @@ fn dump_entries<R: Reader, W: Write>(
debug_str: &gimli::DebugStr<R>,
loclists: &gimli::LocationLists<R>,
rnglists: &gimli::RangeLists<R>,
endian: R::Endian,
endian: R::SyncSendEndian,
flags: &Flags,
) -> Result<()> {
let mut unit = Unit {
Expand Down Expand Up @@ -905,10 +976,10 @@ fn dump_attr_value<R: Reader, W: Write>(
fn dump_type_signature<Endian, W: Write>(
w: &mut W,
signature: gimli::DebugTypeSignature,
endian: Endian
endian: Endian,
) -> Result<()>
where
Endian: gimli::Endianity,
Endian: gimli::Endianity + Send + Sync,
{
// Convert back to bytes so we can match libdwarf-dwarfdump output.
let mut buf = [0; 8];
Expand Down

0 comments on commit fef4e3d

Please sign in to comment.