Skip to content

Commit

Permalink
Merge pull request #323 from pacak/constants
Browse files Browse the repository at this point in the history
Smarter approach for detecting constants
  • Loading branch information
pacak authored Oct 11, 2024
2 parents 1fe70a0 + 9e925f3 commit a27ecaf
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 40 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
thanks @zamazan4ik for the suggestion
- detect and render merged functions (#310)
- update docs (#320)
- smarter approach for detecting constants (#315)
- bump deps

## [0.2.40] - 2024-10-01
- more consistent behavior when only one item is detected (#312)
Expand Down
46 changes: 29 additions & 17 deletions src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ fn used_labels<'a>(stmts: &'_ [Statement<'a>]) -> BTreeSet<&'a str> {
| Directive::Loc(_)
| Directive::SubsectionsViaSym
| Directive::SymIsFun(_) => None,
Directive::SetValue(_, val) => Some(*val),
Directive::Data(_, val) | Directive::SetValue(_, val) => Some(*val),
Directive::Generic(g) => Some(g.0),
Directive::SectionStart(ss) => Some(*ss),
},
Expand All @@ -267,14 +267,17 @@ fn used_labels<'a>(stmts: &'_ [Statement<'a>]) -> BTreeSet<&'a str> {
}

/// Scans for referenced constants
fn scan_constant(name: &str, sections: &[(usize, &str)], body: &[Statement]) -> Option<URange> {
let start = sections
.iter()
.find_map(|(ix, ss)| ss.contains(name).then_some(*ix))?;
let end = body[start..]
.iter()
.position(|s| matches!(s, Statement::Nothing))
.map_or_else(|| body.len(), |e| start + e);
/// Looks up the constant called `name` and returns the range of statements it occupies.
///
/// `sections` maps a label name to the index of that label inside `body`. The
/// resulting range starts at the label itself; its `end` is the label index plus
/// the number of consecutive `Directive::Data` statements that directly follow it.
///
/// Returns `None` when `name` has no entry in `sections`.
///
/// # Panics
///
/// Panics if the stored label index plus one is greater than `body.len()`.
///
/// NOTE(review): if `URange::end` is exclusive, the last data line is left out
/// of the range - `URange` is defined elsewhere, confirm.
fn scan_constant(
    name: &str,
    sections: &BTreeMap<&str, usize>,
    body: &[Statement],
) -> Option<URange> {
    let start = sections.get(name).copied()?;
    // Count only the uninterrupted run of data declarations right after the label
    let data_len = body[start + 1..]
        .iter()
        .take_while(|stmt| matches!(stmt, Statement::Directive(Directive::Data(..))))
        .count();
    Some(URange {
        start,
        end: start + data_len,
    })
}

Expand All @@ -295,7 +298,7 @@ fn dump_range(
};

let mut empty_line = false;
for line in stmts {
for (ix, line) in stmts.iter().enumerate() {
if fmt.verbosity > 2 {
safeprintln!("{line:?}");
}
Expand Down Expand Up @@ -349,7 +352,9 @@ fn dump_range(
}) = line
{
match fmt.redundant_labels {
_ if used.contains(id) => {
// We always include used labels and labels at the very
// beginning of the fragment - those are used for data declarations
_ if ix == 0 || used.contains(id) => {
safeprintln!("{line}");
}
RedundantLabels::Keep => {
Expand Down Expand Up @@ -612,14 +617,21 @@ impl<'a> Dumpable for Asm<'a> {
let mut pending = vec![print_range];
let mut seen: BTreeSet<URange> = BTreeSet::new();

let sections = lines
// Let's define a constant as a label followed by one or more data declarations
let constants = lines
.iter()
.enumerate()
.filter_map(|(ix, stmt)| match stmt {
Statement::Directive(Directive::SectionStart(ss)) => Some((ix, *ss)),
_ => None,
.filter_map(|(ix, stmt)| {
let Statement::Label(Label { id, .. }) = stmt else {
return None;
};
matches!(
lines.get(ix + 1),
Some(Statement::Directive(Directive::Data(_, _)))
)
.then_some((*id, ix))
})
.collect::<Vec<_>>();
.collect::<BTreeMap<_, _>>();
while let Some(subset) = pending.pop() {
seen.insert(subset);
for s in &lines[subset] {
Expand All @@ -629,7 +641,7 @@ impl<'a> Dumpable for Asm<'a> {
| Statement::Directive(Directive::Generic(GenericDirective(arg))) = s
{
for label in crate::demangle::local_labels(arg) {
if let Some(constant_range) = scan_constant(label, &sections, lines) {
if let Some(constant_range) = scan_constant(label, &constants, lines) {
if !seen.contains(&constant_range)
&& !print_range.fully_contains(constant_range)
{
Expand Down
82 changes: 59 additions & 23 deletions src/asm/statements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,32 +51,43 @@ impl<'a> Instruction<'a> {
}
}

impl<'a> Statement<'a> {
/// Should we skip it for --simplify output?
pub fn boring(&self) -> bool {
if let Statement::Directive(Directive::Generic(GenericDirective(x))) = self {
static DATA_DEC: OnceLock<Regex> = OnceLock::new();
// all of those can insert something as well... Not sure if it's a full list or not
// .long, .short .octa, .quad, .word,
// .single .double .float
// .ascii, .asciz, .string, .string8 .string16 .string32 .string64
// .2byte .4byte .8byte
// .dc
// .inst .insn
let reg = DATA_DEC.get_or_init(|| {
Regex::new(
"^(long|short|octa|quad|word|\
/// Parses a data declaration directive such as `.long`, `.byte` or `.asciz`.
///
/// Accepted shape: optional leading whitespace, a dot, one of the directive names
/// listed in the regular expression below, at least one whitespace character and
/// then the payload, which runs up to the next newline.
///
/// On a match returns the input that follows the captured payload together with
/// `Directive::Data(name, payload)`; the name is stored without the leading dot.
/// When the expression does not match, returns `nom::Err::Error` carrying
/// `ErrorKind::Eof`.
///
/// # Panics
///
/// Panics if the expression matched but one of its two capture groups is missing.
fn parse_data_dec(input: &str) -> IResult<&str, Directive> {
// The expression is compiled on first use and cached in this static afterwards
static DATA_DEC: OnceLock<Regex> = OnceLock::new();
// All of these directives can emit data as well. This may not be the full list:
// .long, .short .octa, .quad, .word,
// .single .double .float
// .ascii, .asciz, .string, .string8 .string16 .string32 .string64
// .byte .2byte .4byte .8byte
// .dc
// .inst .insn
let reg = DATA_DEC.get_or_init(|| {
Regex::new(
"^[\\s\\t]*\\.(long|short|octa|quad|word|\
single|double|float|\
ascii|asciz|string|string8|string16|string32|string64|\
2byte|4byte|8byte|dc|\
byte|2byte|4byte|8byte|dc|\
inst|insn\
)[\\s\\t]",
)
.expect("regexp should be valid")
});
return !reg.is_match(x);
}
)[\\s\\t]+([^\\n]+)",
)
.expect("regexp should be valid")
});

let Some(cap) = reg.captures(input) else {
use nom::error::*;
return Err(nom::Err::Error(Error::new(input, ErrorKind::Eof)));
};
let (Some(instr), Some(data)) = (cap.get(1), cap.get(2)) else {
panic!("regexp should be valid and capture found something");
};
Ok((
&input[data.range().end..],
Directive::Data(instr.as_str(), data.as_str()),
))
}

impl<'a> Statement<'a> {
/// Should we skip it for --simplify output?
pub fn boring(&self) -> bool {
if let Statement::Directive(Directive::SetValue(_, _)) = self {
return false;
}
Expand Down Expand Up @@ -167,6 +178,14 @@ impl std::fmt::Display for Directive<'_> {
color!("type", OwoColorize::bright_magenta)
)
}
Directive::Data(ty, data) => {
write!(
f,
"\t.{} {}",
color!(ty, OwoColorize::bright_magenta),
color!(data, OwoColorize::bright_cyan)
)
}
}
}
}
Expand Down Expand Up @@ -706,6 +725,22 @@ fn parse_function_alias() {
)
}

#[test]
fn parse_data_decl() {
    // Each case: (input line, expected directive name, expected payload)
    let cases = [
        (" .asciz \"sample_merged\"\n", "asciz", "\"sample_merged\""),
        (" .byte 0\n", "byte", "0"),
        ("\t.long .Linfo_st\n", "long", ".Linfo_st"),
    ];
    for (input, ty, data) in cases {
        let (_, parsed) = parse_statement(input).unwrap();
        assert_eq!(parsed, Statement::Directive(Directive::Data(ty, data)));
    }
}

#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Directive<'a> {
File(File<'a>),
Expand All @@ -715,6 +750,7 @@ pub enum Directive<'a> {
SetValue(&'a str, &'a str),
SubsectionsViaSym,
SectionStart(&'a str),
Data(&'a str, &'a str),
}

#[derive(Clone, Debug, Eq, PartialEq)]
Expand Down Expand Up @@ -768,7 +804,7 @@ pub fn parse_statement(input: &str) -> IResult<&str, Statement> {
);

let dir = map(
alt((file, loc, set, ssvs, section, typ, generic)),
alt((file, loc, set, ssvs, section, typ, parse_data_dec, generic)),
Statement::Directive,
);

Expand Down

0 comments on commit a27ecaf

Please sign in to comment.