Skip to content

Commit 68a8acc

Browse files
committed
Add basic jsr / ret implementation. Fixes #280
jsr is a simple lightweight call-subroutine opcode (simply pushes a return address) and ret is the corresponding return, performing call/return within a compiled function. They are deprecated, but Java 5 compilers generate them and library JARs built to that standard are evidently still around. This implementation has a significant shortcoming: it considers the possibility of ret'ing to any jsr-site. This could be improved if we use basic dataflow analysis to determine which jsr targets can reach which ret instructions.
1 parent d3e2058 commit 68a8acc

File tree

1 file changed

+84
-2
lines changed

1 file changed

+84
-2
lines changed

src/java_bytecode/java_bytecode_convert_method.cpp

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
543543
address_mapt address_map;
544544
std::set<unsigned> targets;
545545

546+
std::vector<unsigned> jsr_ret_targets;
547+
std::vector<instructionst::const_iterator> ret_instructions;
548+
546549
for(instructionst::const_iterator
547550
i_it=instructions.begin();
548551
i_it!=instructions.end();
@@ -585,6 +588,15 @@ codet java_bytecode_convert_methodt::convert_instructions(
585588
targets.insert(target);
586589

587590
a_entry.first->second.successors.push_back(target);
591+
592+
if(i_it->statement=="jsr" ||
593+
i_it->statement=="jsr_w")
594+
{
595+
instructionst::const_iterator next=i_it;
596+
assert(++next!=instructions.end() && "jsr without valid return address?");
597+
targets.insert(next->address);
598+
jsr_ret_targets.push_back(next->address);
599+
}
588600
}
589601
else if(i_it->statement=="tableswitch" ||
590602
i_it->statement=="lookupswitch")
@@ -604,6 +616,23 @@ codet java_bytecode_convert_methodt::convert_instructions(
604616
}
605617
}
606618
}
619+
else if(i_it->statement=="ret")
620+
{
621+
// Finish these later, once we've seen all jsr instructions.
622+
ret_instructions.push_back(i_it);
623+
}
624+
}
625+
626+
// Draw edges from every `ret` to every `jsr` successor.
627+
// Could do better with flow analysis to distinguish multiple subroutines within
628+
// the same function.
629+
for(const auto retinst : ret_instructions)
630+
{
631+
auto& a_entry=address_map.at(retinst->address);
632+
a_entry.successors.insert(
633+
a_entry.successors.end(),
634+
jsr_ret_targets.begin(),
635+
jsr_ret_targets.end());
607636
}
608637

609638
for(address_mapt::iterator
@@ -925,6 +954,48 @@ codet java_bytecode_convert_methodt::convert_instructions(
925954
code_gotot code_goto(label(number));
926955
c=code_goto;
927956
}
957+
else if(statement=="jsr" || statement=="jsr_w")
958+
{
959+
// As 'goto', except we must also push the subroutine return address:
960+
assert(op.empty() && results.size()==1);
961+
irep_idt number=to_constant_expr(arg0).get_value();
962+
code_gotot code_goto(label(number));
963+
c=code_goto;
964+
results[0]=as_number(
965+
std::next(i_it)->address,
966+
pointer_typet(void_typet(), 64));
967+
}
968+
else if(statement=="ret")
969+
{
970+
// Since we have a bounded target set, make life easier on our analyses
971+
// and write something like:
972+
// if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ...
973+
assert(op.empty() && results.empty());
974+
code_blockt branches;
975+
auto retvar=variable(arg0, 'a', i_it->address, INST_INDEX, NO_CAST);
976+
assert(!jsr_ret_targets.empty());
977+
for(size_t idx=0, idxlim=jsr_ret_targets.size(); idx!=idxlim; ++idx)
978+
{
979+
irep_idt number=std::to_string(jsr_ret_targets[idx]);
980+
code_gotot g(label(number));
981+
g.add_source_location()=i_it->source_location;
982+
if(idx==idxlim-1)
983+
branches.move_to_operands(g);
984+
else
985+
{
986+
code_ifthenelset branch;
987+
auto address_ptr=as_number(
988+
jsr_ret_targets[idx],
989+
pointer_typet(void_typet(), 64));
990+
branch.cond()=equal_exprt(retvar, address_ptr);
991+
branch.cond().add_source_location()=i_it->source_location;
992+
branch.then_case()=g;
993+
branch.add_source_location()=i_it->source_location;
994+
branches.move_to_operands(branch);
995+
}
996+
}
997+
c=std::move(branches);
998+
}
928999
else if(statement=="iconst_m1")
9291000
{
9301001
assert(results.size()==1);
@@ -1530,8 +1601,19 @@ codet java_bytecode_convert_methodt::convert_instructions(
15301601
else
15311602
{
15321603
c.make_block();
1533-
forall_operands(o_it, more_code)
1534-
c.copy_to_operands(*o_it);
1604+
auto& last_statement=to_code_block(c).find_last_statement();
1605+
if(last_statement.get_statement()==ID_goto)
1606+
{
1607+
// Insert stack twiddling before branch:
1608+
last_statement.make_block();
1609+
last_statement.operands().insert(
1610+
last_statement.operands().begin(),
1611+
more_code.operands().begin(),
1612+
more_code.operands().end());
1613+
}
1614+
else
1615+
forall_operands(o_it, more_code)
1616+
c.copy_to_operands(*o_it);
15351617
}
15361618
}
15371619

0 commit comments

Comments
 (0)