Skip to content

Commit 8a27950

Browse files
committed
Java frontend: create String an Class literals earlier
This creates String and Class literals ahead of running java_bytecode_convert_method instead of creating them in an after-the-fact fixup in typecheck_expr as previously. These now exist before method conversion begins and __CPROVER_initialize is created, which means the String literals are correctly initialised even when using incremental method conversion. This can be a little wasteful if they are only referred to from methods that would never truly be invoked, but preliminary experiments suggest that creating ~10000 Strings takes under a second, and that Java lazy loading can quickly delete ones that are not in fact used. Java CI lazy methods is augmented a little to notice that if we need global A, and global A is initialised with a reference to global B, then we need global B as well. We appear to have got away wtih this before because the inter-global references were introduced after CI lazy methods finished loading methods.
1 parent d95cb12 commit 8a27950

File tree

5 files changed

+150
-46
lines changed

5 files changed

+150
-46
lines changed

src/java_bytecode/java_bytecode_convert_method.cpp

Lines changed: 5 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1712,41 +1712,12 @@ codet java_bytecode_convert_methodt::convert_instructions(
17121712
{
17131713
assert(op.empty() && results.size()==1);
17141714

1715-
// 1) Pushing a String causes a reference to a java.lang.String object
1716-
// to be constructed and pushed onto the operand stack.
1715+
INVARIANT(
1716+
arg0.id() != ID_java_string_literal && arg0.id() != ID_type,
1717+
"String and Class literals should have been lowered in "
1718+
"generate_constant_global_variables");
17171719

1718-
// 2) Pushing an int or a float causes a primitive value to be pushed
1719-
// onto the stack.
1720-
1721-
// 3) Pushing a Class constant causes a reference to a java.lang.Class
1722-
// to be pushed onto the operand stack
1723-
1724-
if(arg0.id()==ID_java_string_literal)
1725-
{
1726-
// these need to be references to java.lang.String
1727-
results[0]=arg0;
1728-
symbol_typet string_type("java::java.lang.String");
1729-
results[0].type()=java_reference_type(string_type);
1730-
}
1731-
else if(arg0.id()==ID_type)
1732-
{
1733-
irep_idt class_id=arg0.type().get(ID_identifier);
1734-
symbol_typet java_lang_Class("java::java.lang.Class");
1735-
symbol_exprt symbol_expr(
1736-
id2string(class_id)+"@class_model",
1737-
java_lang_Class);
1738-
address_of_exprt address_of_expr(symbol_expr);
1739-
results[0]=address_of_expr;
1740-
}
1741-
else if(arg0.id()==ID_constant)
1742-
{
1743-
results[0]=arg0;
1744-
}
1745-
else
1746-
{
1747-
error() << "unexpected ldc argument" << eom;
1748-
throw 0;
1749-
}
1720+
results[0] = arg0;
17501721
}
17511722
else if(statement=="goto" || statement=="goto_w")
17521723
{

src/java_bytecode/java_bytecode_language.cpp

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Author: Daniel Kroening, kroening@kroening.com
3030
#include "java_entry_point.h"
3131
#include "java_bytecode_parser.h"
3232
#include "java_class_loader.h"
33+
#include "java_string_literals.h"
3334
#include "java_utils.h"
3435
#include <java_bytecode/ci_lazy_methods.h>
3536
#include <java_bytecode/generate_java_generic_type.h>
@@ -256,6 +257,118 @@ static void infer_opaque_type_fields(
256257
}
257258
}
258259

260+
/// Create if necessary, then return the constant global java.lang.Class symbol
261+
/// for a given class id
262+
/// \param class_id: class identifier
263+
/// \param symbol_table: global symbol table; a symbol may be added
264+
/// \return java.lang.Class typed symbol expression
265+
static symbol_exprt get_or_create_class_literal_symbol(
266+
const irep_idt &class_id, symbol_tablet &symbol_table)
267+
{
268+
symbol_typet java_lang_Class("java::java.lang.Class");
269+
symbol_exprt symbol_expr(
270+
id2string(class_id)+"@class_model",
271+
java_lang_Class);
272+
if(!symbol_table.has_symbol(symbol_expr.get_identifier()))
273+
{
274+
symbolt new_class_symbol;
275+
new_class_symbol.name = symbol_expr.get_identifier();
276+
new_class_symbol.type = symbol_expr.type();
277+
INVARIANT(
278+
has_prefix(id2string(new_class_symbol.name), "java::"),
279+
"class identifier should have 'java::' prefix");
280+
new_class_symbol.base_name =
281+
id2string(new_class_symbol.name).substr(6);
282+
new_class_symbol.mode = ID_java;
283+
new_class_symbol.is_lvalue = true;
284+
new_class_symbol.is_state_var = true;
285+
new_class_symbol.is_static_lifetime = true;
286+
symbol_table.add(new_class_symbol);
287+
}
288+
289+
return symbol_expr;
290+
}
291+
292+
/// Get result of a Java load-constant (ldc) instruction.
293+
/// Possible cases:
294+
/// 1) Pushing a String causes a reference to a java.lang.String object
295+
/// to be constructed and pushed onto the operand stack.
296+
/// 2) Pushing an int or a float causes a primitive value to be pushed
297+
/// onto the stack.
298+
/// 3) Pushing a Class constant causes a reference to a java.lang.Class
299+
/// to be pushed onto the operand stack
300+
/// \param ldc_arg0: raw operand to the ldc opcode
301+
/// \param symbol_table: global symbol table. If the argument `ldc_arg0` is a
302+
/// String or Class constant then a new constant global may be added.
303+
/// \param string_refinement_enabled: true if --refine-strings is enabled, which
304+
/// influences how String literals are structured.
305+
/// \return ldc result
306+
static exprt get_ldc_result(
307+
const exprt &ldc_arg0,
308+
symbol_tablet &symbol_table,
309+
bool string_refinement_enabled)
310+
{
311+
if(ldc_arg0.id() == ID_type)
312+
{
313+
const irep_idt &class_id = ldc_arg0.type().get(ID_identifier);
314+
return
315+
address_of_exprt(
316+
get_or_create_class_literal_symbol(class_id, symbol_table));
317+
}
318+
else if(ldc_arg0.id() == ID_java_string_literal)
319+
{
320+
return
321+
address_of_exprt(
322+
get_or_create_string_literal_symbol(
323+
ldc_arg0, symbol_table, string_refinement_enabled));
324+
}
325+
else
326+
{
327+
INVARIANT(
328+
ldc_arg0.id() == ID_constant,
329+
"ldc argument should be constant, string literal or class literal");
330+
return ldc_arg0;
331+
}
332+
}
333+
334+
/// Creates global variables for constants mentioned in a given method. These
335+
/// are either string literals, or class literals (the java.lang.Class instance
336+
/// returned by `(some_reference_typed_expression).class`). The method parse
337+
/// tree is rewritten to directly reference these globals.
338+
/// \param parse_tree: parse tree to search for constant global references
339+
/// \param symbol_table: global symbol table, to which constant globals will be
340+
/// added.
341+
/// \param string_refinement_enabled: true if `--refine-stings` is active,
342+
/// which changes how string literals are structured.
343+
static void generate_constant_global_variables(
344+
java_bytecode_parse_treet &parse_tree,
345+
symbol_tablet &symbol_table,
346+
bool string_refinement_enabled)
347+
{
348+
for(auto &method : parse_tree.parsed_class.methods)
349+
{
350+
for(auto &instruction : method.instructions)
351+
{
352+
// ldc* instructions are Java bytecode "load constant" ops, which can
353+
// retrieve a numeric constant, String literal, or Class literal.
354+
if(instruction.statement == "ldc" ||
355+
instruction.statement == "ldc2" ||
356+
instruction.statement == "ldc_w" ||
357+
instruction.statement == "ldc2_w")
358+
{
359+
INVARIANT(
360+
instruction.args.size() != 0,
361+
"ldc instructions should have an argument");
362+
instruction.args[0] =
363+
get_ldc_result(
364+
instruction.args[0],
365+
symbol_table,
366+
string_refinement_enabled);
367+
}
368+
}
369+
}
370+
}
371+
259372
bool java_bytecode_languaget::typecheck(
260373
symbol_tablet &symbol_table,
261374
const std::string &module)
@@ -317,6 +430,23 @@ bool java_bytecode_languaget::typecheck(
317430
infer_opaque_type_fields(c.second, symbol_table);
318431
}
319432

433+
// Create global variables for constants (String and Class literals) up front.
434+
// This means that when running with lazy loading, we will be aware of these
435+
// literal globals' existence when __CPROVER_initialize is generated in
436+
// `generate_support_functions`.
437+
const std::size_t before_constant_globals_size = symbol_table.symbols.size();
438+
for(auto &c : java_class_loader.class_map)
439+
{
440+
generate_constant_global_variables(
441+
c.second,
442+
symbol_table,
443+
string_refinement_enabled);
444+
}
445+
status() << "Java: added "
446+
<< (symbol_table.symbols.size() - before_constant_globals_size)
447+
<< " String or Class constant symbols"
448+
<< messaget::eom;
449+
320450
// Now incrementally elaborate methods
321451
// that are reachable from this entry point.
322452
if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE)

src/java_bytecode/java_bytecode_typecheck.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ class java_bytecode_typecheckt:public typecheckt
6767
void typecheck_code(codet &);
6868
void typecheck_type(typet &);
6969
void typecheck_expr_symbol(symbol_exprt &);
70-
void typecheck_expr_java_string_literal(exprt &);
7170
void typecheck_expr_member(member_exprt &);
7271
void typecheck_expr_java_new(side_effect_exprt &);
7372
void typecheck_expr_java_new_array(side_effect_exprt &);

src/java_bytecode/java_bytecode_typecheck_expr.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ Author: Daniel Kroening, kroening@kroening.com
2121
#include "java_utils.h"
2222
#include "java_root_class.h"
2323
#include "java_string_library_preprocess.h"
24-
#include "java_string_literals.h"
2524

2625
void java_bytecode_typecheckt::typecheck_expr(exprt &expr)
2726
{
@@ -39,6 +38,11 @@ void java_bytecode_typecheckt::typecheck_expr(exprt &expr)
3938
Forall_operands(it, expr)
4039
typecheck_expr(*it);
4140

41+
INVARIANT(
42+
expr.id() != ID_java_string_literal,
43+
"String literals should have been converted to constant globals "
44+
"before typecheck_expr");
45+
4246
if(expr.id()==ID_symbol)
4347
typecheck_expr_symbol(to_symbol_expr(expr));
4448
else if(expr.id()==ID_side_effect)
@@ -49,8 +53,6 @@ void java_bytecode_typecheckt::typecheck_expr(exprt &expr)
4953
else if(statement==ID_java_new_array)
5054
typecheck_expr_java_new_array(to_side_effect_expr(expr));
5155
}
52-
else if(expr.id()==ID_java_string_literal)
53-
typecheck_expr_java_string_literal(expr);
5456
else if(expr.id()==ID_member)
5557
typecheck_expr_member(to_member_expr(expr));
5658
}
@@ -70,13 +72,6 @@ void java_bytecode_typecheckt::typecheck_expr_java_new_array(
7072
typecheck_type(type);
7173
}
7274

73-
void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
74-
{
75-
expr = address_of_exprt(
76-
get_or_create_string_literal_symbol(
77-
expr, symbol_table, string_refinement_enabled));
78-
}
79-
8075
void java_bytecode_typecheckt::typecheck_expr_symbol(symbol_exprt &expr)
8176
{
8277
irep_idt identifier=expr.get_identifier();

src/java_bytecode/java_string_literals.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,16 @@ symbol_exprt get_or_create_string_literal_symbol(
162162
return_symbol.type = return_symbol.value.type();
163163
if(symbol_table.add(return_symbol))
164164
throw "failed to add return symbol to symbol table";
165-
new_symbol.value = literal_init;
165+
166+
// Initialise the literal structure with
167+
// (ABC_return_value, { ..., .length = N, .data = &ABC_constarray }),
168+
// using a C-style comma expression to indicate that we need the
169+
// _return_value global for its side-effects.
170+
exprt init_comma_expr(ID_comma);
171+
init_comma_expr.type() = literal_init.type();
172+
init_comma_expr.copy_to_operands(return_symbol.symbol_expr());
173+
init_comma_expr.move_to_operands(literal_init);
174+
new_symbol.value = init_comma_expr;
166175
}
167176
else if(jls_struct.components().size()>=1 &&
168177
jls_struct.components()[0].get_name()=="@java.lang.Object")

0 commit comments

Comments
 (0)