diff --git a/jbmc/regression/jbmc/class-literals/ExampleAnnotation.class b/jbmc/regression/jbmc/class-literals/ExampleAnnotation.class new file mode 100644 index 00000000000..ab1a9793533 Binary files /dev/null and b/jbmc/regression/jbmc/class-literals/ExampleAnnotation.class differ diff --git a/jbmc/regression/jbmc/class-literals/ExampleAnnotation.java b/jbmc/regression/jbmc/class-literals/ExampleAnnotation.java new file mode 100644 index 00000000000..ff744666644 --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/ExampleAnnotation.java @@ -0,0 +1,4 @@ + +public @interface ExampleAnnotation { + +} diff --git a/jbmc/regression/jbmc/class-literals/ExampleEnum.class b/jbmc/regression/jbmc/class-literals/ExampleEnum.class new file mode 100644 index 00000000000..17ed2db3139 Binary files /dev/null and b/jbmc/regression/jbmc/class-literals/ExampleEnum.class differ diff --git a/jbmc/regression/jbmc/class-literals/ExampleEnum.java b/jbmc/regression/jbmc/class-literals/ExampleEnum.java new file mode 100644 index 00000000000..9151252d7f0 --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/ExampleEnum.java @@ -0,0 +1,5 @@ + +public enum ExampleEnum { + +} + diff --git a/jbmc/regression/jbmc/class-literals/ExampleInterface.class b/jbmc/regression/jbmc/class-literals/ExampleInterface.class new file mode 100644 index 00000000000..3669f3afa5c Binary files /dev/null and b/jbmc/regression/jbmc/class-literals/ExampleInterface.class differ diff --git a/jbmc/regression/jbmc/class-literals/ExampleInterface.java b/jbmc/regression/jbmc/class-literals/ExampleInterface.java new file mode 100644 index 00000000000..5f6dafbc4d2 --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/ExampleInterface.java @@ -0,0 +1,4 @@ + +interface ExampleInterface { + +} diff --git a/jbmc/regression/jbmc/class-literals/ExampleSynthetic.class b/jbmc/regression/jbmc/class-literals/ExampleSynthetic.class new file mode 100644 index 00000000000..3be8707e65e Binary files /dev/null and b/jbmc/regression/jbmc/class-literals/ExampleSynthetic.class differ diff --git a/jbmc/regression/jbmc/class-literals/ExampleSynthetic.j b/jbmc/regression/jbmc/class-literals/ExampleSynthetic.j new file mode 100644 index 00000000000..0a18d25c87b --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/ExampleSynthetic.j @@ -0,0 +1,4 @@ +; Compile me with Jasmin 2.1+ (https://sourceforge.net/projects/jasmin/) + +.class public synthetic ExampleSynthetic +.super java/lang/Object diff --git a/jbmc/regression/jbmc/class-literals/Test.class b/jbmc/regression/jbmc/class-literals/Test.class new file mode 100644 index 00000000000..ef7d26bc773 Binary files /dev/null and b/jbmc/regression/jbmc/class-literals/Test.class differ diff --git a/jbmc/regression/jbmc/class-literals/Test.java b/jbmc/regression/jbmc/class-literals/Test.java new file mode 100644 index 00000000000..a9b09288e71 --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/Test.java @@ -0,0 +1,35 @@ + +public class Test { + + public static void main() { + + assert ExampleAnnotation.class.isAnnotation(); + assert ExampleInterface.class.isInterface(); + assert ExampleEnum.class.isEnum(); + assert ExampleSynthetic.class.isSynthetic(); + + assert char[].class.isArray(); + assert short[].class.isArray(); + assert int[].class.isArray(); + assert long[].class.isArray(); + assert float[].class.isArray(); + assert double[].class.isArray(); + assert boolean[].class.isArray(); + assert Object[].class.isArray(); + assert Object[][].class.isArray(); + + // Note use of '==' not '.equals', as we expect the same exact literal, + // which in jbmc always have the same address. + // Note names of array classes are not tested yet, as arrays' types are + // printed as their raw signature, to be addressed separately. + // Note also primitive types (e.g. int.class) are not addresses here, as + // they are created through box types' static initializers (e.g. Integer + // has a static member TYPE that holds the Class for `int.class`) + + assert ExampleAnnotation.class.getName() == "ExampleAnnotation"; + assert ExampleInterface.class.getName() == "ExampleInterface"; + assert ExampleEnum.class.getName() == "ExampleEnum"; + + } + +} diff --git a/jbmc/regression/jbmc/class-literals/java/lang/Class.class b/jbmc/regression/jbmc/class-literals/java/lang/Class.class new file mode 100644 index 00000000000..cd7f4d94c38 Binary files /dev/null and b/jbmc/regression/jbmc/class-literals/java/lang/Class.class differ diff --git a/jbmc/regression/jbmc/class-literals/java/lang/Class.java b/jbmc/regression/jbmc/class-literals/java/lang/Class.java new file mode 100644 index 00000000000..d5c3d3fcf5d --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/java/lang/Class.java @@ -0,0 +1,45 @@ +package java.lang; + +public class Class { + + private String name; + + private boolean isAnnotation; + private boolean isArray; + private boolean isInterface; + private boolean isSynthetic; + private boolean isLocalClass; + private boolean isMemberClass; + private boolean isEnum; + + public void cproverInitializeClassLiteral( + String name, + boolean isAnnotation, + boolean isArray, + boolean isInterface, + boolean isSynthetic, + boolean isLocalClass, + boolean isMemberClass, + boolean isEnum) { + + this.name = name; + this.isAnnotation = isAnnotation; + this.isArray = isArray; + this.isInterface = isInterface; + this.isSynthetic = isSynthetic; + this.isLocalClass = isLocalClass; + this.isMemberClass = isMemberClass; + this.isEnum = isEnum; + + } + + public String getName() { return name; } + + public boolean isAnnotation() { return isAnnotation; } + public boolean isArray() { return isArray; } + public boolean isInterface() { return isInterface; } + public boolean isSynthetic() { return isSynthetic; } + public boolean isLocalClass() { return isLocalClass; } + public boolean isMemberClass() { return isMemberClass; } + public boolean isEnum() { return isEnum; } +} diff --git a/jbmc/regression/jbmc/class-literals/test.desc b/jbmc/regression/jbmc/class-literals/test.desc new file mode 100644 index 00000000000..de97168986e --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/test.desc @@ -0,0 +1,8 @@ +CORE +Test.class + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/jbmc/regression/jbmc/class-literals/test_lazy.desc b/jbmc/regression/jbmc/class-literals/test_lazy.desc new file mode 100644 index 00000000000..c1e82a18b31 --- /dev/null +++ b/jbmc/regression/jbmc/class-literals/test_lazy.desc @@ -0,0 +1,10 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--lazy-methods +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +lazy-methods is incompatible with symex-driven lazy loading diff --git a/jbmc/src/java_bytecode/ci_lazy_methods.cpp b/jbmc/src/java_bytecode/ci_lazy_methods.cpp index ccb892a296c..413f8b3c709 100644 --- a/jbmc/src/java_bytecode/ci_lazy_methods.cpp +++ b/jbmc/src/java_bytecode/ci_lazy_methods.cpp @@ -13,6 +13,7 @@ #include "java_string_library_preprocess.h" #include "remove_exceptions.h" +#include #include #include @@ -52,6 +53,31 @@ ci_lazy_methodst::ci_lazy_methodst( class_hierarchy(symbol_table); } +/// Checks if an expression refers to any class literals (e.g. MyType.class) +/// These are expressed as ldc instructions in Java bytecode, and as symbols +/// of the form `MyType@class_model` in GOTO programs. +/// \param expr: expression to check +/// \return true if the expression or any of its subexpressions refer to a +/// class +static bool references_class_model(const exprt &expr) +{ + static const symbol_typet class_type("java::java.lang.Class"); + + for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it) + { + if(can_cast_expr(*it) && + it->type() == class_type && + has_suffix( + id2string(to_symbol_expr(*it).get_identifier()), + JAVA_CLASS_MODEL_SUFFIX)) + { + return true; + } + } + + return false; +} + /// Uses a simple context-insensitive ('ci') analysis to determine which methods /// may be reachable from the main entry point. In brief, static methods are /// reachable if we find a callsite in another reachable site, while virtual @@ -122,6 +148,7 @@ bool ci_lazy_methodst::operator()( std::unordered_set methods_already_populated; std::unordered_set virtual_function_calls; + bool class_initializer_seen = false; bool any_new_classes = true; while(any_new_classes) @@ -149,8 +176,19 @@ bool ci_lazy_methodst::operator()( // Couldn't convert this function continue; } - gather_virtual_callsites( - symbol_table.lookup_ref(mname).value, virtual_function_calls); + const exprt &method_body = symbol_table.lookup_ref(mname).value; + + gather_virtual_callsites(method_body, virtual_function_calls); + + if(!class_initializer_seen && references_class_model(method_body)) + { + class_initializer_seen = true; + irep_idt initializer_signature = + get_java_class_literal_initializer_signature(); + if(symbol_table.has_symbol(initializer_signature)) + methods_to_convert_later.insert(initializer_signature); + } + any_new_methods=true; } } diff --git a/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp b/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp index 706b61a0ecc..dbc8054390c 100644 --- a/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_convert_class.cpp @@ -265,6 +265,9 @@ void java_bytecode_convert_classt::convert( class_type.set_tag(c.name); class_type.set(ID_base_name, c.name); class_type.set(ID_abstract, c.is_abstract); + class_type.set(ID_is_annotation, c.is_annotation); + class_type.set(ID_interface, c.is_interface); + class_type.set(ID_synthetic, c.is_synthetic); class_type.set_final(c.is_final); if(c.is_enum) { diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index f96b23a38ba..a3130545edb 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -299,7 +299,7 @@ static symbol_exprt get_or_create_class_literal_symbol( { symbol_typet java_lang_Class("java::java.lang.Class"); symbol_exprt symbol_expr( - id2string(class_id)+"@class_model", + id2string(class_id) + JAVA_CLASS_MODEL_SUFFIX, java_lang_Class); if(!symbol_table.has_symbol(symbol_expr.get_identifier())) { @@ -785,7 +785,8 @@ bool java_bytecode_languaget::generate_support_functions( get_message_handler(), assume_inputs_non_null, object_factory_parameters, - get_pointer_type_selector()); + get_pointer_type_selector(), + string_refinement_enabled); } /// Uses a simple context-insensitive ('ci') analysis to determine which methods diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index 29c347fe227..0aabc7afc14 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -68,6 +68,8 @@ enum lazy_methods_modet LAZY_METHODS_MODE_EXTERNAL_DRIVER }; +#define JAVA_CLASS_MODEL_SUFFIX "@class_model" + class java_bytecode_languaget:public languaget { public: diff --git a/jbmc/src/java_bytecode/java_bytecode_parse_tree.h b/jbmc/src/java_bytecode/java_bytecode_parse_tree.h index 4ff281c12b2..c5762137152 100644 --- a/jbmc/src/java_bytecode/java_bytecode_parse_tree.h +++ b/jbmc/src/java_bytecode/java_bytecode_parse_tree.h @@ -209,6 +209,9 @@ class java_bytecode_parse_treet bool is_enum=false; bool is_public=false, is_protected=false, is_private=false; bool is_final = false; + bool is_interface = false; + bool is_synthetic = false; + bool is_annotation = false; bool attribute_bootstrapmethods_read = false; size_t enum_elements=0; diff --git a/jbmc/src/java_bytecode/java_bytecode_parser.cpp b/jbmc/src/java_bytecode/java_bytecode_parser.cpp index 18fb937234a..735824954f6 100644 --- a/jbmc/src/java_bytecode/java_bytecode_parser.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_parser.cpp @@ -451,9 +451,11 @@ bool java_bytecode_parsert::parse() #define ACC_BRIDGE 0x0040 #define ACC_VARARGS 0x0080 #define ACC_NATIVE 0x0100 +#define ACC_INTERFACE 0x0200 #define ACC_ABSTRACT 0x0400 #define ACC_STRICT 0x0800 #define ACC_SYNTHETIC 0x1000 +#define ACC_ANNOTATION 0x2000 #define ACC_ENUM 0x4000 #ifdef _MSC_VER @@ -496,6 +498,9 @@ void java_bytecode_parsert::rClassFile() parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0; parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0; parsed_class.is_final = (access_flags & ACC_FINAL) != 0; + parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0; + parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0; + parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0; parsed_class.name= constant(this_class).type().get(ID_C_base_name); diff --git a/jbmc/src/java_bytecode/java_entry_point.cpp b/jbmc/src/java_bytecode/java_entry_point.cpp index 62b18d3dcf7..9181dc48026 100644 --- a/jbmc/src/java_bytecode/java_entry_point.cpp +++ b/jbmc/src/java_bytecode/java_entry_point.cpp @@ -17,6 +17,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_object_factory.h" +#include "java_string_literals.h" #include "java_utils.h" static void create_initialize(symbol_table_baset &symbol_table) @@ -65,12 +66,60 @@ static bool should_init_symbol(const symbolt &sym) return is_java_string_literal_id(sym.name); } +/// Get the symbol name of java.lang.Class' initializer method. This method +/// should initialize a Class instance with known properties of the type it +/// represents, such as its name, whether it is abstract, or an enumeration, +/// etc. The method may or may not exist in any particular symbol table; it is +/// up to the caller to check. +/// The method's Java signature is: +/// void cproverInitializeClassLiteral( +/// String name, +/// boolean isAnnotation, +/// boolean isArray, +/// boolean isInterface, +/// boolean isSynthetic, +/// boolean isLocalClass, +/// boolean isMemberClass, +/// boolean isEnum); +/// \return Class initializer method's symbol name. +irep_idt get_java_class_literal_initializer_signature() +{ + static irep_idt signature = + "java::java.lang.Class.cproverInitializeClassLiteral:" + "(Ljava/lang/String;ZZZZZZZ)V"; + return signature; +} + +/// If symbol is a class literal, and an appropriate initializer method exists, +/// return a pointer to its symbol. If not, return null. +/// \param symbol: possible class literal symbol +/// \param symbol_table: table to search +/// \return pointer to the initializer method symbol or null +static const symbolt *get_class_literal_initializer( + const symbolt &symbol, + const symbol_table_baset &symbol_table) +{ + if(symbol.value.is_not_nil()) + return nullptr; + if(symbol.type != symbol_typet("java::java.lang.Class")) + return nullptr; + if(!has_suffix(id2string(symbol.name), JAVA_CLASS_MODEL_SUFFIX)) + return nullptr; + return symbol_table.lookup(get_java_class_literal_initializer_signature()); +} + +static constant_exprt constant_bool(bool val) +{ + return from_integer(val ? 1 : 0, java_boolean_type()); +} + static void java_static_lifetime_init( symbol_table_baset &symbol_table, const source_locationt &source_location, bool assume_init_pointers_not_null, const object_factory_parameterst &object_factory_parameters, - const select_pointer_typet &pointer_type_selector) + const select_pointer_typet &pointer_type_selector, + bool string_refinement_enabled) { symbolt &initialize_symbol=*symbol_table.get_writeable(INITIALIZE_FUNCTION); code_blockt &code_block=to_code_block(to_code(initialize_symbol.value)); @@ -88,7 +137,62 @@ static void java_static_lifetime_init( const symbolt &sym=*symbol_table.lookup(symname); if(should_init_symbol(sym)) { - if(sym.value.is_nil() && sym.type!=empty_typet()) + if(const symbolt *class_literal_init_method = + get_class_literal_initializer(sym, symbol_table)) + { + const std::string &name_str = id2string(sym.name); + irep_idt class_name = + name_str.substr(0, name_str.size() - strlen(JAVA_CLASS_MODEL_SUFFIX)); + const symbolt &class_symbol = symbol_table.lookup_ref(class_name); + + bool class_is_array = is_java_array_tag(sym.name); + + exprt name_literal(ID_java_string_literal); + name_literal.set(ID_value, to_class_type(class_symbol.type).get_tag()); + + symbol_exprt class_name_literal = + get_or_create_string_literal_symbol( + name_literal, symbol_table, string_refinement_enabled); + + // Call the literal initializer method instead of a nondet initializer: + + // For arguments we can't parse yet: + side_effect_expr_nondett nondet_bool(java_boolean_type()); + + // Argument order is: name, isAnnotation, isArray, isInterface, + // isSynthetic, isLocalClass, isMemberClass, isEnum + + code_function_callt initializer_call; + initializer_call.function() = class_literal_init_method->symbol_expr(); + + code_function_callt::argumentst &args = initializer_call.arguments(); + + // this: + args.push_back(address_of_exprt(sym.symbol_expr())); + // name: + args.push_back(address_of_exprt(class_name_literal)); + // isAnnotation: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_is_annotation))); + // isArray: + args.push_back(constant_bool(class_is_array)); + // isInterface: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_interface))); + // isSynthetic: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_synthetic))); + // isLocalClass: + args.push_back(nondet_bool); + // isMemberClass: + args.push_back(nondet_bool); + // isEnum: + args.push_back( + constant_bool(class_symbol.type.get_bool(ID_enumeration))); + + code_block.move_to_operands(initializer_call); + } + else if(sym.value.is_nil() && sym.type!=empty_typet()) { bool allow_null=!assume_init_pointers_not_null; if(allow_null) @@ -403,7 +507,8 @@ bool java_entry_point( message_handlert &message_handler, bool assume_init_pointers_not_null, const object_factory_parameterst &object_factory_parameters, - const select_pointer_typet &pointer_type_selector) + const select_pointer_typet &pointer_type_selector, + bool string_refinement_enabled) { // check if the entry point is already there if(symbol_table.symbols.find(goto_functionst::entry_point())!= @@ -426,7 +531,8 @@ bool java_entry_point( symbol.location, assume_init_pointers_not_null, object_factory_parameters, - pointer_type_selector); + pointer_type_selector, + string_refinement_enabled); return generate_java_start_function( symbol, diff --git a/jbmc/src/java_bytecode/java_entry_point.h b/jbmc/src/java_bytecode/java_entry_point.h index 006334ff99c..3fcdee49388 100644 --- a/jbmc/src/java_bytecode/java_entry_point.h +++ b/jbmc/src/java_bytecode/java_entry_point.h @@ -25,7 +25,8 @@ bool java_entry_point( class message_handlert &message_handler, bool assume_init_pointers_not_null, const object_factory_parameterst &object_factory_parameters, - const select_pointer_typet &pointer_type_selector); + const select_pointer_typet &pointer_type_selector, + bool string_refinement_enabled); struct main_function_resultt { @@ -60,6 +61,8 @@ struct main_function_resultt } }; +irep_idt get_java_class_literal_initializer_signature(); + /// Figures out the entry point of the code to verify main_function_resultt get_main_symbol( const symbol_table_baset &symbol_table, diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index be80594c8ce..7f5df683308 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -661,10 +661,13 @@ IREP_ID_TWO(overflow_shl, overflow-shl) IREP_ID_TWO(C_no_initialization_required, #no_initialization_required) IREP_ID_TWO(overlay_class, java::com.diffblue.OverlayClassImplementation) IREP_ID_TWO(overlay_method, java::com.diffblue.OverlayMethodImplementation) +IREP_ID_ONE(is_annotation) IREP_ID_TWO(C_annotations, #annotations) IREP_ID_ONE(final) IREP_ID_ONE(bits_per_byte) IREP_ID_TWO(C_abstract, #abstract) +IREP_ID_ONE(synthetic) +IREP_ID_ONE(interface) // Projects depending on this code base that wish to extend the list of // available ids should provide a file local_irep_ids.h in their source tree and