diff --git a/src/hotspot/share/compiler/compilerDirectives.cpp b/src/hotspot/share/compiler/compilerDirectives.cpp index 74259c8e5d436..46750cacc35db 100644 --- a/src/hotspot/share/compiler/compilerDirectives.cpp +++ b/src/hotspot/share/compiler/compilerDirectives.cpp @@ -33,6 +33,7 @@ #include "memory/resourceArea.hpp" #include "opto/phasetype.hpp" #include "opto/traceAutoVectorizationTag.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "runtime/globals_extension.hpp" CompilerDirectives::CompilerDirectives() : _next(nullptr), _match(nullptr), _ref_count(0) { @@ -302,7 +303,8 @@ DirectiveSet::DirectiveSet(CompilerDirectives* d) : _inlinematchers(nullptr), _directive(d), _ideal_phase_name_set(PHASE_NUM_TYPES, mtCompiler), - _trace_auto_vectorization_tags(TRACE_AUTO_VECTORIZATION_TAG_NUM, mtCompiler) + _trace_auto_vectorization_tags(TRACE_AUTO_VECTORIZATION_TAG_NUM, mtCompiler), + _trace_merge_stores_tags(TraceMergeStores::TAG_NUM, mtCompiler) { #define init_defaults_definition(name, type, dvalue, compiler) this->name##Option = dvalue; compilerdirectives_common_flags(init_defaults_definition) @@ -432,7 +434,6 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle compilerdirectives_c1_flags(init_default_cc) #undef init_default_cc - // Parse PrintIdealPhaseName and create a lookup set #ifndef PRODUCT #ifdef COMPILER2 if (!_modified[TraceAutoVectorizationIndex]) { @@ -445,6 +446,17 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle } } } + if (!_modified[TraceMergeStoresIndex]) { + // Parse ccstr and create mask + ccstrlist option; + if (CompilerOracle::has_option_value(method, CompileCommandEnum::TraceMergeStores, option)) { + TraceMergeStores::TagValidator validator(option, false); + if (validator.is_valid()) { + set.cloned()->set_trace_merge_stores_tags(validator.tags()); + } + } + } + // Parse PrintIdealPhaseName and create a lookup set if (!_modified[PrintIdealPhaseIndex]) { // Parse ccstr and create set ccstrlist option; diff --git a/src/hotspot/share/compiler/compilerDirectives.hpp b/src/hotspot/share/compiler/compilerDirectives.hpp index bf15fe9c71397..e960fdb1e53ce 100644 --- a/src/hotspot/share/compiler/compilerDirectives.hpp +++ b/src/hotspot/share/compiler/compilerDirectives.hpp @@ -90,6 +90,7 @@ NOT_PRODUCT(cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLeve cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) #define compilerdirectives_c2_string_flags(cflags) \ NOT_PRODUCT(cflags(TraceAutoVectorization, ccstrlist, "", TraceAutoVectorization)) \ +NOT_PRODUCT(cflags(TraceMergeStores, ccstrlist, "", TraceMergeStores)) \ NOT_PRODUCT(cflags(PrintIdealPhase, ccstrlist, "", PrintIdealPhase)) #else #define compilerdirectives_c2_other_flags(cflags) @@ -131,6 +132,7 @@ class DirectiveSet : public CHeapObj { TriBoolArray<(size_t)vmIntrinsics::number_of_intrinsics(), int> _intrinsic_control_words; CHeapBitMap _ideal_phase_name_set; CHeapBitMap _trace_auto_vectorization_tags; + CHeapBitMap _trace_merge_stores_tags; public: DirectiveSet(CompilerDirectives* directive); @@ -211,6 +213,12 @@ void set_##name(void* value) { \ const CHeapBitMap& trace_auto_vectorization_tags() { return _trace_auto_vectorization_tags; }; + void set_trace_merge_stores_tags(const CHeapBitMap& tags) { + _trace_merge_stores_tags.set_from(tags); + }; + const CHeapBitMap& trace_merge_stores_tags() { + return _trace_merge_stores_tags; + }; void print_intx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" INTX_FORMAT " ", 
n, v); } } void print_uintx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" UINTX_FORMAT " ", n, v); } } diff --git a/src/hotspot/share/compiler/compilerOracle.cpp b/src/hotspot/share/compiler/compilerOracle.cpp index d0479f3fdbb50..107350794a02d 100644 --- a/src/hotspot/share/compiler/compilerOracle.cpp +++ b/src/hotspot/share/compiler/compilerOracle.cpp @@ -36,6 +36,7 @@ #include "oops/symbol.hpp" #include "opto/phasetype.hpp" #include "opto/traceAutoVectorizationTag.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "runtime/globals_extension.hpp" #include "runtime/handles.inline.hpp" #include "runtime/jniHandles.hpp" @@ -802,6 +803,12 @@ static void scan_value(enum OptionType type, char* line, int& total_bytes_read, else if (option == CompileCommandEnum::TraceAutoVectorization) { TraceAutoVectorizationTagValidator validator(value, true); + if (!validator.is_valid()) { + jio_snprintf(errorbuf, buf_size, "Unrecognized tag name in %s: %s", option2name(option), validator.what()); + } + } else if (option == CompileCommandEnum::TraceMergeStores) { + TraceMergeStores::TagValidator validator(value, true); + if (!validator.is_valid()) { jio_snprintf(errorbuf, buf_size, "Unrecognized tag name in %s: %s", option2name(option), validator.what()); } diff --git a/src/hotspot/share/compiler/compilerOracle.hpp b/src/hotspot/share/compiler/compilerOracle.hpp index 5864ca5dc0dd7..0e55ca416e03c 100644 --- a/src/hotspot/share/compiler/compilerOracle.hpp +++ b/src/hotspot/share/compiler/compilerOracle.hpp @@ -86,6 +86,7 @@ NOT_PRODUCT(option(PrintIdeal, "PrintIdeal", Bool)) \ NOT_PRODUCT(option(PrintIdealPhase, "PrintIdealPhase", Ccstrlist)) \ NOT_PRODUCT(option(IGVPrintLevel, "IGVPrintLevel", Intx)) \ NOT_PRODUCT(option(TraceAutoVectorization, "TraceAutoVectorization", Ccstrlist)) \ +NOT_PRODUCT(option(TraceMergeStores, "TraceMergeStores", Ccstrlist)) \ option(Vectorize, "Vectorize", Bool) \ option(CloneMapDebug, "CloneMapDebug", Bool) \ option(IncrementalInlineForceCleanup, "IncrementalInlineForceCleanup", Bool) \ diff --git a/src/hotspot/share/compiler/directivesParser.cpp b/src/hotspot/share/compiler/directivesParser.cpp index 5501490028349..731bf33d799dd 100644 --- a/src/hotspot/share/compiler/directivesParser.cpp +++ b/src/hotspot/share/compiler/directivesParser.cpp @@ -29,6 +29,7 @@ #include "memory/resourceArea.hpp" #include "opto/phasetype.hpp" #include "opto/traceAutoVectorizationTag.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "runtime/os.hpp" #include @@ -347,6 +348,15 @@ bool DirectivesParser::set_option_flag(JSON_TYPE t, JSON_VAL* v, const key* opti } else { error(VALUE_ERROR, "Unrecognized tag name detected in TraceAutoVectorization: %s", validator.what()); } + } else if (strncmp(option_key->name, "TraceMergeStores", 16) == 0) { + TraceMergeStores::TagValidator validator(s, false); + + valid = validator.is_valid(); + if (valid) { + set->set_trace_merge_stores_tags(validator.tags()); + } else { + error(VALUE_ERROR, "Unrecognized tag name detected in TraceMergeStores: %s", validator.what()); + } } else if (strncmp(option_key->name, "PrintIdealPhase", 15) == 0) { PhaseNameValidator validator(s); diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index c14162ddf6eed..7f6b7c2dcebae 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -367,9 +367,6 @@ product(bool, MergeStores, true, DIAGNOSTIC, \ "Optimize stores by combining values into larger store") \ \ - develop(bool, 
TraceMergeStores, false, \ - "Trace creation of merged stores") \ - \ product_pd(bool, OptoBundling, \ "Generate nops to fill i-cache lines") \ \ diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 27c0d16fac1b6..919d23fea8da5 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -43,11 +43,13 @@ #include "opto/machnode.hpp" #include "opto/matcher.hpp" #include "opto/memnode.hpp" +#include "opto/mempointer.hpp" #include "opto/mulnode.hpp" #include "opto/narrowptrnode.hpp" #include "opto/phaseX.hpp" #include "opto/regmask.hpp" #include "opto/rootnode.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "opto/vectornode.hpp" #include "utilities/align.hpp" #include "utilities/copy.hpp" @@ -2738,184 +2740,6 @@ uint StoreNode::hash() const { return NO_HASH; } -// Class to parse array pointers, and determine if they are adjacent. We parse the form: -// -// pointer = base -// + constant_offset -// + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift) -// + sum(other_offsets) -// -// -// Note: we accumulate all constant offsets into constant_offset, even the int constant behind -// the "LShiftL(ConvI2L(...))" pattern. We convert "ConvI2L(int_offset + int_con)" to -// "ConvI2L(int_offset) + int_con", which is only safe if we can assume that either all -// compared addresses have an overflow for "int_offset + int_con" or none. -// For loads and stores on arrays, we know that if one overflows and the other not, then -// the two addresses lay almost max_int indices apart, but the maximal array size is -// only about half of that. Therefore, the RangeCheck on at least one of them must have -// failed. -// -// constant_offset += LShiftL( ConvI2L(int_con), int_offset_shift) -// -// pointer = base -// + constant_offset -// + LShiftL( ConvI2L(int_offset), int_offset_shift) -// + sum(other_offsets) -// -class ArrayPointer { -private: - const Node* _pointer; // The final pointer to the position in the array - const Node* _base; // Base address of the array - const jlong _constant_offset; // Sum of collected constant offsets - const Node* _int_offset; // (optional) Offset behind LShiftL and ConvI2L - const GrowableArray* _other_offsets; // List of other AddP offsets - const jint _int_offset_shift; // (optional) Shift value for int_offset - const bool _is_valid; // The parsing succeeded - - ArrayPointer(const bool is_valid, - const Node* pointer, - const Node* base, - const jlong constant_offset, - const Node* int_offset, - const jint int_offset_shift, - const GrowableArray* other_offsets) : - _pointer(pointer), - _base(base), - _constant_offset(constant_offset), - _int_offset(int_offset), - _other_offsets(other_offsets), - _int_offset_shift(int_offset_shift), - _is_valid(is_valid) - { - assert(_pointer != nullptr, "must always have pointer"); - assert(is_valid == (_base != nullptr), "have base exactly if valid"); - assert(is_valid == (_other_offsets != nullptr), "have other_offsets exactly if valid"); - } - - static ArrayPointer make_invalid(const Node* pointer) { - return ArrayPointer(false, pointer, nullptr, 0, nullptr, 0, nullptr); - } - - static bool parse_int_offset(Node* offset, Node*& int_offset, jint& int_offset_shift) { - // offset = LShiftL( ConvI2L(int_offset), int_offset_shift) - if (offset->Opcode() == Op_LShiftL && - offset->in(1)->Opcode() == Op_ConvI2L && - offset->in(2)->Opcode() == Op_ConI) { - int_offset = offset->in(1)->in(1); // LShiftL -> ConvI2L -> int_offset - int_offset_shift = 
offset->in(2)->get_int(); // LShiftL -> int_offset_shift - return true; - } - - // offset = ConvI2L(int_offset) = LShiftL( ConvI2L(int_offset), 0) - if (offset->Opcode() == Op_ConvI2L) { - int_offset = offset->in(1); - int_offset_shift = 0; - return true; - } - - // parse failed - return false; - } - -public: - // Parse the structure above the pointer - static ArrayPointer make(PhaseGVN* phase, const Node* pointer) { - assert(phase->type(pointer)->isa_aryptr() != nullptr, "must be array pointer"); - if (!pointer->is_AddP()) { return ArrayPointer::make_invalid(pointer); } - - const Node* base = pointer->in(AddPNode::Base); - if (base == nullptr) { return ArrayPointer::make_invalid(pointer); } - - const int search_depth = 5; - Node* offsets[search_depth]; - int count = pointer->as_AddP()->unpack_offsets(offsets, search_depth); - - // We expect at least a constant each - if (count <= 0) { return ArrayPointer::make_invalid(pointer); } - - // We extract the form: - // - // pointer = base - // + constant_offset - // + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift) - // + sum(other_offsets) - // - jlong constant_offset = 0; - Node* int_offset = nullptr; - jint int_offset_shift = 0; - GrowableArray* other_offsets = new GrowableArray(count); - - for (int i = 0; i < count; i++) { - Node* offset = offsets[i]; - if (offset->Opcode() == Op_ConI) { - // Constant int offset - constant_offset += offset->get_int(); - } else if (offset->Opcode() == Op_ConL) { - // Constant long offset - constant_offset += offset->get_long(); - } else if(int_offset == nullptr && parse_int_offset(offset, int_offset, int_offset_shift)) { - // LShiftL( ConvI2L(int_offset), int_offset_shift) - int_offset = int_offset->uncast(); - if (int_offset->Opcode() == Op_AddI && int_offset->in(2)->Opcode() == Op_ConI) { - // LShiftL( ConvI2L(int_offset + int_con), int_offset_shift) - constant_offset += ((jlong)int_offset->in(2)->get_int()) << int_offset_shift; - int_offset = int_offset->in(1); - } - } else { - // All others - other_offsets->append(offset); - } - } - - return ArrayPointer(true, pointer, base, constant_offset, int_offset, int_offset_shift, other_offsets); - } - - bool is_adjacent_to_and_before(const ArrayPointer& other, const jlong data_size) const { - if (!_is_valid || !other._is_valid) { return false; } - - // Offset adjacent? - if (this->_constant_offset + data_size != other._constant_offset) { return false; } - - // All other components identical? 
- if (this->_base != other._base || - this->_int_offset != other._int_offset || - this->_int_offset_shift != other._int_offset_shift || - this->_other_offsets->length() != other._other_offsets->length()) { - return false; - } - - for (int i = 0; i < this->_other_offsets->length(); i++) { - Node* o1 = this->_other_offsets->at(i); - Node* o2 = other._other_offsets->at(i); - if (o1 != o2) { return false; } - } - - return true; - } - -#ifndef PRODUCT - void dump() { - if (!_is_valid) { - tty->print("ArrayPointer[%d %s, invalid]", _pointer->_idx, _pointer->Name()); - return; - } - tty->print("ArrayPointer[%d %s, base[%d %s] + %lld", - _pointer->_idx, _pointer->Name(), - _base->_idx, _base->Name(), - (long long)_constant_offset); - if (_int_offset != nullptr) { - tty->print(" + I2L[%d %s] << %d", - _int_offset->_idx, _int_offset->Name(), _int_offset_shift); - } - for (int i = 0; i < _other_offsets->length(); i++) { - Node* n = _other_offsets->at(i); - tty->print(" + [%d %s]", n->_idx, n->Name()); - } - tty->print_cr("]"); - } -#endif -}; - // Link together multiple stores (B/S/C/I) into a longer one. // // Example: _store = StoreB[i+3] @@ -2951,13 +2775,18 @@ class ArrayPointer { // of adjacent stores there remains exactly one RangeCheck, located between the // first and the second store (e.g. RangeCheck[i+3]). // -class MergePrimitiveArrayStores : public StackObj { +class MergePrimitiveStores : public StackObj { private: - PhaseGVN* _phase; - StoreNode* _store; + PhaseGVN* const _phase; + StoreNode* const _store; + + NOT_PRODUCT( const CHeapBitMap &_trace_tags; ) public: - MergePrimitiveArrayStores(PhaseGVN* phase, StoreNode* store) : _phase(phase), _store(store) {} + MergePrimitiveStores(PhaseGVN* phase, StoreNode* store) : + _phase(phase), _store(store) + NOT_PRODUCT( COMMA _trace_tags(Compile::current()->directive()->trace_merge_stores_tags()) ) + {} StoreNode* run(); @@ -2988,6 +2817,17 @@ class MergePrimitiveArrayStores : public StackObj { } return Status(found_store, cfg_status == CFGStatus::SuccessWithRangeCheck); } + +#ifndef PRODUCT + void print_on(outputStream* st) const { + if (_found_store == nullptr) { + st->print_cr("None"); + } else { + st->print_cr("Found[%d %s, %s]", _found_store->_idx, _found_store->Name(), + _found_range_check ? "RC" : "no-RC"); + } + } +#endif }; Status find_adjacent_use_store(const StoreNode* def_store) const; @@ -3001,43 +2841,56 @@ class MergePrimitiveArrayStores : public StackObj { Node* make_merged_input_value(const Node_List& merge_list); StoreNode* make_merged_store(const Node_List& merge_list, Node* merged_input_value); - DEBUG_ONLY( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; ) -}; +#ifndef PRODUCT + // Access to TraceMergeStores tags + bool is_trace(TraceMergeStores::Tag tag) const { + return _trace_tags.at(tag); + } -StoreNode* MergePrimitiveArrayStores::run() { - // Check for B/S/C/I - int opc = _store->Opcode(); - if (opc != Op_StoreB && opc != Op_StoreC && opc != Op_StoreI) { - return nullptr; + bool is_trace_basic() const { + return is_trace(TraceMergeStores::Tag::BASIC); } - // Only merge stores on arrays, and the stores must have the same size as the elements. 
- const TypePtr* ptr_t = _store->adr_type(); - if (ptr_t == nullptr) { - return nullptr; + bool is_trace_pointer() const { + return is_trace(TraceMergeStores::Tag::POINTER); } - const TypeAryPtr* aryptr_t = ptr_t->isa_aryptr(); - if (aryptr_t == nullptr) { - return nullptr; + + bool is_trace_aliasing() const { + return is_trace(TraceMergeStores::Tag::ALIASING); } - BasicType bt = aryptr_t->elem()->array_element_basic_type(); - if (!is_java_primitive(bt) || - type2aelembytes(bt) != _store->memory_size()) { - return nullptr; + + bool is_trace_adjacency() const { + return is_trace(TraceMergeStores::Tag::ADJACENCY); + } + + bool is_trace_success() const { + return is_trace(TraceMergeStores::Tag::SUCCESS); } - if (_store->is_unsafe_access()) { +#endif + + NOT_PRODUCT( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; ) +}; + +StoreNode* MergePrimitiveStores::run() { + // Check for B/S/C/I + int opc = _store->Opcode(); + if (opc != Op_StoreB && opc != Op_StoreC && opc != Op_StoreI) { return nullptr; } + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] MergePrimitiveStores::run: "); _store->dump(); }) + // The _store must be the "last" store in a chain. If we find a use we could merge with // then that use or a store further down is the "last" store. Status status_use = find_adjacent_use_store(_store); + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] expect no use: "); status_use.print_on(tty); }) if (status_use.found_store() != nullptr) { return nullptr; } // Check if we can merge with at least one def, so that we have at least 2 stores to merge. Status status_def = find_adjacent_def_store(_store); + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] expect def: "); status_def.print_on(tty); }) if (status_def.found_store() == nullptr) { return nullptr; } @@ -3051,45 +2904,25 @@ StoreNode* MergePrimitiveArrayStores::run() { StoreNode* merged_store = make_merged_store(merge_list, merged_input_value); - DEBUG_ONLY( if(TraceMergeStores) { trace(merge_list, merged_input_value, merged_store); } ) + NOT_PRODUCT( if (is_trace_success()) { trace(merge_list, merged_input_value, merged_store); } ) return merged_store; } // Check compatibility between _store and other_store. -bool MergePrimitiveArrayStores::is_compatible_store(const StoreNode* other_store) const { +bool MergePrimitiveStores::is_compatible_store(const StoreNode* other_store) const { int opc = _store->Opcode(); assert(opc == Op_StoreB || opc == Op_StoreC || opc == Op_StoreI, "precondition"); - assert(_store->adr_type()->isa_aryptr() != nullptr, "must be array store"); - assert(!_store->is_unsafe_access(), "no unsafe accesses"); if (other_store == nullptr || - _store->Opcode() != other_store->Opcode() || - other_store->adr_type() == nullptr || - other_store->adr_type()->isa_aryptr() == nullptr || - other_store->is_unsafe_access()) { + _store->Opcode() != other_store->Opcode()) { return false; } - // Check that the size of the stores, and the array elements are all the same. 
- const TypeAryPtr* aryptr_t1 = _store->adr_type()->is_aryptr(); - const TypeAryPtr* aryptr_t2 = other_store->adr_type()->is_aryptr(); - BasicType aryptr_bt1 = aryptr_t1->elem()->array_element_basic_type(); - BasicType aryptr_bt2 = aryptr_t2->elem()->array_element_basic_type(); - if (!is_java_primitive(aryptr_bt1) || !is_java_primitive(aryptr_bt2)) { - return false; - } - int size1 = type2aelembytes(aryptr_bt1); - int size2 = type2aelembytes(aryptr_bt2); - if (size1 != size2 || - size1 != _store->memory_size() || - _store->memory_size() != other_store->memory_size()) { - return false; - } return true; } -bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const { +bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const { if (!is_adjacent_input_pair(def_store->in(MemNode::ValueIn), use_store->in(MemNode::ValueIn), def_store->memory_size())) { @@ -3097,16 +2930,17 @@ bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, con } ResourceMark rm; - ArrayPointer array_pointer_use = ArrayPointer::make(_phase, use_store->in(MemNode::Address)); - ArrayPointer array_pointer_def = ArrayPointer::make(_phase, def_store->in(MemNode::Address)); - if (!array_pointer_def.is_adjacent_to_and_before(array_pointer_use, use_store->memory_size())) { - return false; - } - - return true; +#ifndef PRODUCT + const TraceMemPointer trace(is_trace_pointer(), + is_trace_aliasing(), + is_trace_adjacency()); +#endif + const MemPointer pointer_use(use_store NOT_PRODUCT( COMMA trace )); + const MemPointer pointer_def(def_store NOT_PRODUCT( COMMA trace )); + return pointer_def.is_adjacent_to_and_before(pointer_use); } -bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const { +bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const { // Pattern: [n1 = ConI, n2 = ConI] if (n1->Opcode() == Op_ConI) { return n2->Opcode() == Op_ConI; @@ -3148,7 +2982,7 @@ bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Nod } // Detect pattern: n = base_out >> shift_out -bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) { +bool MergePrimitiveStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) { assert(n != nullptr, "precondition"); int opc = n->Opcode(); @@ -3171,7 +3005,7 @@ bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_o } // Check if there is nothing between the two stores, except optionally a RangeCheck leading to an uncommon trap. 
-MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) { +MergePrimitiveStores::CFGStatus MergePrimitiveStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) { assert(use_store->in(MemNode::Memory) == def_store, "use-def relationship"); Node* ctrl_use = use_store->in(MemNode::Control); @@ -3216,7 +3050,7 @@ MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_p return CFGStatus::SuccessWithRangeCheck; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_store(const StoreNode* def_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_use_store(const StoreNode* def_store) const { Status status_use = find_use_store(def_store); StoreNode* use_store = status_use.found_store(); if (use_store != nullptr && !is_adjacent_pair(use_store, def_store)) { @@ -3225,7 +3059,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_s return status_use; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_store(const StoreNode* use_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_def_store(const StoreNode* use_store) const { Status status_def = find_def_store(use_store); StoreNode* def_store = status_def.found_store(); if (def_store != nullptr && !is_adjacent_pair(use_store, def_store)) { @@ -3234,7 +3068,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_s return status_def; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(const StoreNode* def_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_use_store(const StoreNode* def_store) const { Status status_use = find_use_store_unidirectional(def_store); #ifdef ASSERT @@ -3250,7 +3084,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(cons return status_use; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(const StoreNode* use_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_def_store(const StoreNode* use_store) const { Status status_def = find_def_store_unidirectional(use_store); #ifdef ASSERT @@ -3266,7 +3100,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(cons return status_def; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unidirectional(const StoreNode* def_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_use_store_unidirectional(const StoreNode* def_store) const { assert(is_compatible_store(def_store), "precondition: must be compatible with _store"); for (DUIterator_Fast imax, i = def_store->fast_outs(imax); i < imax; i++) { @@ -3279,7 +3113,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unid return Status::make_failure(); } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unidirectional(const StoreNode* use_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_def_store_unidirectional(const StoreNode* use_store) const { assert(is_compatible_store(use_store), "precondition: must be compatible with _store"); StoreNode* def_store = use_store->in(MemNode::Memory)->isa_Store(); @@ -3290,7 +3124,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unid return Status::make(def_store, cfg_status_for_pair(use_store, 
def_store)); } -void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const { +void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) const { // The merged store can be at most 8 bytes. const uint merge_list_max_size = 8 / _store->memory_size(); assert(merge_list_max_size >= 2 && @@ -3303,25 +3137,32 @@ void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const merge_list.push(current); while (current != nullptr && merge_list.size() < merge_list_max_size) { Status status = find_adjacent_def_store(current); + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] find def: "); status.print_on(tty); }) + current = status.found_store(); if (current != nullptr) { merge_list.push(current); // We can have at most one RangeCheck. if (status.found_range_check()) { + NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] found RangeCheck, stop traversal."); }) break; } } } + NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] found:"); merge_list.dump(); }) + // Truncate the merge_list to a power of 2. const uint pow2size = round_down_power_of_2(merge_list.size()); assert(pow2size >= 2, "must be merging at least 2 stores"); while (merge_list.size() > pow2size) { merge_list.pop(); } + + NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] truncated:"); merge_list.dump(); }) } // Merge the input values of the smaller stores to a single larger input value. -Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_list) { +Node* MergePrimitiveStores::make_merged_input_value(const Node_List& merge_list) { int new_memory_size = _store->memory_size() * merge_list.size(); Node* first = merge_list.at(merge_list.size()-1); Node* merged_input_value = nullptr; @@ -3407,7 +3248,7 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_ // | | | | | | | | // // last_store (= _store) merged_store // // // -StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) { +StoreNode* MergePrimitiveStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) { Node* first_store = merge_list.at(merge_list.size()-1); Node* last_ctrl = _store->in(MemNode::Control); // after (optional) RangeCheck Node* first_mem = first_store->in(MemNode::Memory); @@ -3436,8 +3277,8 @@ StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_l return merged_store; } -#ifdef ASSERT -void MergePrimitiveArrayStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const { +#ifndef PRODUCT +void MergePrimitiveStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const { stringStream ss; ss.print_cr("[TraceMergeStores]: Replace"); for (int i = (int)merge_list.size() - 1; i >= 0; i--) { @@ -3535,7 +3376,7 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) { if (MergeStores && UseUnalignedAccesses) { if (phase->C->post_loop_opts_phase()) { - MergePrimitiveArrayStores merge(phase, this); + MergePrimitiveStores merge(phase, this); Node* progress = merge.run(); if (progress != nullptr) { return progress; } } else { diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp new file mode 100644 index 0000000000000..df443c69449cb --- /dev/null +++ b/src/hotspot/share/opto/mempointer.cpp @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2024, Oracle and/or its 
affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/mempointer.hpp" +#include "utilities/resourceHash.hpp" + +// Recursively parse the pointer expression with a DFS all-path traversal +// (i.e. with node repetitions), starting at the pointer. +MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() { + assert(_worklist.is_empty(), "no prior parsing"); + assert(_summands.is_empty(), "no prior parsing"); + + Node* pointer = _mem->in(MemNode::Address); + + // Start with the trivial summand. + _worklist.push(MemPointerSummand(pointer, NoOverflowInt(1))); + + // Decompose the summands until only terminal summands remain. This effectively + // parses the pointer expression recursively. + int traversal_count = 0; + while (_worklist.is_nonempty()) { + // Bail out if the graph is too complex. + if (traversal_count++ > 1000) { return MemPointerDecomposedForm::make_trivial(pointer); } + parse_sub_expression(_worklist.pop()); + } + + // Bail out if there is a constant overflow. + if (_con.is_NaN()) { return MemPointerDecomposedForm::make_trivial(pointer); } + + // Sorting by variable idx means that all summands with the same variable are consecutive. + // This simplifies the combining of summands with the same variable below. + _summands.sort(MemPointerSummand::cmp_by_variable_idx); + + // Combine summands for the same variable, adding up the scales. + int pos_put = 0; + int pos_get = 0; + while (pos_get < _summands.length()) { + const MemPointerSummand& summand = _summands.at(pos_get++); + Node* variable = summand.variable(); + NoOverflowInt scale = summand.scale(); + // Add up scale of all summands with the same variable. + while (pos_get < _summands.length() && _summands.at(pos_get).variable() == variable) { + MemPointerSummand s = _summands.at(pos_get++); + scale = scale + s.scale(); + } + // Bail out if scale is NaN. + if (scale.is_NaN()) { + return MemPointerDecomposedForm::make_trivial(pointer); + } + // Keep summands with non-zero scale. + if (!scale.is_zero()) { + _summands.at_put(pos_put++, MemPointerSummand(variable, scale)); + } + } + _summands.trunc_to(pos_put); + + return MemPointerDecomposedForm::make(pointer, _summands, _con); +} + +// Parse a sub-expression of the pointer, starting at the current summand. We parse the +// current node, and see if it can be decomposed into further summands, or if the current +// summand is terminal. 
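+// For example, a summand "scale * (a + b)" (AddI, AddL, AddP) is decomposed by pushing
+// "scale * a" and "scale * b" onto the worklist, and a summand "scale * (a << con)"
+// (LShiftI, LShiftL) becomes "(scale * (1 << con)) * a" (see the cases below).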
+void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSummand& summand) { + Node* n = summand.variable(); + const NoOverflowInt scale = summand.scale(); + const NoOverflowInt one(1); + + int opc = n->Opcode(); + if (is_safe_to_decompose_op(opc, scale)) { + switch (opc) { + case Op_ConI: + case Op_ConL: + { + // Terminal: add to constant. + NoOverflowInt con = (opc == Op_ConI) ? NoOverflowInt(n->get_int()) + : NoOverflowInt(n->get_long()); + _con = _con + scale * con; + return; + } + case Op_AddP: + case Op_AddL: + case Op_AddI: + { + // Decompose addition. + Node* a = n->in((opc == Op_AddP) ? 2 : 1); + Node* b = n->in((opc == Op_AddP) ? 3 : 2); + _worklist.push(MemPointerSummand(a, scale)); + _worklist.push(MemPointerSummand(b, scale)); + return; + } + case Op_SubL: + case Op_SubI: + { + // Decompose subtraction. + Node* a = n->in(1); + Node* b = n->in(2); + + NoOverflowInt sub_scale = NoOverflowInt(-1) * scale; + + _worklist.push(MemPointerSummand(a, scale)); + _worklist.push(MemPointerSummand(b, sub_scale)); + return; + } + case Op_MulL: + case Op_MulI: + case Op_LShiftL: + case Op_LShiftI: + { + // Only multiplication with constants is allowed: factor * variable + // IGVN already folds constants to in(2). If we find a variable there + // instead, we cannot further decompose this summand, and have to add + // it to the terminal summands. + Node* variable = n->in(1); + Node* con = n->in(2); + if (!con->is_Con()) { break; } + NoOverflowInt factor; + switch (opc) { + case Op_MulL: // variable * con + factor = NoOverflowInt(con->get_long()); + break; + case Op_MulI: // variable * con + factor = NoOverflowInt(con->get_int()); + break; + case Op_LShiftL: // variable << con = variable * (1 << con) + factor = one << NoOverflowInt(con->get_int()); + break; + case Op_LShiftI: // variable << con = variable * (1 << con) + factor = one << NoOverflowInt(con->get_int()); + break; + } + + // Accumulate scale. + NoOverflowInt new_scale = scale * factor; + + _worklist.push(MemPointerSummand(variable, new_scale)); + return; + } + case Op_CastII: + case Op_CastLL: + case Op_CastX2P: + case Op_ConvI2L: + // On 32bit systems we can also look through ConvL2I, since the final result will always + // be truncated back with ConvL2I. On 64bit systems we cannot decompose ConvL2I because + // such int values will eventually be expanded to long with a ConvI2L: + // + // valL = max_jint + 1 + // ConvI2L(ConvL2I(valL)) = ConvI2L(min_jint) = min_jint != max_jint + 1 = valL + // + NOT_LP64( case Op_ConvL2I: ) + { + // Decompose: look through. + Node* a = n->in(1); + _worklist.push(MemPointerSummand(a, scale)); + return; + } + default: + // All other operations cannot be further decomposed. We just add them to the + // terminal summands below. + break; + } + } + + // Default: we could not parse the "summand" further, i.e. it is terminal. + _summands.push(summand); +} + +// Check if the decomposition of operation opc is guaranteed to be safe. +// Please refer to the definition of "safe decomposition" in mempointer.hpp +bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, const NoOverflowInt& scale) const { +#ifndef _LP64 + // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always + // be truncated. Thus, it does not matter if we have int or long overflows. + // Simply put: all decompositions are (SAFE1). + return true; +#else + + switch (opc) { + // These operations are always safe to decompose, i.e. 
(SAFE1): + case Op_ConI: + case Op_ConL: + case Op_AddP: + case Op_AddL: + case Op_SubL: + case Op_MulL: + case Op_LShiftL: + case Op_CastII: + case Op_CastLL: + case Op_CastX2P: + case Op_CastPP: + case Op_ConvI2L: + return true; + + // But on 64-bit platforms, these operations are not trivially safe to decompose: + case Op_AddI: // ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) + case Op_SubI: // ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) + case Op_MulI: // ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) + case Op_LShiftI: // ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) + break; // Analysis below. + + // All other operations are assumed not safe to decompose, or simply cannot be decomposed + default: + return false; + } + + const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr(); + if (ary_ptr_t != nullptr) { + // Array accesses that are not Unsafe always have a RangeCheck which ensures + // that there is no int overflow. And without overflows, all decompositions + // are (SAFE1). + if (!_mem->is_unsafe_access()) { + return true; + } + + // Intuition: In general, the decomposition of AddI, SubI, MulI or LShiftI is not safe, + // because of overflows. But under some conditions, we can prove that such a + // decomposition is (SAFE2). Intuitively, we want to prove that an overflow + // would mean that the pointers have such a large distance, that at least one + // must lie out of bounds. In the proof of the "MemPointer Lemma", we thus + // get a contradiction with the condition that both pointers are in bounds. + // + // We prove that the decomposition of AddI, SubI, MulI (with constant) and ShiftI (with + // constant) is (SAFE2), under the condition: + // + // abs(scale) % array_element_size_in_bytes = 0 + // + // First, we describe how the decomposition works: + // + // mp_i = con + sum(other_summands) + summand + // ------------------------- ------- + // rest scale * ConvI2L(op) + // + // We decompose the summand depending on the op, where we know that there is some + // integer y, such that: + // + // scale * ConvI2L(a + b) = scale * ConvI2L(a) + scale * ConvI2L(b) + scale * y * 2^32 + // scale * ConvI2L(a - b) = scale * ConvI2L(a) - scale * ConvI2L(b) + scale * y * 2^32 + // scale * ConvI2L(a * con) = scale * con * ConvI2L(a) + scale * y * 2^32 + // scale * ConvI2L(a << con) = scale * (1 << con) * ConvI2L(a) + scale * y * 2^32 + // \_______________________/ \_____________________________________/ \______________/ + // before decomposition after decomposition ("new_summands") overflow correction + // + // Thus, for AddI and SubI, we get: + // summand = new_summand1 + new_summand2 + scale * y * 2^32 + // + // mp_{i+1} = con + sum(other_summands) + new_summand1 + new_summand2 + // = con + sum(other_summands) + summand - scale * y * 2^32 + // = mp_i - scale * y * 2^32 + // + // And for MulI and ShiftI we get: + // summand = new_summand + scale * y * 2^32 + // + // mp_{i+1} = con + sum(other_summands) + new_summand + // = con + sum(other_summands) + summand - scale * y * 2^32 + // = mp_i - scale * y * 2^32 + // + // Further: + // abs(scale) % array_element_size_in_bytes = 0 + // implies that there is some integer z, such that: + // z * array_element_size_in_bytes = scale + // + // And hence, with "x = y * z", the decomposition is (SAFE2) under the assumed condition: + // mp_i = mp_{i+1} + scale * y * 2^32 + // = mp_{i+1} + z * array_element_size_in_bytes * y * 2^32 + // = mp_{i+1} + x * array_element_size_in_bytes * 2^32 + // + BasicType array_element_bt = 
ary_ptr_t->elem()->array_element_basic_type(); + if (is_java_primitive(array_element_bt)) { + NoOverflowInt array_element_size_in_bytes = NoOverflowInt(type2aelembytes(array_element_bt)); + if (scale.is_multiple_of(array_element_size_in_bytes)) { + return true; + } + } + } + + return false; +#endif +} + +// Compute the aliasing between two MemPointerDecomposedForm. We use the "MemPointer Lemma" to +// prove that the computed aliasing also applies for the underlying pointers. Note that the +// condition (S0) is already given, because the MemPointerDecomposedForm is always constructed +// using only safe decompositions. +// +// Pre-Condition: +// We assume that both pointers are in-bounds of their respective memory object. If this does +// not hold, for example, with the use of Unsafe, then we would already have undefined behavior, +// and we are allowed to do anything. +MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerDecomposedForm& other + NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print_cr("MemPointerDecomposedForm::get_aliasing_with:"); + print_on(tty); + other.print_on(tty); + } +#endif + + // "MemPointer Lemma" condition (S2): check if all summands are the same: + for (uint i = 0; i < SUMMANDS_SIZE; i++) { + const MemPointerSummand s1 = summands_at(i); + const MemPointerSummand s2 = other.summands_at(i); + if (s1 != s2) { +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print_cr(" -> Aliasing unknown, differ on summand %d.", i); + } +#endif + return MemPointerAliasing::make_unknown(); + } + } + + // "MemPointer Lemma" condition (S3): check that the constants do not differ too much: + const NoOverflowInt distance = other.con() - con(); + // We must check that: abs(distance) < 2^32 + // However, this is only false if: distance = min_jint + if (distance.is_NaN() || distance.value() == min_jint) { +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print(" -> Aliasing unknown, bad distance: "); + distance.print_on(tty); + tty->cr(); + } +#endif + return MemPointerAliasing::make_unknown(); + } + + // "MemPointer Lemma" condition (S1): + // Given that all summands are the same, we know that both pointers point into the + // same memory object. With the Pre-Condition, we know that both pointers are in + // bounds of that same memory object. + + // Hence, all 4 conditions of the "MemoryPointer Lemma" are established, and hence + // we know that the distance between the underlying pointers is equal to the distance + // we computed for the MemPointers: + // p_other - p_this = distance = other.con - this.con +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print_cr(" -> Aliasing always, distance = %d.", distance.value()); + } +#endif + return MemPointerAliasing::make_always(distance.value()); +} + +bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { + const MemPointerDecomposedForm& s1 = decomposed_form(); + const MemPointerDecomposedForm& s2 = other.decomposed_form(); + const MemPointerAliasing aliasing = s1.get_aliasing_with(s2 NOT_PRODUCT( COMMA _trace )); + const jint size = mem()->memory_size(); + const bool is_adjacent = aliasing.is_always_at_distance(size); + +#ifndef PRODUCT + if (_trace.is_trace_adjacency()) { + tty->print("Adjacent: %s, because size = %d and aliasing = ", + is_adjacent ? 
"true" : "false", size); + aliasing.print_on(tty); + tty->cr(); + } +#endif + + return is_adjacent; +} diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp new file mode 100644 index 0000000000000..1e5b2c00b8822 --- /dev/null +++ b/src/hotspot/share/opto/mempointer.hpp @@ -0,0 +1,618 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_OPTO_MEMPOINTER_HPP +#define SHARE_OPTO_MEMPOINTER_HPP + +#include "opto/memnode.hpp" +#include "opto/noOverflowInt.hpp" + +// The MemPointer is a shared facility to parse pointers and check the aliasing of pointers, +// e.g. checking if two stores are adjacent. +// +// ----------------------------------------------------------------------------------------- +// +// Intuition and Examples: +// We parse / decompose pointers into a linear form: +// +// pointer = SUM(scale_i * variable_i) + con +// +// where SUM() adds all "scale_i * variable_i" for each i together. +// +// The con and scale_i are compile-time constants (NoOverflowInt), and the variable_i are +// compile-time variables (C2 nodes). +// +// For the MemPointer, we do not explicitly track the base address. For Java heap pointers, the +// base address is just a variable in a summand with scale == 1. For native memory (C heap) +// pointers, the base address is null, and is hence implicitly a zero constant. 
+// +// +// Example 1: byte array access: +// +// array[i] +// +// pointer = array_base + ARRAY_BYTE_BASE_OFFSET + 1 * i +// = 1 * array_base + ARRAY_BYTE_BASE_OFFSET + 1 * i +// -------------------- ---------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example 2: int array access +// +// array[5 + i + 3 * j] +// +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * i + 4 * 3 * j +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * i + 12 * j +// -------------------- ----------------------------- -------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2 +// +// +// Example 3: Unsafe with int array +// +// UNSAFE.getInt(array, ARRAY_INT_BASE_OFFSET + 4 * i); +// +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * i +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 4 * i +// -------------------- --------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example 4: Unsafe with native memory address +// +// long address; +// UNSAFE.getInt(null, address + 4 * i); +// +// pointer = address + 4 * i +// = 1 * address + 0 + 4 * i +// -------------------- --- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example 5: MemorySegment with byte array as backing type +// +// byte[] array = new byte[1000]; +// MemorySegment ms = MemorySegment.ofArray(array); +// assert ms.heapBase().get() == array: "array is base"; +// assert ms.address() == 0: "zero offset from base"; +// byte val = ms.get(ValueLayout.JAVA_BYTE, i); +// +// pointer = ms.heapBase() + ARRAY_BYTE_BASE_OFFSET + ms.address() + i +// = 1 * array_base + ARRAY_BYTE_BASE_OFFSET + 0 + 1 * i +// ----------------------- ------------------------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example 6: MemorySegment with native memory +// +// MemorySegment ms = Arena.ofAuto().allocate(1000, 1); +// assert ms.heapBase().isEmpty(): "null base"; +// assert ms.address() != 0: "non-zero native memory address"; +// short val = ms.get(ValueLayout.JAVA_SHORT, 2L * i); +// +// pointer = ms.heapBase() + ms.address() + 2 i +// = 0 + 1 * ms.address() + 2 * i +// ------------ ---------------------- -------------------- +// = con scale_0 * variable_0 + scale_1 * variable_1 +// +// +// Example 7: Non-linear access to int array +// +// array[5 + i + j * k] +// +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * i + 4 * j * k +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * i + 4 * j * k +// -------------------- ----------------------------- -------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2 +// +// Note: we simply stop parsing once a term is not linear. We keep "j * k" as its own variable. +// +// +// Example 8: Unsafe with native memory address, non-linear access +// +// UNSAFE.getInt(null, i * j); +// +// pointer = i * j +// = 0 + 1 * i * j +// --- -------------------- +// = con + scale_0 * variable_0 +// +// Note: we can always parse a pointer into its trivial linear form: +// +// pointer = 0 + 1 * pointer. 
+// +// ----------------------------------------------------------------------------------------- +// +// MemPointerDecomposedForm: +// When the pointer is parsed, it is decomposed into a SUM of summands plus a constant: +// +// pointer = SUM(summands) + con +// +// Where each summand_i in summands has the form: +// +// summand_i = scale_i * variable_i +// +// Hence, the full decomposed form is: +// +// pointer = SUM(scale_i * variable_i) + con +// +// Note: the scale_i are compile-time constants (NoOverflowInt), and the variable_i are +// compile-time variables (C2 nodes). +// On 64-bit systems, this decomposed form is computed with long-add/mul, on 32-bit systems +// it is computed with int-add/mul. +// +// MemPointerAliasing: +// The decomposed form allows us to determine the aliasing between two pointers easily. For +// example, if two pointers are identical, except for their constant: +// +// pointer1 = SUM(summands) + con1 +// pointer2 = SUM(summands) + con2 +// +// then we can easily compute the distance between the pointers (distance = con2 - con1), +// and determine if they are adjacent. +// +// MemPointerDecomposedFormParser: +// Any pointer can be parsed into this (default / trivial) decomposed form: +// +// pointer = 1 * pointer + 0 +// scale_0 * variable_0 + con +// +// However, this is not particularly useful to compute aliasing. We would like to decompose +// the pointer as far as possible, i.e. extract as many summands and add up the constants to +// a single constant. +// +// Example (normal int-array access): +// pointer1 = array[i + 0] = array_base + array_int_base_offset + 4L * ConvI2L(i + 0) +// pointer2 = array[i + 1] = array_base + array_int_base_offset + 4L * ConvI2L(i + 1) +// +// At first, computing the aliasing is not immediately straight-forward in the general case because +// the distance is hidden inside the ConvI2L. We can convert this (with array_int_base_offset = 16) +// into these decomposed forms: +// +// pointer1 = 1L * array_base + 4L * i + 16L +// pointer2 = 1L * array_base + 4L * i + 20L +// +// This allows us to easily see that these two pointers are adjacent (distance = 4). +// +// Hence, in MemPointerDecomposedFormParser::parse_decomposed_form, we start with the pointer as +// a trivial summand. A summand can either be decomposed further or it is terminal (cannot +// be decomposed further). We decompose the summands recursively until all remaining summands +// are terminal, see MemPointerDecomposedFormParser::parse_sub_expression. This effectively parses +// the pointer expression recursively. +// +// ----------------------------------------------------------------------------------------- +// +// We have to be careful on 64-bit systems with ConvI2L: decomposing its input is not +// correct in general, overflows may not be preserved in the decomposed form: +// +// AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) +// SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) +// MulI: ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) +// LShiftI: ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) +// +// If we want to prove the correctness of MemPointerAliasing, we need some guarantees, +// that the MemPointers adequately represent the underlying pointers, such that we can +// compute the aliasing based on the summands and constants. +// +// ----------------------------------------------------------------------------------------- +// +// Below, we will formulate a "MemPointer Lemma" that helps us to prove the correctness of +// the MemPointerAliasing computations. 
To prove the "MemPointer Lemma", we need to define +// the idea of a "safe decomposition", and then prove that all the decompositions we apply +// are such "safe decompositions". +// +// +// Definition: Safe decomposition +// Trivial decomposition: +// (SAFE0) The trivial decomposition from p to mp_0 = 0 + 1 * p is always safe. +// +// Non-trivial decomposition: +// We decompose summand in: +// mp_i = con + summand + SUM(other_summands) +// resulting in: +-------------------------+ +// mp_{i+1} = con + dec_con + SUM(dec_summands) + SUM(other_summands) +// = new_con + SUM(new_summands) +// where mp_i means that the original pointer p was decomposed i times. +// +// We call a non-trivial decomposition safe if either: +// (SAFE1) No matter the values of the summand variables: +// mp_i = mp_{i+1} +// +// (SAFE2) The pointer is on an array with a known array_element_size_in_bytes, +// and there is an integer x, such that: +// mp_i = mp_{i+1} + x * array_element_size_in_bytes * 2^32 +// +// Note: if "x = 0", we have "mp1 = mp2", and if "x != 0", then mp1 and mp2 +// have a distance at least twice as large as the array size, and so +// at least one of mp1 or mp2 must be out of bounds of the array. +// +// MemPointer Lemma: +// Given two pointers p1 and p2, and their respective MemPointers mp1 and mp2. +// If these conditions hold: +// (S0) mp1 and mp2 are constructed only with safe decompositions (SAFE0, SAFE1, SAFE2) +// from p1 and p2, respectively. +// (S1) Both p1 and p2 are within the bounds of the same memory object. +// (S2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31. +// (S3) All summands of mp1 and mp2 are identical (i.e. only the constants are possibly different). +// +// then the pointer difference between p1 and p2 is identical to the difference between +// mp1 and mp2: +// p1 - p2 = mp1 - mp2 +// +// Note: MemPointerDecomposedForm::get_aliasing_with relies on this MemPointer Lemma to +// prove the correctness of its aliasing computation between two MemPointers. +// +// +// Note: MemPointerDecomposedFormParser::is_safe_to_decompose_op checks that all +// decompositions we apply are safe. +// +// +// Proof of the "MemPointer Lemma": +// Assume (S0-S3) and show that +// p1 - p2 = mp1 - mp2 +// +// We make a case distinction over the types of decompositions used in the construction of mp1 and mp2. +// +// Trivial Case: Only trivial (SAFE0) decompositions were used: +// mp1 = 0 + 1 * p1 = p1 +// mp2 = 0 + 1 * p2 = p2 +// => +// p1 - p2 = mp1 - mp2 +// +// Unsafe Case: We apply at least one unsafe decomposition: +// This is a contradiction to (S0) and we are done. +// +// Case 1: Only decomposition of type (SAFE0) and (SAFE1) are used: +// We make an induction proof over the decompositions from p1 to mp1, starting with +// the trivial decomposition (SAFE0): +// mp1_0 = 0 + 1 * p1 = p1 +// Then for the i-th non-trivial decomposition (SAFE1) we know that +// mp1_i = mp1_{i+1} +// and hence, after the n-th non-trivial decomposition from p1: +// p1 = mp1_0 = mp1_i = mp1_n = mp1 +// Analogously, we can prove: +// p2 = mp2 +// +// p1 = mp1 +// p2 = mp2 +// => +// p1 - p2 = mp1 - mp2 +// +// Case 2: At least one decomposition of type (SAFE2) and no unsafe decomposition is used. +// Given we have (SAFE2) decompositions, we know that we are operating on an array of +// known array_element_size_in_bytes. We can weaken the guarantees from (SAFE1) +// decompositions to the same guarantee as (SAFE2) decompositions. 
Hence all applied +// non-trivial decompositions satisfy: +// mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32 +// where x1_i = 0 for (SAFE1) decompositions. +// +// We make an induction proof over the decompositions from p1 to mp1, starting with +// the trivial decomposition (SAFE0): +// mp1_0 = 0 + 1 * p1 = p1 +// Then for the i-th non-trivial decomposition (SAFE1) or (SAFE2), we know that +// mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32 +// and hence, if mp1 was decomposed with n non-trivial decompositions (SAFE1) or (SAFE2) from p1: +// p1 = mp1 + x1 * array_element_size_in_bytes * 2^32 +// where +// x1 = SUM(x1_i) +// Analogously, we can prove: +// p2 = mp2 + x2 * array_element_size_in_bytes * 2^32 +// +// And hence, with x = x1 - x2 we have: +// p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 +// +// If "x = 0", then it follows: +// p1 - p2 = mp1 - mp2 +// +// If "x != 0", then: +// abs(p1 - p2) = abs(mp1 - mp2 + x * array_element_size_in_bytes * 2^32) +// >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) +// -- apply x != 0 -- +// >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) +// -- apply (S3) -- +// = array_element_size_in_bytes * 2^32 - abs(mp1.con - mp2.con) +// -- apply (S2) -- +// > array_element_size_in_bytes * 2^32 - 2^31 +// -- apply array_element_size_in_bytes > 0 -- +// >= array_element_size_in_bytes * 2^31 +// >= max_possible_array_size_in_bytes +// >= array_size_in_bytes +// +// This shows that p1 and p2 have a distance greater than the array size, and hence at least one of the two +// pointers must be out of bounds. This contradicts our assumption (S1) and we are done. + + +#ifndef PRODUCT +class TraceMemPointer : public StackObj { +private: + const bool _is_trace_pointer; + const bool _is_trace_aliasing; + const bool _is_trace_adjacency; + +public: + TraceMemPointer(const bool is_trace_pointer, + const bool is_trace_aliasing, + const bool is_trace_adjacency) : + _is_trace_pointer( is_trace_pointer), + _is_trace_aliasing( is_trace_aliasing), + _is_trace_adjacency(is_trace_adjacency) + {} + + bool is_trace_pointer() const { return _is_trace_pointer; } + bool is_trace_aliasing() const { return _is_trace_aliasing; } + bool is_trace_adjacency() const { return _is_trace_adjacency; } +}; +#endif + +// Class to represent aliasing between two MemPointer. +class MemPointerAliasing { +public: + enum Aliasing { + Unknown, // Distance unknown. + // Example: two "int[]" with different variable index offsets. + // e.g. "array[i] vs array[j]". + // e.g. "array1[i] vs array2[j]". + Always}; // Constant distance = p1 - p2. + // Example: The same address expression, except for a constant offset + // e.g. "array[i] vs array[i+1]". +private: + const Aliasing _aliasing; + const jint _distance; + + MemPointerAliasing(const Aliasing aliasing, const jint distance) : + _aliasing(aliasing), + _distance(distance) + { + assert(_distance != min_jint, "given by condition (S3) of MemPointer Lemma"); + } + +public: + static MemPointerAliasing make_unknown() { + return MemPointerAliasing(Unknown, 0); + } + + static MemPointerAliasing make_always(const jint distance) { + return MemPointerAliasing(Always, distance); + } + + // Use case: exact aliasing and adjacency. 
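+  // For example, MemPointer::is_adjacent_to_and_before (mempointer.cpp) checks
+  //   aliasing.is_always_at_distance(size)
+  // where size is the memory size of the first (lower address) memory operation.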
+ bool is_always_at_distance(const jint distance) const { + return _aliasing == Always && _distance == distance; + } + +#ifndef PRODUCT + void print_on(outputStream* st) const { + switch(_aliasing) { + case Unknown: st->print("Unknown"); break; + case Always: st->print("Always(%d)", _distance); break; + default: ShouldNotReachHere(); + } + } +#endif +}; + +// Summand of a MemPointerDecomposedForm: +// +// summand = scale * variable +// +// where variable is a C2 node. +class MemPointerSummand : public StackObj { +private: + Node* _variable; + NoOverflowInt _scale; + +public: + MemPointerSummand() : + _variable(nullptr), + _scale(NoOverflowInt::make_NaN()) {} + MemPointerSummand(Node* variable, const NoOverflowInt& scale) : + _variable(variable), + _scale(scale) + { + assert(_variable != nullptr, "must have variable"); + assert(!_scale.is_zero(), "non-zero scale"); + } + + Node* variable() const { return _variable; } + NoOverflowInt scale() const { return _scale; } + + static int cmp_by_variable_idx(MemPointerSummand* p1, MemPointerSummand* p2) { + if (p1->variable() == nullptr) { + return (p2->variable() == nullptr) ? 0 : 1; + } else if (p2->variable() == nullptr) { + return -1; + } + + return p1->variable()->_idx - p2->variable()->_idx; + } + + friend bool operator==(const MemPointerSummand a, const MemPointerSummand b) { + // Both "null" -> equal. + if (a.variable() == nullptr && b.variable() == nullptr) { return true; } + + // Same variable and scale? + if (a.variable() != b.variable()) { return false; } + return a.scale() == b.scale(); + } + + friend bool operator!=(const MemPointerSummand a, const MemPointerSummand b) { + return !(a == b); + } + +#ifndef PRODUCT + void print_on(outputStream* st) const { + st->print("Summand["); + _scale.print_on(st); + tty->print(" * [%d %s]]", _variable->_idx, _variable->Name()); + } +#endif +}; + +// Decomposed form of the pointer sub-expression of "pointer". +// +// pointer = SUM(summands) + con +// +class MemPointerDecomposedForm : public StackObj { +private: + // We limit the number of summands to 10. This is just a best guess, and not at this + // point supported by evidence. But I think it is reasonable: usually, a pointer + // contains a base pointer (e.g. array pointer or null for native memory) and a few + // variables. It should be rare that we have more than 9 variables. 
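+  // For illustration (hypothetical Java source): a store to "a[i + j + 1]" on an int array
+  // decomposes roughly into
+  //   pointer = 1 * a_base + 4 * i + 4 * j + (array_base_offset + 4)
+  // i.e. three summands plus one constant, well below the limit of 10.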
+ static const int SUMMANDS_SIZE = 10; + + Node* _pointer; // pointer node associated with this (sub)pointer + + MemPointerSummand _summands[SUMMANDS_SIZE]; + NoOverflowInt _con; + +public: + // Empty + MemPointerDecomposedForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} + +private: + // Default / trivial: pointer = 0 + 1 * pointer + MemPointerDecomposedForm(Node* pointer) : _pointer(pointer), _con(NoOverflowInt(0)) { + assert(pointer != nullptr, "pointer must be non-null"); + _summands[0] = MemPointerSummand(pointer, NoOverflowInt(1)); + } + + MemPointerDecomposedForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt& con) + : _pointer(pointer), _con(con) { + assert(!_con.is_NaN(), "non-NaN constant"); + assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); + for (int i = 0; i < summands.length(); i++) { + MemPointerSummand s = summands.at(i); + assert(s.variable() != nullptr, "variable cannot be null"); + assert(!s.scale().is_NaN(), "non-NaN scale"); + _summands[i] = s; + } + } + +public: + static MemPointerDecomposedForm make_trivial(Node* pointer) { + return MemPointerDecomposedForm(pointer); + } + + static MemPointerDecomposedForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt& con) { + if (summands.length() <= SUMMANDS_SIZE) { + return MemPointerDecomposedForm(pointer, summands, con); + } else { + return MemPointerDecomposedForm::make_trivial(pointer); + } + } + + MemPointerAliasing get_aliasing_with(const MemPointerDecomposedForm& other + NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const; + + const MemPointerSummand summands_at(const uint i) const { + assert(i < SUMMANDS_SIZE, "in bounds"); + return _summands[i]; + } + + const NoOverflowInt con() const { return _con; } + +#ifndef PRODUCT + void print_on(outputStream* st) const { + if (_pointer == nullptr) { + st->print_cr("MemPointerDecomposedForm empty."); + return; + } + st->print("MemPointerDecomposedForm[%d %s: con = ", _pointer->_idx, _pointer->Name()); + _con.print_on(st); + for (int i = 0; i < SUMMANDS_SIZE; i++) { + const MemPointerSummand& summand = _summands[i]; + if (summand.variable() != nullptr) { + st->print(", "); + summand.print_on(st); + } + } + st->print_cr("]"); + } +#endif +}; + +class MemPointerDecomposedFormParser : public StackObj { +private: + const MemNode* _mem; + + // Internal data-structures for parsing. + NoOverflowInt _con; + GrowableArray _worklist; + GrowableArray _summands; + + // Resulting decomposed-form. + MemPointerDecomposedForm _decomposed_form; + +public: + MemPointerDecomposedFormParser(const MemNode* mem) : _mem(mem), _con(NoOverflowInt(0)) { + _decomposed_form = parse_decomposed_form(); + } + + const MemPointerDecomposedForm decomposed_form() const { return _decomposed_form; } + +private: + MemPointerDecomposedForm parse_decomposed_form(); + void parse_sub_expression(const MemPointerSummand& summand); + + bool is_safe_to_decompose_op(const int opc, const NoOverflowInt& scale) const; +}; + +// Facility to parse the pointer of a Load or Store, so that aliasing between two such +// memory operations can be determined (e.g. adjacency). 
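+//
+// A rough usage sketch (s1, s2 and trace are hypothetical: two StoreB nodes that are merge
+// candidates, and a TraceMemPointer):
+//
+//   MemPointer p1(s1 NOT_PRODUCT(COMMA trace));
+//   MemPointer p2(s2 NOT_PRODUCT(COMMA trace));
+//   if (p1.is_adjacent_to_and_before(p2)) {
+//     // s1 and s2 write to consecutive memory locations and may be merged.
+//   }
+//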
+class MemPointer : public StackObj { +private: + const MemNode* _mem; + const MemPointerDecomposedForm _decomposed_form; + + NOT_PRODUCT( const TraceMemPointer& _trace; ) + +public: + MemPointer(const MemNode* mem NOT_PRODUCT( COMMA const TraceMemPointer& trace)) : + _mem(mem), + _decomposed_form(init_decomposed_form(_mem)) + NOT_PRODUCT( COMMA _trace(trace) ) + { +#ifndef PRODUCT + if (_trace.is_trace_pointer()) { + tty->print_cr("MemPointer::MemPointer:"); + tty->print("mem: "); mem->dump(); + _mem->in(MemNode::Address)->dump_bfs(5, 0, "d"); + _decomposed_form.print_on(tty); + } +#endif + } + + const MemNode* mem() const { return _mem; } + const MemPointerDecomposedForm decomposed_form() const { return _decomposed_form; } + bool is_adjacent_to_and_before(const MemPointer& other) const; + +private: + static const MemPointerDecomposedForm init_decomposed_form(const MemNode* mem) { + assert(mem->is_Store(), "only stores are supported"); + ResourceMark rm; + MemPointerDecomposedFormParser parser(mem); + return parser.decomposed_form(); + } +}; + +#endif // SHARE_OPTO_MEMPOINTER_HPP diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp new file mode 100644 index 0000000000000..9da24645b4117 --- /dev/null +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_OPTO_NOOVERFLOWINT_HPP +#define SHARE_OPTO_NOOVERFLOWINT_HPP + +#include "utilities/ostream.hpp" + +// Wrapper around jint, which detects overflow. +// If any operation overflows, then it returns a NaN. +class NoOverflowInt { +private: + bool _is_NaN; // overflow, uninitialized, etc. + jint _value; + +public: + // Default: NaN. + constexpr NoOverflowInt() : _is_NaN(true), _value(0) {} + + // Create from jlong (or jint) -> NaN if overflows jint. 
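+  // For example: NoOverflowInt(42).value() == 42, while
+  // NoOverflowInt((jlong)max_jint + 1) and NoOverflowInt((jlong)min_jint - 1) are NaN.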
+ constexpr explicit NoOverflowInt(jlong value) : _is_NaN(true), _value(0) { + jint trunc = (jint)value; + if ((jlong)trunc == value) { + _is_NaN = false; + _value = trunc; + } + } + + static constexpr NoOverflowInt make_NaN() { return NoOverflowInt(); } + + bool is_NaN() const { return _is_NaN; } + jint value() const { assert(!is_NaN(), "NaN not allowed"); return _value; } + bool is_zero() const { return !is_NaN() && value() == 0; } + + friend NoOverflowInt operator+(const NoOverflowInt& a, const NoOverflowInt& b) { + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } + return NoOverflowInt((jlong)a.value() + (jlong)b.value()); + } + + friend NoOverflowInt operator-(const NoOverflowInt& a, const NoOverflowInt& b) { + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } + return NoOverflowInt((jlong)a.value() - (jlong)b.value()); + } + + friend NoOverflowInt operator*(const NoOverflowInt& a, const NoOverflowInt& b) { + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } + return NoOverflowInt((jlong)a.value() * (jlong)b.value()); + } + + friend NoOverflowInt operator<<(const NoOverflowInt& a, const NoOverflowInt& b) { + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } + jint shift = b.value(); + if (shift < 0 || shift > 31) { return make_NaN(); } + return NoOverflowInt((jlong)a.value() << shift); + } + + friend bool operator==(const NoOverflowInt& a, const NoOverflowInt& b) { + if (a.is_NaN()) { return false; } + if (b.is_NaN()) { return false; } + return a.value() == b.value(); + } + + NoOverflowInt abs() const { + if (is_NaN()) { return *this; } + if (value() >= 0) { return *this; } + return NoOverflowInt(0) - *this; + } + + bool is_multiple_of(const NoOverflowInt& other) const { + NoOverflowInt a = this->abs(); + NoOverflowInt b = other.abs(); + if (a.is_NaN()) { return false; } + if (b.is_NaN()) { return false; } + if (b.is_zero()) { return false; } + return a.value() % b.value() == 0; + } + +#ifndef PRODUCT + void print_on(outputStream* st) const { + if (is_NaN()) { + st->print("NaN"); + } else { + st->print("%d", value()); + } + } +#endif +}; + +#endif // SHARE_OPTO_NOOVERFLOWINT_HPP diff --git a/src/hotspot/share/opto/traceMergeStoresTag.hpp b/src/hotspot/share/opto/traceMergeStoresTag.hpp new file mode 100644 index 0000000000000..9f33c9efa0525 --- /dev/null +++ b/src/hotspot/share/opto/traceMergeStoresTag.hpp @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_OPTO_TRACEMERGESTORESTAG_HPP +#define SHARE_OPTO_TRACEMERGESTORESTAG_HPP + +#include "utilities/bitMap.inline.hpp" +#include "utilities/stringUtils.hpp" + +namespace TraceMergeStores { + #define COMPILER_TAG(flags) \ + flags(BASIC, "Trace basic analysis steps") \ + flags(POINTER, "Trace pointer IR") \ + flags(ALIASING, "Trace MemPointerSimpleForm::get_aliasing_with") \ + flags(ADJACENCY, "Trace adjacency") \ + flags(SUCCESS, "Trace successful merges") \ + + #define table_entry(name, description) name, + enum Tag { + COMPILER_TAG(table_entry) + TAG_NUM, + TAG_NONE + }; + #undef table_entry + + static const char* tag_descriptions[] = { + #define array_of_labels(name, description) description, + COMPILER_TAG(array_of_labels) + #undef array_of_labels + }; + + static const char* tag_names[] = { + #define array_of_labels(name, description) #name, + COMPILER_TAG(array_of_labels) + #undef array_of_labels + }; + + static Tag find_tag(const char* str) { + for (int i = 0; i < TAG_NUM; i++) { + if (strcmp(tag_names[i], str) == 0) { + return (Tag)i; + } + } + return TAG_NONE; + } + + class TagValidator { + private: + CHeapBitMap _tags; + bool _valid; + char* _bad; + bool _is_print_usage; + + public: + TagValidator(ccstrlist option, bool is_print_usage) : + _tags(TAG_NUM, mtCompiler), + _valid(true), + _bad(nullptr), + _is_print_usage(is_print_usage) + { + for (StringUtils::CommaSeparatedStringIterator iter(option); *iter != nullptr && _valid; ++iter) { + char const* tag_name = *iter; + if (strcmp("help", tag_name) == 0) { + if (_is_print_usage) { + print_help(); + } + continue; + } + bool set_bit = true; + // Check for "TAG" or "-TAG" + if (strncmp("-", tag_name, strlen("-")) == 0) { + tag_name++; + set_bit = false; + } + Tag tag = find_tag(tag_name); + if (TAG_NONE == tag) { + // cap len to a value we know is enough for all tags + const size_t len = MIN2(strlen(*iter), 63) + 1; + _bad = NEW_C_HEAP_ARRAY(char, len, mtCompiler); + // strncpy always writes len characters. If the source string is + // shorter, the function fills the remaining bytes with nulls. + strncpy(_bad, *iter, len); + _valid = false; + } else { + assert(tag < TAG_NUM, "out of bounds"); + _tags.at_put(tag, set_bit); + } + } + } + + ~TagValidator() { + if (_bad != nullptr) { + FREE_C_HEAP_ARRAY(char, _bad); + } + } + + bool is_valid() const { return _valid; } + const char* what() const { return _bad; } + const CHeapBitMap& tags() const { + assert(is_valid(), "only read tags when valid"); + return _tags; + } + + static void print_help() { + tty->cr(); + tty->print_cr("Usage for CompileCommand TraceMergeStores:"); + tty->print_cr(" -XX:CompileCommand=TraceMergeStores,,"); + tty->print_cr(" %-22s %s", "tags", "descriptions"); + for (int i = 0; i < TAG_NUM; i++) { + tty->print_cr(" %-22s %s", tag_names[i], tag_descriptions[i]); + } + tty->cr(); + } + }; +} + +#endif // SHARE_OPTO_TRACEMERGESTORESTAG_HPP diff --git a/test/hotspot/gtest/opto/test_no_overflow_int.cpp b/test/hotspot/gtest/opto/test_no_overflow_int.cpp new file mode 100644 index 0000000000000..7b4b4259bb841 --- /dev/null +++ b/test/hotspot/gtest/opto/test_no_overflow_int.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/noOverflowInt.hpp" +#include "unittest.hpp" + +static void check_jlong(const jlong val) { + const NoOverflowInt x(val); + + if (val > max_jint || min_jint > val) { + ASSERT_TRUE(x.is_NaN()); + } else { + ASSERT_FALSE(x.is_NaN()); + ASSERT_EQ(x.value(), val); + } +} + +TEST_VM(opto, NoOverflowInt_check_jlong) { + jlong start = (jlong)min_jint - 10000LL; + jlong end = (jlong)max_jint + 10000LL; + for (jlong i = start; i < end; i+= 1000LL) { + check_jlong(i); + } + + check_jlong((jlong)min_jint - 1LL); + check_jlong((jlong)min_jint); + check_jlong((jlong)min_jint + 1LL); + check_jlong((jlong)max_jint - 1LL); + check_jlong((jlong)max_jint); + check_jlong((jlong)max_jint + 1LL); + + const NoOverflowInt nan; + ASSERT_TRUE(nan.is_NaN()); +} + +TEST_VM(opto, NoOverflowInt_add_sub) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + ASSERT_EQ((one + two).value(), 3); + ASSERT_EQ((one - two).value(), -1); + ASSERT_TRUE((nan + one).is_NaN()); + ASSERT_TRUE((one + nan).is_NaN()); + ASSERT_TRUE((nan + nan).is_NaN()); + ASSERT_TRUE((nan - one).is_NaN()); + ASSERT_TRUE((one - nan).is_NaN()); + ASSERT_TRUE((nan - nan).is_NaN()); + + ASSERT_EQ((big + one).value(), (1 << 30) + 1); + ASSERT_TRUE((big + big).is_NaN()); + ASSERT_EQ((big - one).value(), (1 << 30) - 1); + ASSERT_EQ((big - big).value(), 0); + + ASSERT_EQ((big - one + big).value(), max_jint); + ASSERT_EQ((zero - big - big).value(), min_jint); + ASSERT_TRUE((zero - big - big - one).is_NaN()); +} + +TEST_VM(opto, NoOverflowInt_mul) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + ASSERT_EQ((one * two).value(), 2); + ASSERT_TRUE((nan * one).is_NaN()); + ASSERT_TRUE((one * nan).is_NaN()); + ASSERT_TRUE((nan * nan).is_NaN()); + + ASSERT_EQ((big * one).value(), (1 << 30)); + ASSERT_EQ((one * big).value(), (1 << 30)); + ASSERT_EQ((big * zero).value(), 0); + ASSERT_EQ((zero * big).value(), 0); + ASSERT_TRUE((big * big).is_NaN()); + ASSERT_TRUE((big * two).is_NaN()); + + ASSERT_EQ(((big - one) * two).value(), max_jint - 1); + ASSERT_EQ(((one - big) * two).value(), min_jint + 2); + ASSERT_EQ(((zero - big) * two).value(), min_jint); + ASSERT_TRUE(((big + one) * two).is_NaN()); + ASSERT_TRUE(((zero - big - one) * two).is_NaN()); +} + +TEST_VM(opto, NoOverflowInt_lshift) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + for (int i = 0; i < 31; i++) { + ASSERT_EQ((one << NoOverflowInt(i)).value(), 1LL << i); + } + for (int i = 31; i < 1000; i++) { + ASSERT_TRUE((one << 
NoOverflowInt(i)).is_NaN()); + } + for (int i = -1000; i < 0; i++) { + ASSERT_TRUE((one << NoOverflowInt(i)).is_NaN()); + } + + ASSERT_EQ((NoOverflowInt(3) << NoOverflowInt(2)).value(), 3 * 4); + ASSERT_EQ((NoOverflowInt(11) << NoOverflowInt(5)).value(), 11 * 32); + ASSERT_EQ((NoOverflowInt(-13) << NoOverflowInt(4)).value(), -13 * 16); +} + +TEST_VM(opto, NoOverflowInt_misc) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + // operator== + ASSERT_FALSE(nan == nan); + ASSERT_FALSE(nan == zero); + ASSERT_FALSE(zero == nan); + ASSERT_TRUE(zero == zero); + ASSERT_TRUE(one == one); + ASSERT_TRUE((one + two) == (two + one)); + ASSERT_TRUE((big + two) == (two + big)); + ASSERT_FALSE((big + big) == (big + big)); + ASSERT_TRUE((big - one + big) == (big - one + big)); + + // abs + for (int i = 0; i < (1 << 31); i += 1024) { + ASSERT_EQ(NoOverflowInt(i).abs().value(), i); + ASSERT_EQ(NoOverflowInt(-i).abs().value(), i); + } + ASSERT_EQ(NoOverflowInt(max_jint).abs().value(), max_jint); + ASSERT_EQ(NoOverflowInt(min_jint + 1).abs().value(), max_jint); + ASSERT_TRUE(NoOverflowInt(min_jint).abs().is_NaN()); + ASSERT_TRUE(NoOverflowInt(nan).abs().is_NaN()); + + // is_multiple_of + ASSERT_TRUE(one.is_multiple_of(one)); + ASSERT_FALSE(one.is_multiple_of(nan)); + ASSERT_FALSE(nan.is_multiple_of(one)); + ASSERT_FALSE(nan.is_multiple_of(nan)); + for (int i = 0; i < (1 << 31); i += 1023) { + ASSERT_TRUE(NoOverflowInt(i).is_multiple_of(one)); + ASSERT_TRUE(NoOverflowInt(-i).is_multiple_of(one)); + ASSERT_FALSE(NoOverflowInt(i).is_multiple_of(zero)); + ASSERT_FALSE(NoOverflowInt(-i).is_multiple_of(zero)); + } + ASSERT_TRUE(NoOverflowInt(33 * 7).is_multiple_of(NoOverflowInt(33))); + ASSERT_TRUE(NoOverflowInt(13 * 5).is_multiple_of(NoOverflowInt(5))); + ASSERT_FALSE(NoOverflowInt(7).is_multiple_of(NoOverflowInt(5))); +} + diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index a94004d8e26c3..c8e8bd337ad4a 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -33,7 +33,7 @@ /* * @test - * @bug 8318446 8331054 8331311 + * @bug 8318446 8331054 8331311 8335392 * @summary Test merging of consecutive stores * @modules java.base/jdk.internal.misc * @library /test/lib / @@ -42,7 +42,7 @@ /* * @test - * @bug 8318446 8331054 8331311 + * @bug 8318446 8331054 8331311 8335392 * @summary Test merging of consecutive stores * @modules java.base/jdk.internal.misc * @library /test/lib / @@ -75,6 +75,17 @@ public class TestMergeStores { long vL1; long vL2; + static int zero0 = 0; + static int zero1 = 0; + static int zero2 = 0; + static int zero3 = 0; + static int zero4 = 0; + static int zero5 = 0; + static int zero6 = 0; + static int zero7 = 0; + static int zero8 = 0; + static int zero9 = 0; + interface TestFunction { Object[] run(boolean isWarmUp, int rnd); } @@ -154,6 +165,15 @@ public TestMergeStores() { testGroups.get("test7BE").put("test7RBE", (_,_) -> { return test7RBE(aB.clone(), offset1, vI1); }); testGroups.get("test7BE").put("test7aBE", (_,_) -> { return test7aBE(aB.clone(), offset1, vI1); }); + testGroups.put("test10", new HashMap()); + testGroups.get("test10").put("test10R", (_,_) -> { return test10R(aB.clone()); }); + testGroups.get("test10").put("test10a", (_,_) -> { return test10a(aB.clone()); }); + testGroups.get("test10").put("test10b", (_,_) -> { return 
test10b(aB.clone()); }); + testGroups.get("test10").put("test10c", (_,_) -> { return test10c(aB.clone()); }); + testGroups.get("test10").put("test10d", (_,_) -> { return test10d(aB.clone()); }); + testGroups.get("test10").put("test10e", (_,_) -> { return test10e(aB.clone()); }); + testGroups.get("test10").put("test10f", (_,_) -> { return test10f(aB.clone()); }); + testGroups.put("test100", new HashMap()); testGroups.get("test100").put("test100R", (_,_) -> { return test100R(aS.clone(), offset1); }); testGroups.get("test100").put("test100a", (_,_) -> { return test100a(aS.clone(), offset1); }); @@ -234,6 +254,10 @@ public TestMergeStores() { testGroups.get("test600").put("test600R", (_,i) -> { return test600R(aB.clone(), aI.clone(), i); }); testGroups.get("test600").put("test600a", (_,i) -> { return test600a(aB.clone(), aI.clone(), i); }); + testGroups.put("test601", new HashMap()); + testGroups.get("test601").put("test601R", (_,i) -> { return test601R(aB.clone(), aI.clone(), i, offset1); }); + testGroups.get("test601").put("test601a", (_,i) -> { return test601a(aB.clone(), aI.clone(), i, offset1); }); + testGroups.put("test700", new HashMap()); testGroups.get("test700").put("test700R", (_,i) -> { return test700R(aI.clone(), i); }); testGroups.get("test700").put("test700a", (_,i) -> { return test700a(aI.clone(), i); }); @@ -274,6 +298,12 @@ public TestMergeStores() { "test5a", "test6a", "test7a", + "test10a", + "test10b", + "test10c", + "test10d", + "test10e", + "test10f", "test7aBE", "test100a", "test101a", @@ -292,6 +322,7 @@ public TestMergeStores() { "test501aBE", "test502aBE", "test600a", + "test601a", "test700a", "test800a", "test800aBE"}) @@ -611,9 +642,8 @@ static Object[] test1e(byte[] a) { } @Test - // Disabled by JDK-8335390, to be enabled again by JDK-8335392. - // @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, - // applyIf = {"UseUnalignedAccesses", "true"}) + @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test1f(byte[] a) { UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0, (byte)0xbe); UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1, (byte)0xba); @@ -1124,6 +1154,145 @@ static Object[] test7aBE(byte[] a, int offset1, int v1) { return new Object[]{ a }; } + @DontCompile + static Object[] test10R(byte[] a) { + int zero = zero0 + zero1 + zero2 + zero3 + zero4 + + zero5 + zero6 + zero7 + zero8 + zero9; + a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merge + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test10a(byte[] a) { + // We have 11 summands: 10x zero variable + 1x array base. + // Parsing only allows 10 summands -> does not merge the stores. 
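+        // (The 10 "zeroN" variables plus the array base give 11 summands, which exceeds the
+        //  summand limit of 10 in mempointer.hpp, so the parser presumably falls back to the
+        //  trivial decomposed form; see MemPointerDecomposedForm::make.)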
+ int zero = zero0 + zero1 + zero2 + zero3 + zero4 + + zero5 + zero6 + zero7 + zero8 + zero9; + a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test10b(byte[] a) { + int zero = zero0 + zero1 + zero2 + zero3 + zero4 + + zero5 + zero6 + zero7 + zero8; + // We have 10 summands: 9x zero variable + 1x array base. + // Parsing allows 10 summands, so this should merge the stores. + a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test10c(byte[] a) { + int zero = 7 * zero0 + 7 * zero1 + 7 * zero2 + 7 * zero3 + 7 * zero4 + + 7 * zero5 + 7 * zero6 + 7 * zero7 + 7 * zero8; + // The "7 * zero" is split into "zero << 3 - zero". But the parsing combines it again, lowering the summand count. + // We have 10 summands: 9x zero variable + 1x array base. + // Parsing allows 10 summands, so this should merge the stores. + a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test10d(byte[] a) { + // Summand is subtracted from itself -> scale = 0 -> should be removed from list. 
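+        // For example, the first store's address parses roughly as
+        //   base + 1 * zero0 - 1 * zero0 + 0  ->  base + 0
+        // so only the array base and the constant remain as summands.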
+ UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 1) - zero0, (byte)'e'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 2) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 3) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 4) - zero0, (byte)'o'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 5) - zero0, (byte)' '); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 6) - zero0, (byte)':'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 7) - zero0, (byte)')'); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test10e(byte[] a) { + // Summand is subtracted from itself -> scale = 0 -> should be removed from list. Thus equal to if not present at all. + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 1) - zero0, (byte)'e'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 2) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 3) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4, (byte)'o'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 5, (byte)' '); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 6, (byte)':'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 7, (byte)')'); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merge + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test10f(byte[] a) { + int big = 1 << 29; + // Adding up the scales overflows -> no merge. 
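+        // The combined scale for zero9 would be 4 * (1 << 29) = 2^31, which does not fit in a
+        // jint, so the parsed scale overflows (NoOverflowInt NaN) and the stores stay separate.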
+ long offset = zero9 * big + zero9 * big + zero9 * big + zero9 * big; + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 0, (byte)'h'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 1, (byte)'e'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 2, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 3, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 4, (byte)'o'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 5, (byte)' '); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 6, (byte)':'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 7, (byte)')'); + return new Object[]{ a }; + } + @DontCompile static Object[] test100R(short[] a, int offset) { a[offset + 0] = (short)0x0100; @@ -1560,15 +1729,12 @@ static Object[] test400R(int[] a) { } @Test - // We must be careful with mismatched accesses on arrays: - // An int-array can have about 2x max_int size, and hence if we address bytes in it, we can have int-overflows. - // We might consider addresses (x + 0) and (x + 1) as adjacent, even if x = max_int, and therefore the second - // address overflows and is not adjacent at all. - // Therefore, we should only consider stores that have the same size as the element type of the array. - @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merging + // All constants are known, and AddI can be converted to AddL safely, hence the stores can be merged. + @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_C_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", - IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test400a(int[] a) { UNSAFE.putByte(a, UNSAFE.ARRAY_INT_BASE_OFFSET + 0, (byte)0xbe); UNSAFE.putByte(a, UNSAFE.ARRAY_INT_BASE_OFFSET + 1, (byte)0xba); @@ -1858,7 +2024,11 @@ static Object[] test600R(byte[] aB, int[] aI, int i) { } @Test - @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8"}) // note: bottom type + @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test600a(byte[] aB, int[] aI, int i) { Object a = null; long base = 0; @@ -1869,7 +2039,7 @@ static Object[] test600a(byte[] aB, int[] aI, int i) { a = aI; base = UNSAFE.ARRAY_INT_BASE_OFFSET; } - // array a is an aryptr, but its element type is unknown, i.e. bottom. + // Array type is unknown, i.e. bottom[]. But all AddI can be safely converted to AddL -> safe to merge. 
UNSAFE.putByte(a, base + 0, (byte)0xbe); UNSAFE.putByte(a, base + 1, (byte)0xba); UNSAFE.putByte(a, base + 2, (byte)0xad); @@ -1881,6 +2051,63 @@ static Object[] test600a(byte[] aB, int[] aI, int i) { return new Object[]{ aB, aI }; } + @DontCompile + static Object[] test601R(byte[] aB, int[] aI, int i, int offset1) { + Object a = null; + long base = 0; + if (i % 2 == 0) { + a = aB; + base = UNSAFE.ARRAY_BYTE_BASE_OFFSET; + } else { + a = aI; + base = UNSAFE.ARRAY_INT_BASE_OFFSET; + } + UNSAFE.putByte(a, base + (offset1 + 0), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 1), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 2), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 3), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 4), (byte)0xef); + UNSAFE.putByte(a, base + (offset1 + 5), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 6), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 7), (byte)0xde); + return new Object[]{ aB, aI }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // nothing merged + IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"64-bit", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"32-bit", "true"}) + static Object[] test601a(byte[] aB, int[] aI, int i, int offset1) { + Object a = null; + long base = 0; + if (i % 2 == 0) { + a = aB; + base = UNSAFE.ARRAY_BYTE_BASE_OFFSET; + } else { + a = aI; + base = UNSAFE.ARRAY_INT_BASE_OFFSET; + } + // Array type is unknown, i.e. bottom[]. Hence we do not know the element size of the array. + // Thus, on 64-bits systems merging is not safe, there could be overflows. + UNSAFE.putByte(a, base + (offset1 + 0), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 1), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 2), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 3), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 4), (byte)0xef); + UNSAFE.putByte(a, base + (offset1 + 5), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 6), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 7), (byte)0xde); + return new Object[]{ aB, aI }; + } + @DontCompile static Object[] test700R(int[] a, long v1) { a[0] = (int)(v1 >> -1); diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java new file mode 100644 index 0000000000000..a5302d1b5158e --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java @@ -0,0 +1,426 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.c2; + +import compiler.lib.ir_framework.*; +import jdk.test.lib.Utils; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.HashMap; +import java.util.Random; +import java.lang.foreign.*; + +/* + * @test id=byte-array + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment ByteArray + */ + +/* + * @test id=char-array + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment CharArray + */ + +/* + * @test id=short-array + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment ShortArray + */ + +/* + * @test id=int-array + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment IntArray + */ + +/* + * @test id=long-array + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment LongArray + */ + +/* + * @test id=float-array + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment FloatArray + */ + +/* + * @test id=double-array + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment DoubleArray + */ + +/* + * @test id=byte-buffer + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment ByteBuffer + */ + +/* + * @test id=byte-buffer-direct + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment ByteBufferDirect + */ + +/* + * @test id=native + * @bug 8335392 + * @summary Test MergeStores optimization for MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment Native + */ + +// FAILS: mixed providers currently do not merge stores. Maybe there is some inlining issue. 
+// /* +// * @test id=mixed-array +// * @bug 8335392 +// * @summary Test MergeStores optimization for MemorySegment +// * @library /test/lib / +// * @run driver compiler.c2.TestMergeStoresMemorySegment MixedArray +// */ +// +// /* +// * @test id=MixedBuffer +// * @bug 8335392 +// * @summary Test MergeStores optimization for MemorySegment +// * @library /test/lib / +// * @run driver compiler.c2.TestMergeStoresMemorySegment MixedBuffer +// */ +// +// /* +// * @test id=mixed +// * @bug 8335392 +// * @summary Test MergeStores optimization for MemorySegment +// * @library /test/lib / +// * @run driver compiler.c2.TestMergeStoresMemorySegment Mixed +// */ + +public class TestMergeStoresMemorySegment { + public static void main(String[] args) { + for (String unaligned : new String[]{"-XX:-UseUnalignedAccesses", "-XX:+UseUnalignedAccesses"}) { + TestFramework framework = new TestFramework(TestMergeStoresMemorySegmentImpl.class); + framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0], unaligned); + framework.start(); + } + } +} + +class TestMergeStoresMemorySegmentImpl { + static final int BACKING_SIZE = 1024 * 8; + static final Random RANDOM = Utils.getRandomInstance(); + + private static final String START = "(\\d+(\\s){2}("; + private static final String MID = ".*)+(\\s){2}===.*"; + private static final String END = ")"; + + // Custom Regex: allows us to only match Store that come from MemorySegment internals. + private static final String REGEX_STORE_B_TO_MS_FROM_B = START + "StoreB" + MID + END + "ScopedMemoryAccess::putByteInternal"; + private static final String REGEX_STORE_C_TO_MS_FROM_B = START + "StoreC" + MID + END + "ScopedMemoryAccess::putByteInternal"; + private static final String REGEX_STORE_I_TO_MS_FROM_B = START + "StoreI" + MID + END + "ScopedMemoryAccess::putByteInternal"; + private static final String REGEX_STORE_L_TO_MS_FROM_B = START + "StoreL" + MID + END + "ScopedMemoryAccess::putByteInternal"; + + interface TestFunction { + Object[] run(); + } + + interface MemorySegmentProvider { + MemorySegment newMemorySegment(); + } + + static MemorySegmentProvider provider; + + static { + String providerName = System.getProperty("memorySegmentProviderNameForTestVM"); + provider = switch (providerName) { + case "ByteArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteArray; + case "CharArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfCharArray; + case "ShortArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfShortArray; + case "IntArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfIntArray; + case "LongArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfLongArray; + case "FloatArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfFloatArray; + case "DoubleArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfDoubleArray; + case "ByteBuffer" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteBuffer; + case "ByteBufferDirect" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteBufferDirect; + case "Native" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfNative; + case "MixedArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixedArray; + case "MixedBuffer" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixedBuffer; + case "Mixed" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixed; + default -> throw new RuntimeException("Test argument not recognized: " + providerName); + }; + } + + // List of tests + Map tests = new HashMap<>(); + + // List of 
golden values, the results from the first run before compilation + Map golds = new HashMap<>(); + + public TestMergeStoresMemorySegmentImpl () { + // Generate two MemorySegments as inputs + MemorySegment a = newMemorySegment(); + MemorySegment b = newMemorySegment(); + fillRandom(a); + fillRandom(b); + + // Future Work: add more test cases. For now, the issue seems to be that + // RangeCheck smearing does not remove the RangeChecks, thus + // we can only ever merge two stores. + // + // Ideas for more test cases, once they are better optimized: + // + // Have about 3 variables, each either int or long. Add all in int or + // long. Give them different scales. Compute the address in the same + // expression or separately. Use different element store sizes (BCIL). + // + tests.put("test_xxx", () -> test_xxx(copy(a), 5, 11, 31)); + tests.put("test_yyy", () -> test_yyy(copy(a), 5, 11, 31)); + tests.put("test_zzz", () -> test_zzz(copy(a), 5, 11, 31)); + + // Compute gold value for all test methods before compilation + for (Map.Entry entry : tests.entrySet()) { + String name = entry.getKey(); + TestFunction test = entry.getValue(); + Object[] gold = test.run(); + golds.put(name, gold); + } + } + + MemorySegment newMemorySegment() { + return provider.newMemorySegment(); + } + + MemorySegment copy(MemorySegment src) { + MemorySegment dst = newMemorySegment(); + MemorySegment.copy(src, 0, dst, 0, src.byteSize()); + return dst; + } + + static MemorySegment newMemorySegmentOfByteArray() { + return MemorySegment.ofArray(new byte[BACKING_SIZE]); + } + + static MemorySegment newMemorySegmentOfCharArray() { + return MemorySegment.ofArray(new char[BACKING_SIZE / 2]); + } + + static MemorySegment newMemorySegmentOfShortArray() { + return MemorySegment.ofArray(new short[BACKING_SIZE / 2]); + } + + static MemorySegment newMemorySegmentOfIntArray() { + return MemorySegment.ofArray(new int[BACKING_SIZE / 4]); + } + + static MemorySegment newMemorySegmentOfLongArray() { + return MemorySegment.ofArray(new long[BACKING_SIZE / 8]); + } + + static MemorySegment newMemorySegmentOfFloatArray() { + return MemorySegment.ofArray(new float[BACKING_SIZE / 4]); + } + + static MemorySegment newMemorySegmentOfDoubleArray() { + return MemorySegment.ofArray(new double[BACKING_SIZE / 8]); + } + + static MemorySegment newMemorySegmentOfByteBuffer() { + return MemorySegment.ofBuffer(ByteBuffer.allocate(BACKING_SIZE)); + } + + static MemorySegment newMemorySegmentOfByteBufferDirect() { + return MemorySegment.ofBuffer(ByteBuffer.allocateDirect(BACKING_SIZE)); + } + + static MemorySegment newMemorySegmentOfNative() { + // Auto arena: GC decides when there is no reference to the MemorySegment, + // and then it deallocates the backing memory. 
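+        // allocate(byteSize, byteAlignment): 1-byte alignment is sufficient here, since the
+        // tests below only perform JAVA_BYTE and unaligned JAVA_LONG accesses on the segment.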
+ return Arena.ofAuto().allocate(BACKING_SIZE, 1); + } + + static MemorySegment newMemorySegmentOfMixedArray() { + switch(RANDOM.nextInt(7)) { + case 0 -> { return newMemorySegmentOfByteArray(); } + case 1 -> { return newMemorySegmentOfCharArray(); } + case 2 -> { return newMemorySegmentOfShortArray(); } + case 3 -> { return newMemorySegmentOfIntArray(); } + case 4 -> { return newMemorySegmentOfLongArray(); } + case 5 -> { return newMemorySegmentOfFloatArray(); } + default -> { return newMemorySegmentOfDoubleArray(); } + } + } + + static MemorySegment newMemorySegmentOfMixedBuffer() { + switch (RANDOM.nextInt(2)) { + case 0 -> { return newMemorySegmentOfByteBuffer(); } + default -> { return newMemorySegmentOfByteBufferDirect(); } + } + } + + static MemorySegment newMemorySegmentOfMixed() { + switch (RANDOM.nextInt(3)) { + case 0 -> { return newMemorySegmentOfMixedArray(); } + case 1 -> { return newMemorySegmentOfMixedBuffer(); } + default -> { return newMemorySegmentOfNative(); } + } + } + + static void fillRandom(MemorySegment data) { + for (int i = 0; i < (int)data.byteSize(); i += 8) { + data.set(ValueLayout.JAVA_LONG_UNALIGNED, i, RANDOM.nextLong()); + } + } + + + static void verify(String name, Object[] gold, Object[] result) { + if (gold.length != result.length) { + throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + + gold.length + ", result.length = " + result.length); + } + for (int i = 0; i < gold.length; i++) { + Object g = gold[i]; + Object r = result[i]; + if (g == r) { + throw new RuntimeException("verify " + name + ": should be two separate objects (with identical content):" + + " gold[" + i + "] == result[" + i + "]"); + } + + if (!(g instanceof MemorySegment && r instanceof MemorySegment)) { + throw new RuntimeException("verify " + name + ": only MemorySegment supported, i=" + i); + } + + MemorySegment mg = (MemorySegment)g; + MemorySegment mr = (MemorySegment)r; + + if (mg.byteSize() != mr.byteSize()) { + throw new RuntimeException("verify " + name + ": MemorySegment must have same byteSize:" + + " gold[" + i + "].byteSize = " + mg.byteSize() + + " result[" + i + "].byteSize = " + mr.byteSize()); + } + + for (int j = 0; j < (int)mg.byteSize(); j++) { + byte vg = mg.get(ValueLayout.JAVA_BYTE, j); + byte vr = mr.get(ValueLayout.JAVA_BYTE, j); + if (vg != vr) { + throw new RuntimeException("verify " + name + ": MemorySegment must have same content:" + + " gold[" + i + "][" + j + "] = " + vg + + " result[" + i + "][" + j + "] = " + vr); + } + } + } + } + + @Run(test = { "test_xxx", "test_yyy", "test_zzz" }) + void runTests() { + for (Map.Entry entry : tests.entrySet()) { + String name = entry.getKey(); + TestFunction test = entry.getValue(); + // Recall gold value from before compilation + Object[] gold = golds.get(name); + // Compute new result + Object[] result = test.run(); + // Compare gold and new result + verify(name, gold, result); + } + } + + @Test + @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC + REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged + REGEX_STORE_I_TO_MS_FROM_B, "0", + REGEX_STORE_L_TO_MS_FROM_B, "0"}, + phase = CompilePhase.PRINT_IDEAL, + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test_xxx(MemorySegment a, int xI, int yI, int zI) { + // All RangeChecks remain -> RC smearing not good enough? 
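+        // As noted in the constructor comment above, without RangeCheck smearing only two
+        // adjacent byte stores merge at a time, which is why the IR rules expect char-sized
+        // (StoreC) merged stores but no int- or long-sized ones.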
+ a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 0), (byte)'h'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 1), (byte)'e'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 2), (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 3), (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 4), (byte)'o'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 5), (byte)' '); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 6), (byte)':'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 7), (byte)')'); + return new Object[]{ a }; + } + + @Test + @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC + REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged + REGEX_STORE_I_TO_MS_FROM_B, "0", + REGEX_STORE_L_TO_MS_FROM_B, "0"}, + phase = CompilePhase.PRINT_IDEAL, + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test_yyy(MemorySegment a, int xI, int yI, int zI) { + // All RangeChecks remain -> RC smearing not good enough? + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 0L, (byte)'h'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 1L, (byte)'e'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 2L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 3L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 4L, (byte)'o'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 5L, (byte)' '); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 6L, (byte)':'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 7L, (byte)')'); + return new Object[]{ a }; + } + + @Test + @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC + REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged + REGEX_STORE_I_TO_MS_FROM_B, "0", + REGEX_STORE_L_TO_MS_FROM_B, "0"}, + phase = CompilePhase.PRINT_IDEAL, + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test_zzz(MemorySegment a, long xL, long yL, long zL) { + // All RangeChecks remain -> RC smearing not good enough? 
+ a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 0L, (byte)'h'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 1L, (byte)'e'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 2L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 3L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 4L, (byte)'o'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 5L, (byte)' '); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 6L, (byte)':'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 7L, (byte)')'); + return new Object[]{ a }; + } +} diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java index dbfdfe6895766..3b65272c3c7ff 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java @@ -52,6 +52,10 @@ public class TestMergeStoresUnsafeArrayPointer { static final long ANCHOR = BYTE_SIZE / 2; static int four = 4; + static int max_int = Integer.MAX_VALUE; + static int min_int = Integer.MIN_VALUE; + static int val_2_to_30 = (1 << 30); + static int large_by_53 = (int)((1L << 31) / 53L + 1L); public static void main(String[] args) { System.out.println("Allocate big array of SIZE = " + SIZE); @@ -95,6 +99,103 @@ public static void main(String[] args) { } } + val = 0; + System.out.println("test3"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test3(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test3 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + + val = 0; + System.out.println("test4"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test4(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test4 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + + val = 0; + System.out.println("test5"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test5(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test5 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + + val = 0; + System.out.println("test6"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test6(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test6 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + + val = 0; + System.out.println("test7"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test7(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test7 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + + // No result verification here. We only want to make sure we do not hit asserts. 
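+        // test8* and test9* use very large offsets (around 1L << 30 and 1L << 31) purely to
+        // stress the internal distance computation; see their definitions further down.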
+ System.out.println("test8 and test9"); + for (int i = 0; i < 100_000; i++) { + test8a(big, ANCHOR); + test8b(big, ANCHOR); + test8c(big, ANCHOR); + test8d(big, ANCHOR); + test9a(big, ANCHOR); + test9b(big, ANCHOR); + test9c(big, ANCHOR); + } + if (errors > 0) { throw new RuntimeException("ERRORS: " + errors); } @@ -129,4 +230,95 @@ static void test2(int[] a, long anchor) { UNSAFE.putInt(a, base + 0 + (long)(four + Integer.MAX_VALUE), 0x42424242); UNSAFE.putInt(a, base + Integer.MAX_VALUE + (long)(four + 4 ), 0x66666666); } + + // Test: if MergeStores is applied this can lead to wrong results + // -> AddI needs overflow check. + static void test3(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(max_int + 0), 0x42424242); + UNSAFE.putInt(a, base + (long)(max_int + 4), 0x66666666); + } + + // Test: "max_int - four" cannot be parsed further, but would not make a difference here. + static void test4(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(min_int - four) + 0, 0x42424242); + UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666); + } + + // Test: if MergeStores is applied this can lead to wrong results + // -> SubI needs overflow check. + static void test5(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(min_int) - (long)(four) + 0, 0x42424242); // no overflow + UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666); // overflow + } + + // Test: if MergeStores is applied this can lead to wrong results + // -> LShiftI needs overflow check. + static void test6(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(2 * val_2_to_30) + 0, 0x42424242); // overflow + UNSAFE.putInt(a, base + 2L * (long)(val_2_to_30) + 4, 0x66666666); // no overflow + } + + // Test: if MergeStores is applied this can lead to wrong results + // -> MulI needs overflow check. 
+    static void test7(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putInt(a, base + (long)(53 * large_by_53) + 0, 0x42424242); // overflow
+        UNSAFE.putInt(a, base + 53L * (long)(large_by_53) + 4, 0x66666666); // no overflow
+    }
+
+    // Test: check if large distance leads to assert
+    static void test8a(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putByte(a, base + (1L << 11) + 0, (byte)42);
+        UNSAFE.putByte(a, base + (1L << 11) + (1L << 30), (byte)11);
+    }
+
+    // Test: check if large distance leads to assert
+    static void test8b(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putByte(a, base + (1L << 11) + (1L << 30), (byte)11);
+        UNSAFE.putByte(a, base + (1L << 11) + 0, (byte)42);
+    }
+
+    // Test: check if large distance leads to assert
+    static void test8c(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putByte(a, base - (1L << 11) - 0, (byte)42);
+        UNSAFE.putByte(a, base - (1L << 11) - (1L << 30), (byte)11);
+    }
+
+    // Test: check if large distance leads to assert
+    static void test8d(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putByte(a, base - (1L << 11) - (1L << 30), (byte)11);
+        UNSAFE.putByte(a, base - (1L << 11) - 0, (byte)42);
+    }
+
+    // Test: check if large distance leads to assert
+    // case: bad distance: NaN
+    static void test9a(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putByte(a, base - 100, (byte)42);
+        UNSAFE.putByte(a, base - 100 + (1L << 31), (byte)11);
+    }
+
+    // Test: check if large distance leads to assert
+    // case: just before NaN, it is still a valid distance for MemPointer aliasing.
+    static void test9b(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putByte(a, base - 100, (byte)42);
+        UNSAFE.putByte(a, base - 100 + (1L << 31) - 1, (byte)11);
+    }
+
+    // Test: check if large distance leads to assert
+    // case: constant too large
+    static void test9c(int[] a, long anchor) {
+        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
+        UNSAFE.putByte(a, base, (byte)42);
+        UNSAFE.putByte(a, base + (1L << 31), (byte)11);
+    }
 }
diff --git a/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java b/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java
index 93d98116ecc5f..809ec01f495ca 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java
@@ -41,12 +41,12 @@
 @BenchmarkMode(Mode.AverageTime)
 @OutputTimeUnit(TimeUnit.NANOSECONDS)
-@Warmup(iterations = 3, time = 3)
-@Measurement(iterations = 3, time = 3)
-@Fork(value = 3, jvmArgs = {
+@Warmup(iterations = 2, time = 1)
+@Measurement(iterations = 3, time = 1)
+@Fork(value = 1, jvmArgs = {
     "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
     "--add-exports", "java.base/jdk.internal.util=ALL-UNNAMED"})
-@State(Scope.Benchmark)
+@State(Scope.Thread)
 public class MergeStores {
 
     public static final int RANGE = 100;
@@ -66,6 +66,7 @@ public class MergeStores {
     public static byte[] aB = new byte[RANGE];
     public static short[] aS = new short[RANGE];
     public static int[] aI = new int[RANGE];
+    public static long native_adr = UNSAFE.allocateMemory(RANGE * 8);
 
     // -------------------------------------------
     // ------- Little-Endian API ----------
@@ -691,4 +692,59 @@ public int[] store_I2_zero_offs_nonalloc_direct() {
         aI[offset + 1] = 0;
         return aI;
     }
+
+    @Benchmark
+    public void store_unsafe_B8_L_offs_noalloc_direct() {
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, (byte)(vL >> 0 ));
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 1, (byte)(vL >> 8 ));
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 2, (byte)(vL >> 16));
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 3, (byte)(vL >> 24));
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, (byte)(vL >> 32));
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 5, (byte)(vL >> 40));
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 6, (byte)(vL >> 48));
+        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 7, (byte)(vL >> 56));
+    }
+
+    @Benchmark
+    public void store_unsafe_B8_L_offs_noalloc_unsafe() {
+        UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, vL);
+    }
+
+    @Benchmark
+    public void store_unsafe_C4_L_offs_noalloc_direct() {
+        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, (char)(vL >> 0 ));
+        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 2, (char)(vL >> 16));
+        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, (char)(vL >> 32));
+        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 6, (char)(vL >> 48));
+    }
+
+    @Benchmark
+    public void store_unsafe_native_B8_L_offs_noalloc_direct() {
+        UNSAFE.putByte(null, native_adr + offset + 0, (byte)(vL >> 0 ));
+        UNSAFE.putByte(null, native_adr + offset + 1, (byte)(vL >> 8 ));
+        UNSAFE.putByte(null, native_adr + offset + 2, (byte)(vL >> 16));
+        UNSAFE.putByte(null, native_adr + offset + 3, (byte)(vL >> 24));
+        UNSAFE.putByte(null, native_adr + offset + 4, (byte)(vL >> 32));
+        UNSAFE.putByte(null, native_adr + offset + 5, (byte)(vL >> 40));
+        UNSAFE.putByte(null, native_adr + offset + 6, (byte)(vL >> 48));
+        UNSAFE.putByte(null, native_adr + offset + 7, (byte)(vL >> 56));
+    }
+
+    @Benchmark
+    public void store_unsafe_native_C4_L_offs_noalloc_direct() {
+        UNSAFE.putChar(null, native_adr + offset + 0, (char)(vL >> 0 ));
+        UNSAFE.putChar(null, native_adr + offset + 2, (char)(vL >> 16));
+        UNSAFE.putChar(null, native_adr + offset + 4, (char)(vL >> 32));
+        UNSAFE.putChar(null, native_adr + offset + 6, (char)(vL >> 48));
+    }
+
+    @Benchmark
+    public void store_unsafe_native_B8_L_offs_noalloc_unsafe() {
+        UNSAFE.putLongUnaligned(null, native_adr + offset + 0, vL);
+    }
+
+    @Fork(value = 1, jvmArgsPrepend = {
+        "-XX:+UnlockDiagnosticVMOptions", "-XX:-MergeStores"
+    })
+    public static class MergeStoresDisabled extends MergeStores {}
 }
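The overflow checks exercised by test3 through test7 all guard against the same pitfall: an int expression such as max_int + 4 wraps in 32-bit arithmetic before the (long) cast widens it, so two stores that a naive pointer decomposition treats as 4 bytes apart are in fact nearly 2^32 bytes apart, and merging them would place the second value at the wrong address. A minimal standalone sketch of that arithmetic (hypothetical class name, not part of the patch):

// Prints the distance a decomposition that ignores overflow would assume,
// versus the distance the JVM actually computes when the int addition wraps
// before widening to long.
public class Int32OverflowSketch {
    public static void main(String[] args) {
        int maxInt = Integer.MAX_VALUE;

        long wrapThenWiden = (long)(maxInt + 4); // int add wraps to -2147483645, then widens
        long widenThenAdd  = (long)maxInt + 4L;  // widen first: 2147483651, no wrap

        long parsedDistance = 4;                                  // assumed if overflow is ignored
        long actualDistance = wrapThenWiden - (long)(maxInt + 0); // 4 - 2^32 = -4294967292

        System.out.println("wrapThenWiden  = " + wrapThenWiden);
        System.out.println("widenThenAdd   = " + widenThenAdd);
        System.out.println("parsedDistance = " + parsedDistance);
        System.out.println("actualDistance = " + actualDistance);
    }
}

The same wrap-before-widen effect is produced by SubI (test5), LShiftI (test6) and MulI (test7): in each of those, one store uses the wrapping int form and the other the widened long form, so merging is only legal if the compiler can prove that the int arithmetic does not overflow.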