diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 621ba684da114..eb396ed7873b4 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -73,6 +73,8 @@ #include "opto/opcodes.hpp" #include "opto/output.hpp" #include "opto/parse.hpp" +#include "opto/phase.hpp" +#include "opto/phaseloadfolding.hpp" #include "opto/phaseX.hpp" #include "opto/rootnode.hpp" #include "opto/runtime.hpp" @@ -2402,6 +2404,19 @@ void Compile::Optimize() { if (failing()) return; + { + // This phase is much faster than EA, so doing it before EA reduces the work of EA by reducing + // the number of loads. It also helps EA terminate sooner because folded loads may expose + // further EA opportunities, and it is better if an EA opportunity is revealed from the + // beginning than if it is only revealed after some rounds of EA. + TracePhase tp(_t_loadFolding); + PhaseLoadFolding load_folding(igvn); + load_folding.optimize(); + if (failing()) { + return; + } + } + if (has_loops()) { print_method(PHASE_BEFORE_LOOP_OPTS, 2); } diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp index 5603033ce69d8..a8dc270392d92 100644 --- a/src/hotspot/share/opto/phase.cpp +++ b/src/hotspot/share/opto/phase.cpp @@ -68,6 +68,7 @@ void Phase::print_timers() { tty->print_cr (" Conn Graph: %7.3f s", timers[_t_connectionGraph].seconds()); tty->print_cr (" Macro Eliminate: %7.3f s", timers[_t_macroEliminate].seconds()); } + tty->print_cr (" Load Folding: %7.3f s", timers[_t_loadFolding].seconds()); tty->print_cr (" GVN 1: %7.3f s", timers[_t_iterGVN].seconds()); { @@ -100,6 +101,7 @@ void Phase::print_timers() { double other = timers[_t_optimizer].seconds() - (timers[_t_escapeAnalysis].seconds() + + timers[_t_loadFolding].seconds() + timers[_t_iterGVN].seconds() + timers[_t_incrInline].seconds() + timers[_t_vector].seconds() + diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp index 6700df6ec177e..194b5a5c18027 100644 --- a/src/hotspot/share/opto/phase.hpp +++ b/src/hotspot/share/opto/phase.hpp @@ -50,6 +50,7 @@ class Phase : public StackObj { Remove_Useless_And_Renumber_Live, // First, remove useless nodes from the graph. Then, renumber live nodes. Optimistic, // Optimistic analysis phase GVN, // Pessimistic global value numbering phase + LoadFolding, // Aggressively look through loads Ins_Select, // Instruction selection phase CFG, // Build a CFG BlockLayout, // Linear ordering of blocks @@ -73,6 +74,7 @@ class Phase : public StackObj { f( _t_escapeAnalysis, "escapeAnalysis") \ f( _t_connectionGraph, "connectionGraph") \ f( _t_macroEliminate, "macroEliminate") \ + f( _t_loadFolding, "loadFolding") \ f( _t_iterGVN, "iterGVN") \ f( _t_incrInline, "incrementalInline") \ f( _t_incrInline_ideal, "incrementalInline_ideal") \ diff --git a/src/hotspot/share/opto/phaseloadfolding.cpp b/src/hotspot/share/opto/phaseloadfolding.cpp new file mode 100644 index 0000000000000..ae3b28268e194 --- /dev/null +++ b/src/hotspot/share/opto/phaseloadfolding.cpp @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "libadt/vectset.hpp"
+#include "memory/resourceArea.hpp"
+#include "opto/addnode.hpp"
+#include "opto/callnode.hpp"
+#include "opto/cfgnode.hpp"
+#include "opto/compile.hpp"
+#include "opto/memnode.hpp"
+#include "opto/mulnode.hpp"
+#include "opto/node.hpp"
+#include "opto/phaseloadfolding.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+void PhaseLoadFolding::optimize() {
+  ciEnv* env = C->env();
+  if (env->should_retain_local_variables() || env->jvmti_can_walk_any_space()) {
+    // Give up because JVMTI can do wonders
+    return;
+  }
+
+  // This phase is very fast, but it is still preferable not to allow a potentially unbounded
+  // number of iterations
+  for (int i = 0; i < 10; i++) {
+    bool progress = do_optimize();
+    if (!progress) {
+      return;
+    }
+    _igvn.optimize();
+    if (C->failing()) {
+      return;
+    }
+  }
+}
+
+// The escape status of a node is visible in the memory graph. That is, at runtime, if a load 'l'
+// from an object 'o' must be executed after an action 'a' that allows 'o' to escape, and in the
+// IR graph, the node 'L' corresponding to 'l' consumes the address 'O' + c, with 'O' being the
+// node corresponding to the newly allocated object 'o', then there must be a path along the
+// use-def edges from 'L' to the node 'A' that corresponds to 'a'.
+//
+// - If 'a' is a method invocation that receives 'o' as an argument, then in the graph, 'A' kills
+// all memory. As a result, the memory input of 'L' must be a transitive use of 'A'. This is
+// because in a well-behaved memory graph, there is always a path of use-def edges from a memory
+// node to the previous bottom memory node. This is important as it ensures memory fences can
+// serialize memory operations by imposing use-def dependencies between the fence and the
+// surrounding memory nodes.
+// Example:
+//   Integer o = new Integer(v);
+//   int x = o.value;
+//   if (flag) {
+//     consume(o);
+//     int y = o.value;
+//   } else {
+//     consume(null);
+//     int z = o.value;
+//   }
+//   int t = o.value;
+// The memory graph will then look like:
+//     NarrowMemProj (Integer.value)
+//         |             |
+//         |             |
+//   CallJava(o)    CallJava(null)
+//         |             |
+//         |             |
+//       Proj1         Proj2
+//           \         /
+//            \       /
+//              Phi
+// We can see that the object can be considered non-escaped at NarrowMemProj, CallJava(null), and
+// Proj2, while it is considered escaped at CallJava(o), Proj1, and Phi. The loads x and z will be
+// from NarrowMemProj and Proj2, respectively, which means they can be considered loads from an
+// object that has not escaped, and we can fold them to v. On the other hand, the loads y and t
+// are from Proj1 and Phi, respectively, which means we cannot assume that the only value they
+// can see is v.
+//
+// - If 'a' is a store of 'o' into the memory, then 'l' must be executed after 'a' iff:
+//   + There is a memory fence that prevents 'l' from being executed before 'a'. Since a memory
+//     fence kills all memory, the node 'F' corresponding to that fence must be a transitive use
+//     of 'A', and the memory input of 'L' must be a transitive use of 'F', similar to case 1.
+//   + There is a data dependency between 'l' and 'a'. In this case, there must be a path of
+//     use-def edges from 'L' to 'A'.
+// For example:
+//   Integer o = new Integer(v);
+//   *p = o;
+//   Integer o_clone = *p;
+//   o_clone.value = u;
+//   int x = o.value;
+// Then, there is a path of use-def edges:
+//   Load(x = o.value)
+//     | (MemNode::Memory)
+//     v
+//   Store(o_clone.value = u)
+//     | (MemNode::Address)
+//     v
+//   Load(o_clone = *p)
+//     | (MemNode::Memory)
+//     v
+//   Store(*p = o)
+// We can see that we cannot fold x to v, because it must observe the value u, and we can
+// correctly detect that the object o has escaped by following the outputs of the store that
+// allows o to escape.
+//
+// It is important to note that even if 'l' is scheduled after the store 'a', unless there is a
+// memory fence between 'l' and 'a', it is generally not required that 'l' is executed after 'a'.
+// For example:
+// 1. Integer o = new Integer(v);
+//    *p = o;
+//    int x = o.value;
+//    In this case, even if the load x = o.value is declared after the store of o to p that
+//    allows o to escape, it is valid for the load to actually happen before the store. If the
+//    developer wants to ensure that the order in which the memory accesses appear in the program
+//    is the same as the order in which they are executed, memory barriers (e.g. a store-load
+//    barrier) must be placed between them. As a result, we can consider x = o.value to be a load
+//    from an object that has not escaped, and fold it to v.
+// 2. boolean b1, b2;
+//    Point o = new Point(v1, v2);
+//    int r;
+//    if (b1) {
+//      *p = o;
+//    } else {
+//      *q = o;
+//    }
+//    if (b2) {
+//      r = o.x;
+//    } else {
+//      r = o.y;
+//    }
+//    In this case, even if the control flow forces the loads to be scheduled after the stores
+//    that allow o to escape, without actual memory barriers, the JMM does not require the CPU to
+//    execute the loads after the stores (e.g. the loads are in cache so they can be executed
+//    sooner while the stores need to wait for the acquisition of the corresponding cache lines).
+//    As a result, we can consider those loads to be from an object that has not escaped, and fold
+//    o.x to v1 and o.y to v2.
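+//
+// Each pass of do_optimize() below works in three steps: collect the loads from each tracked
+// allocation whose address has the form oop + constant, discard those that may have observed an
+// escape of oop, and walk the memory graph of each remaining load to find the store it must
+// observe. As a rough illustration (using the Point class from the accompanying
+// TestLoadFolding.java, whose constructor stores x = 1 and y = 2):
+//   Point p = new Point();
+//   escape(null);      // a call that kills all memory, but p itself does not escape
+//   int r = p.x + p.y;
+// Both loads can be folded to the values captured by the constructor stores, so r becomes 1 + 2,
+// even though the memory inputs of the loads are pinned after the call.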
+bool PhaseLoadFolding::do_optimize() {
+  bool progress = false;
+  for (int macro_idx = 0; macro_idx < C->macro_count(); macro_idx++) {
+    Node* macro = C->macro_node(macro_idx);
+    if (!macro->is_Allocate()) {
+      continue;
+    }
+
+    AllocateNode* alloc = macro->as_Allocate();
+    Node* oop = alloc->result_cast();
+    if (oop == nullptr) {
+      continue;
+    }
+
+    if (process_allocate_result(oop)) {
+      progress = true;
+    }
+  }
+  return progress;
+}
+
+// Find all loads from oop that have not observed the escape of oop, and try to find their
+// corresponding stores
+bool PhaseLoadFolding::process_allocate_result(Node* oop) {
+  ResourceMark rm;
+  Unique_Node_List candidates;
+  VectorSet candidate_set;
+
+  collect_loads(candidates, candidate_set, oop);
+  if (candidate_set.is_empty()) {
+    return false;
+  }
+
+  WorkLists work_lists;
+  process_candidates(candidate_set, work_lists, oop);
+  if (candidate_set.is_empty()) {
+    return false;
+  }
+
+  bool progress = false;
+  for (uint candidate_idx = 0; candidate_idx < candidates.size(); candidate_idx++) {
+    LoadNode* candidate = candidates.at(candidate_idx)->as_Load();
+    if (!candidate_set.test(candidate->_idx)) {
+      continue;
+    }
+
+    work_lists.results.clear();
+    Node* folded_value = try_fold_recursive(oop, candidate, candidate->in(MemNode::Memory), work_lists);
+    if (folded_value != nullptr) {
+      progress = true;
+      _igvn.replace_node(candidate, folded_value);
+    }
+  }
+  return progress;
+}
+
+// Collect all loads from oop
+void PhaseLoadFolding::collect_loads(Unique_Node_List& candidates, VectorSet& candidate_set, Node* oop) {
+  assert(candidates.size() == 0 && candidate_set.is_empty(), "must start with no candidates");
+  for (DUIterator_Fast oop_out_max, oop_out_idx = oop->fast_outs(oop_out_max); oop_out_idx < oop_out_max; oop_out_idx++) {
+    Node* out = oop->fast_out(oop_out_idx);
+    if (!out->is_AddP()) {
+      continue;
+    }
+
+    if (out->in(AddPNode::Base) != oop || out->in(AddPNode::Address) != oop || !out->in(AddPNode::Offset)->is_Con()) {
+      // Only try to fold loads in the form of oop + C
+      continue;
+    }
+
+    for (DUIterator_Fast addp_out_max, addp_out_idx = out->fast_outs(addp_out_max); addp_out_idx < addp_out_max; addp_out_idx++) {
+      Node* addp_out = out->fast_out(addp_out_idx);
+      if (addp_out->is_Load() && !addp_out->as_Load()->is_mismatched_access()) {
+        candidates.push(addp_out);
+      }
+    }
+  }
+
+  for (uint i = 0; i < candidates.size(); i++) {
+    candidate_set.set(candidates.at(i)->_idx);
+  }
+}
+
+// Find all nodes that observe the escape of oop. This function also finds stores that may store
+// into oop. This is tricky, for example:
+//   Integer o = new Integer(v);
+//   Integer phi = o;
+//   if (b) {
+//     phi = new Integer(0);
+//   }
+//   phi.value = 1;
+// Then, the store phi.value = 1 may or may not modify o; this cannot be known at compile time. As
+// a result, when we walk the memory graph from a load, if we encounter such a store, we cannot
+// know whether it stores the value we are looking for, and must give up.
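+// The same applies to a store at a variable index into a newly allocated array, e.g.
+//   int[] a = new int[2];
+//   a[idx & 1] = 3;
+// where the store address is not of the form oop + constant, so any candidate load that reaches
+// this store while walking the memory graph has to be given up on (see test05 and test18 in
+// TestLoadFolding.java).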
+void PhaseLoadFolding::process_candidates(VectorSet& candidate_set, WorkLists& work_lists, Node* oop) {
+  assert(work_lists.may_alias.is_empty() && work_lists.escapes.size() == 0 && work_lists.work_list.size() == 0, "must start with empty work lists");
+  work_lists.work_list.push(oop);
+  for (uint wl_idx = 0; wl_idx < work_lists.work_list.size(); wl_idx++) {
+    // At runtime, n may be the same as oop, or may be a different value
+    Node* n = work_lists.work_list.at(wl_idx);
+    for (DUIterator_Fast out_max, out_idx = n->fast_outs(out_max); out_idx < out_max; out_idx++) {
+      Node* out = n->fast_out(out_idx);
+      if (out->is_ConstraintCast() || out->is_DecodeN() || out->is_EncodeP() ||
+          out->is_Phi() || out->is_CMove()) {
+        // All things that can alias n
+        work_lists.work_list.push(out);
+      } else if (out->is_AddP()) {
+        AddPNode* addp = out->as_AddP();
+        assert(addp->base_node() == n, "unexpected base of an AddP");
+
+        // A store that may or may not modify a field of oop (e.g. a store into a Phi which has oop
+        // as one input, or a store into an element of oop at a variable index). This is
+        // conservative, that is, it must be true whenever the store may modify a field of oop but
+        // is not in the form oop + C
+        bool may_alias = false;
+        if (out->in(AddPNode::Base) != oop || out->in(AddPNode::Address) != oop || !out->in(AddPNode::Offset)->is_Con()) {
+          // Not an oop + C pointer
+          may_alias = true;
+        }
+
+        for (DUIterator_Fast addp_out_max, addp_out_idx = addp->fast_outs(addp_out_max); addp_out_idx < addp_out_max; addp_out_idx++) {
+          Node* addp_out = addp->fast_out(addp_out_idx);
+          if ((addp_out->is_Store() || addp_out->is_LoadStore())) {
+            assert(addp == addp_out->in(MemNode::Address), "store a derived pointer?");
+            if (may_alias) {
+              work_lists.may_alias.set(addp_out->_idx);
+            }
+
+            if (addp_out->is_LoadStore() || addp_out->as_Store()->is_mismatched_access()) {
+              // Mismatched accesses are especially hard because they may lie in a different alias
+              // class, so we may not encounter them when walking the memory graph. As a result, be
+              // conservative and give up on all loads that may observe this store. LoadStores are
+              // also lumped here because there is no LoadStoreNode::is_mismatched_access.
+              work_lists.escapes.push(addp_out);
+            }
+          } else if (addp_out->is_Mem()) {
+            // A load, which does not affect the memory
+          } else if (addp_out->is_AddP()) {
+            // Another AddP; it should share the base with the current addp, so it will be visited
+            // later
+            assert(addp_out->in(AddPNode::Base) == n, "must have the same base");
+          } else {
+            // Some runtime calls receive the pointer without the base
+            work_lists.escapes.push(addp_out);
+          }
+        }
+      } else if (out->is_Mem()) {
+        // A store that may allow oop to escape
+        if (out->req() > MemNode::ValueIn && n == out->in(MemNode::ValueIn)) {
+          work_lists.escapes.push(out);
+        }
+      } else if (out->is_Call()) {
+        // A call that may allow oop to escape
+        if (!out->is_AbstractLock() && out->as_Call()->has_non_debug_use(n)) {
+          work_lists.escapes.push(out);
+        }
+      } else if (out->is_SafePoint()) {
+        // Non-call safepoints are pure control nodes
+        continue;
+      } else {
+        // Be conservative with everything else
+        work_lists.escapes.push(out);
+      }
+    }
+  }
+
+  // Propagate the escape status: if a node observes oop escaping, then all of its users also
+  // observe that oop escapes
+  for (uint idx = 0; idx < work_lists.escapes.size(); idx++) {
+    Node* n = work_lists.escapes.at(idx);
+    candidate_set.remove(n->_idx);
+    if (candidate_set.is_empty()) {
+      return;
+    }
+
+    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+      Node* out = n->fast_out(i);
+      if (!out->is_Root()) {
+        work_lists.escapes.push(out);
+      }
+    }
+  }
+}
+
+// Try to find the store that a load observes. Since we know that oop has not escaped, we can
+// inspect the graph aggressively, ignoring calls and memory barriers.
+Node* PhaseLoadFolding::try_fold_recursive(Node* oop, LoadNode* candidate, Node* mem, WorkLists& work_lists) {
+  // An arbitrary int can be the input to a StoreB or a StoreC, so the load needs to redo the
+  // normalization
+  auto extract_store_value = [&](StoreNode* store) {
+    assert(store->Opcode() == candidate->store_Opcode(), "must match %s - %s", store->Name(), candidate->Name());
+    Node* res = store->in(MemNode::ValueIn);
+    if (candidate->Opcode() == Op_LoadUB) {
+      res = new AndINode(res, _igvn.intcon(0xFF));
+      _igvn.register_new_node_with_optimizer(res);
+    } else if (candidate->Opcode() == Op_LoadB) {
+      res = new LShiftINode(res, _igvn.intcon(24));
+      _igvn.register_new_node_with_optimizer(res);
+      res = new RShiftINode(res, _igvn.intcon(24));
+      _igvn.register_new_node_with_optimizer(res);
+    } else if (candidate->Opcode() == Op_LoadUS) {
+      res = new AndINode(res, _igvn.intcon(0xFFFF));
+      _igvn.register_new_node_with_optimizer(res);
+    } else if (candidate->Opcode() == Op_LoadS) {
+      res = new LShiftINode(res, _igvn.intcon(16));
+      _igvn.register_new_node_with_optimizer(res);
+      res = new RShiftINode(res, _igvn.intcon(16));
+      _igvn.register_new_node_with_optimizer(res);
+    }
+    return res;
+  };
+
+  Node* ptr = candidate->in(MemNode::Address);
+  int alias_idx = C->get_alias_index(_igvn.type(ptr)->is_ptr());
+  while (true) {
+    // We may encounter a memory loop, so recording Phis is necessary
+    if (work_lists.results.length() > int(mem->_idx)) {
+      Node* res = work_lists.results.at(mem->_idx);
+      if (res != nullptr) {
+        return res;
+      }
+    }
+
+    // If we encounter a store for which we cannot decide whether it modifies the memory the
+    // candidate loads from, give up
+    if (work_lists.may_alias.test(mem->_idx)) {
+      return nullptr;
+    }
+
+    if (mem->is_MergeMem()) {
+      mem = mem->as_MergeMem()->memory_at(alias_idx);
+    } else if (mem->is_Phi()) {
+      // Create a Phi for the result and store it in
+      // work_lists.results; this allows working with cycles
+      PhiNode* res = new PhiNode(mem->in(0), candidate->bottom_type());
+      _igvn.register_new_node_with_optimizer(res);
+      work_lists.results.at_put_grow(mem->_idx, res);
+      for (uint i = 1; i < mem->req(); i++) {
+        Node* phi_in = try_fold_recursive(oop, candidate, mem->in(i), work_lists);
+        if (phi_in == nullptr) {
+          return nullptr;
+        }
+
+        res->init_req(i, phi_in);
+      }
+      return res;
+    } else if (mem->is_Proj()) {
+      mem = mem->in(0);
+    } else if (mem->is_MemBar()) {
+      // Look through MemBars; only stop at the InitializeNode of oop
+      if (!mem->is_Initialize() || mem != oop->in(0)->in(0)) {
+        mem = mem->in(TypeFunc::Memory);
+        continue;
+      }
+
+      InitializeNode* init = mem->as_Initialize();
+      assert(ptr->is_AddP() && ptr->in(AddPNode::Base) == oop && ptr->in(AddPNode::Address) == oop && ptr->in(AddPNode::Offset)->is_Con(),
+             "invalid pointer");
+
+#ifdef _LP64
+      Node* res = init->find_captured_store(ptr->in(AddPNode::Offset)->get_long(), candidate->memory_size(), &_igvn);
+#else // _LP64
+      Node* res = init->find_captured_store(ptr->in(AddPNode::Offset)->get_int(), candidate->memory_size(), &_igvn);
+#endif // _LP64
+      if (res == nullptr) {
+        return nullptr;
+      } else if (res->is_Proj() && res->in(0) == init->allocation()) {
+        // If no captured store is found, find_captured_store returns the memory output of the
+        // AllocateNode, so the field still has its default zero value
+        return _igvn.zerocon(candidate->value_basic_type());
+      } else if (res->Opcode() == candidate->store_Opcode()) {
+        return extract_store_value(res->as_Store());
+      } else {
+        return nullptr;
+      }
+    } else if (mem->is_SafePoint()) {
+      mem = mem->in(TypeFunc::Memory);
+    } else if (mem->is_Store()) {
+      // We discarded all stores that may write into this field but do not have the form oop + C,
+      // so a simple comparison of the address input is enough
+      if (ptr == mem->in(MemNode::Address)) {
+        return extract_store_value(mem->as_Store());
+      } else {
+        mem = mem->in(MemNode::Memory);
+      }
+    } else {
+      return nullptr;
+    }
+  }
+}
diff --git a/src/hotspot/share/opto/phaseloadfolding.hpp b/src/hotspot/share/opto/phaseloadfolding.hpp
new file mode 100644
index 0000000000000..2f1c920a3c749
--- /dev/null
+++ b/src/hotspot/share/opto/phaseloadfolding.hpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_OPTO_PHASELOADFOLDING_HPP
+#define SHARE_OPTO_PHASELOADFOLDING_HPP
+
+#include "libadt/vectset.hpp"
+#include "opto/node.hpp"
+#include "opto/phase.hpp"
+#include "utilities/growableArray.hpp"
+
+class AllocateNode;
+class PhaseIterGVN;
+
+// Try to fold loads by finding the corresponding stores. The transformations here inspect the
+// graph more aggressively than during IterGVN, so it is a separate phase in the compilation
+// process. The loads taken into consideration are:
+//
+// 1. Loads from an object that has not escaped: all modifications must be visible in the graph.
+//    As a result, we can follow the memory input, skipping through calls and memory fences, to
+//    find the corresponding store.
+class PhaseLoadFolding : public Phase {
+private:
+  PhaseIterGVN& _igvn;
+
+  class WorkLists {
+  public:
+    VectorSet may_alias;
+    Unique_Node_List escapes;
+    Unique_Node_List work_list;
+    GrowableArray<Node*> results;
+  };
+
+  bool do_optimize();
+  bool process_allocate_result(Node* oop);
+  void collect_loads(Unique_Node_List& candidates, VectorSet& candidate_mems, Node* oop);
+  void process_candidates(VectorSet& candidate_mems, WorkLists& work_lists, Node* oop);
+  Node* try_fold_recursive(Node* oop, LoadNode* candidate, Node* mem, WorkLists& work_lists);
+
+public:
+  PhaseLoadFolding(PhaseIterGVN& igvn) : Phase(LoadFolding), _igvn(igvn) {}
+  void optimize();
+};
+
+#endif // SHARE_OPTO_PHASELOADFOLDING_HPP
diff --git a/test/hotspot/jtreg/compiler/escapeAnalysis/TestLoadFolding.java b/test/hotspot/jtreg/compiler/escapeAnalysis/TestLoadFolding.java
new file mode 100644
index 0000000000000..7e6afaa7ab76c
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/escapeAnalysis/TestLoadFolding.java
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */ + +package compiler.escapeAnalysis; + +import compiler.lib.ir_framework.*; + +import java.lang.invoke.VarHandle; + +/** + * @test + * @bug 8373495 + * @summary Test that loads from a newly allocated object are aggressively folded if the object has not escaped + * @library /test/lib / + * @run driver ${test.main.class} + */ +public class TestLoadFolding { + public static class Point { + int x; + int y; + + Point() { + x = 1; + y = 2; + } + + static final Point DEFAULT = new Point(); + } + + static Point staticField; + + public static void main(String[] args) { + TestFramework.run(); + } + + @Run(test = {"test11", "test12", "test13", "test14", "test15", "test16", "test17", "test18"}) + public void runPositiveTests() { + test11(); + test12(false); + test12(true); + test13(false); + test13(true); + test14(); + test15(1, 16); + test16(1, 16, false); + test16(1, 16, true); + test17(0); + test18(0); + } + + @Run(test = {"test01", "test02", "test03", "test04", "test05"}) + public void runNegativeTests() { + test01(); + test02(false); + test02(true); + test03(false); + test03(true); + test04(1, 16); + test05(0); + } + + @DontInline + static void escape(Object o) {} + + @Test + @IR(failOn = IRNode.LOAD_I, counts = {IRNode.ALLOC, "1"}) + public Point test11() { + // p only escapes at return + Point p = new Point(); + escape(null); + p.x += p.y; + return p; + } + + @Test + @IR(failOn = IRNode.LOAD_I, counts = {IRNode.ALLOC, "1"}) + public Point test12(boolean b) { + // p escapes in another branch + Point p = new Point(); + if (b) { + escape(p); + } else { + escape(null); + p.x += p.y; + } + return p; + } + + @Test + @IR(failOn = IRNode.LOAD_I, counts = {IRNode.ALLOC, "1"}) + public Point test13(boolean b) { + // A Phi of p1 and Point.DEFAULT, but a store to Phi is after all the loads from p1 + Point p1 = new Point(); + Point p = b ? 
p1 : Point.DEFAULT;
+        escape(null);
+        p.x = p1.x + p1.y;
+        return p;
+    }
+
+    @Test
+    @IR(failOn = IRNode.LOAD_I, counts = {IRNode.ALLOC, "1"})
+    public int test14() {
+        // Even if p escapes before the loads, if it is legal to execute the loads before the
+        // store, then we can fold the loads
+        Point p = new Point();
+        escape(null);
+        staticField = p;
+        return p.x + p.y;
+    }
+
+    @Test
+    @IR(failOn = IRNode.LOAD_I, counts = {IRNode.ALLOC, "1"})
+    public Point test15(int begin, int end) {
+        // Fold the load that is a part of a cycle
+        Point p = new Point();
+        for (int i = begin; i < end; i *= 2) {
+            p.x++;
+            escape(null); // Force a memory Phi
+        }
+        p.x += p.y;
+        return p;
+    }
+
+    @Test
+    @IR(failOn = IRNode.LOAD_I, counts = {IRNode.ALLOC, "1"})
+    public Point test16(int begin, int end, boolean b) {
+        // A cycle and a Phi, this time the store is to a different object
+        Point p1 = new Point();
+        // This store is not on a Phi involving p1, so it does not interfere
+        Point.DEFAULT.y = 3;
+        Point p = p1;
+        for (int i = begin; i < end; i += 2) {
+            if (b) {
+                p = p1;
+            } else {
+                p = Point.DEFAULT;
+            }
+            b = !b;
+
+            p.x = p1.y + 3;
+            escape(null); // Force a memory Phi
+        }
+        p1.x = p1.y;
+        return p;
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_I, "1", IRNode.ALLOC_ARRAY, "1"})
+    public int test17(int idx) {
+        // Array
+        int[] a = new int[2];
+        a[0] = 1;
+        a[1] = 2;
+        int res = a[idx & 1];
+        escape(null);
+        res += a[0] + a[1];
+        escape(a);
+        return res;
+    }
+
+    @Test
+    @IR(failOn = IRNode.LOAD_I, counts = {IRNode.ALLOC_ARRAY, "1"})
+    public int test18(int idx) {
+        // Array: although we would give up if we reached a[idx & 1] = 3, we meet a[0] = 4 first,
+        // so the load int res = a[0] can still be folded
+        int[] a = new int[2];
+        a[0] = 1;
+        a[1] = 2;
+        escape(null);
+        a[idx & 1] = 3;
+        a[0] = 4;
+        escape(null);
+        int res = a[0];
+        escape(a);
+        return res;
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_I, "2", IRNode.ALLOC, "1"})
+    public int test01() {
+        Point p = new Point();
+        staticField = p;
+        // Only a fullFence requires the following loads to be executed after the store, so they
+        // must not be folded
+        VarHandle.fullFence();
+        return p.x + p.y;
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_I, "1", IRNode.ALLOC, "1"})
+    public int test02(boolean b) {
+        Point p = new Point();
+        if (b) {
+            escape(p);
+            // p escaped, so the load must not be removed
+            return p.x;
+        } else {
+            escape(null);
+            return 0;
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_I, "1", IRNode.ALLOC, "1"})
+    public int test03(boolean b) {
+        Point p = new Point();
+        if (b) {
+            escape(p);
+        }
+        // p escaped, so the load must not be removed
+        return p.x;
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_I, "> 0", IRNode.ALLOC, "1"})
+    public Point test04(int begin, int end) {
+        Point p = new Point();
+        for (int i = begin; i < end; i *= 2) {
+            // p has escaped here because this is a loop: it escapes in a previous iteration
+            p.x++;
+            escape(p);
+        }
+        return p;
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_I, "2", IRNode.ALLOC_ARRAY, "1"})
+    public int test05(int idx) {
+        int[] a = new int[2];
+        a[0] = 1;
+        a[1] = 2;
+        escape(null);
+        a[idx & 1] = 3;
+        // Cannot fold the loads because we do not know which element is written to by
+        // a[idx & 1] = 3
+        return a[0] + a[1];
+    }
+}
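The shift and mask nodes created by extract_store_value() in phaseloadfolding.cpp correspond to the following Java-level identities. This is a minimal, self-contained sketch (the class name is made up for illustration and is not part of the patch) of why a captured StoreB/StoreC value, which can be an arbitrary int, has to be re-normalized before it can replace a LoadB/LoadUB/LoadS/LoadUS:

    // Illustrative only: the normalization applied when folding sub-word loads.
    public class SubWordNormalizationSketch {
        public static void main(String[] args) {
            int v = 0x1234F6FF;             // arbitrary int that a StoreB/StoreC may capture
            int asLoadB  = (v << 24) >> 24; // LoadB:  sign-extend the low byte   -> -1
            int asLoadUB = v & 0xFF;        // LoadUB: zero-extend the low byte   -> 255
            int asLoadS  = (v << 16) >> 16; // LoadS:  sign-extend the low short  -> -2305
            int asLoadUS = v & 0xFFFF;      // LoadUS: zero-extend the low short  -> 63231
            System.out.println(asLoadB + " " + asLoadUB + " " + asLoadS + " " + asLoadUS);
        }
    }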