From 8e923b0ad9b9b1f89413008a5acd8408c67841f6 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 13 Apr 2021 16:40:21 -0700 Subject: [PATCH 001/155] Initial public commit of regalloc2. --- .empty | 0 .gitignore | 4 + Cargo.toml | 27 + LICENSE | 220 ++ README.md | 159 ++ benches/regalloc.rs | 56 + fuzz/.gitignore | 4 + fuzz/Cargo.toml | 54 + fuzz/fuzz_targets/domtree.rs | 128 + fuzz/fuzz_targets/ion.rs | 11 + fuzz/fuzz_targets/ion_checker.rs | 39 + fuzz/fuzz_targets/moves.rs | 76 + fuzz/fuzz_targets/ssagen.rs | 35 + src/bin/test.rs | 45 + src/bitvec.rs | 139 ++ src/cfg.rs | 110 + src/checker.rs | 615 +++++ src/domtree.rs | 118 + src/fuzzing/func.rs | 542 +++++ src/fuzzing/mod.rs | 3 + src/index.rs | 176 ++ src/ion/LICENSE | 373 +++ src/ion/mod.rs | 3763 ++++++++++++++++++++++++++++++ src/lib.rs | 780 +++++++ src/moves.rs | 199 ++ src/postorder.rs | 51 + src/ssa.rs | 87 + 27 files changed, 7814 insertions(+) delete mode 100644 .empty create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 benches/regalloc.rs create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/domtree.rs create mode 100644 fuzz/fuzz_targets/ion.rs create mode 100644 fuzz/fuzz_targets/ion_checker.rs create mode 100644 fuzz/fuzz_targets/moves.rs create mode 100644 fuzz/fuzz_targets/ssagen.rs create mode 100644 src/bin/test.rs create mode 100644 src/bitvec.rs create mode 100644 src/cfg.rs create mode 100644 src/checker.rs create mode 100644 src/domtree.rs create mode 100644 src/fuzzing/func.rs create mode 100644 src/fuzzing/mod.rs create mode 100644 src/index.rs create mode 100644 src/ion/LICENSE create mode 100644 src/ion/mod.rs create mode 100644 src/lib.rs create mode 100644 src/moves.rs create mode 100644 src/postorder.rs create mode 100644 src/ssa.rs diff --git a/.empty b/.empty deleted file mode 100644 index e69de29b..00000000 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..aadc1161 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +Cargo.lock +target/ +.*.swp +*~ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..7e32c7c1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "regalloc2" +version = "0.0.1" +authors = ["Chris Fallin ", "Mozilla SpiderMonkey Developers"] +edition = "2018" +license = "Apache-2.0 WITH LLVM-exception AND MPL-2.0" +description = "Backtracking register allocator ported from IonMonkey" +repository = "https://github.com/cfallin/regalloc2" + +[dependencies] +log = { version = "0.4.8", default-features = false } +smallvec = "1.6.1" +# keep this in sync with libfuzzer_sys's crate version: +arbitrary = "^0.4.6" +rand = "0.8" +rand_chacha = "0.3" +env_logger = "*" + +[dev-dependencies] +criterion = "0.3" + +[profile.release] +debug = true + +[[bench]] +name = "regalloc" +harness = false diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f9d81955 --- /dev/null +++ b/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..e755c4c0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,159 @@
+## regalloc2: another register allocator
+
+This is a register allocator that started life as, and is about 75%
+still, a port of IonMonkey's backtracking register allocator to
+Rust. The data structures and invariants have been simplified a little
+bit, and the interfaces made a little more generic and reusable. In
+addition, it contains substantial amounts of testing infrastructure
+(fuzzing harnesses and checkers) that does not exist in the original
+IonMonkey allocator.
+
+### Design Overview
+
+TODO
+
+- SSA with blockparams
+
+- Operands with constraints, and clobbers, and reused regs; contrast
+  with regalloc.rs approach of vregs and pregs and many moves that get
+  coalesced/elided
+
+### Differences from IonMonkey Backtracking Allocator
+
+There are a number of differences between the [IonMonkey
+allocator](https://searchfox.org/mozilla-central/source/js/src/jit/BacktrackingAllocator.cpp)
+and this one:
+
+* Most significantly, there are [many different fuzz
+  targets](fuzz/fuzz_targets/) that exercise the allocator, including
+  a full symbolic checker (`ion_checker` target) based on the
+  [symbolic checker in
+  regalloc.rs](https://cfallin.org/blog/2021/03/15/cranelift-isel-3/)
+  and, e.g., a targeted fuzzer for the parallel move-resolution
+  algorithm (`moves`) and the SSA generator used for generating cases
+  for the other fuzz targets (`ssagen`).
+
+* The data-structure invariants are simplified. While the IonMonkey
+  allocator allowed for LiveRanges and Bundles to overlap in certain
+  cases, this allocator sticks to a strict invariant: ranges do not
+  overlap in bundles, and bundles do not overlap. There are other
+  examples too: e.g., the definition of minimal bundles is very simple
+  and does not depend on scanning the code at all. In general, we
+  should be able to state simple invariants and see by inspection (as
+  well as fuzzing -- see above) that they hold.
+
+* Many of the algorithms in the IonMonkey allocator are built with
+  helper functions that do linear scans. These "small quadratic" loops
+  are likely not a huge issue in practice, but nevertheless have the
+  potential to be in corner cases. As much as possible, all work in
+  this allocator is done in linear scans. For example, bundle
+  splitting is done in a single compound scan over a bundle, ranges in
+  the bundle, and a sorted list of split-points.
+
+* There are novel schemes for solving certain interesting design
+  challenges. One example: in IonMonkey, liveranges are connected
+  across blocks by, when reaching one end of a control-flow edge in a
+  scan, doing a lookup of the allocation at the other end. This is in
+  principle a linear lookup (so quadratic overall). We instead
+  generate a list of "half-moves", keyed on the edge and from/to
+  vregs, with each holding one of the allocations. By sorting and then
+  scanning this list, we can generate all edge moves in one linear
+  scan (see the short sketch after this list). There are a number of
+  other examples of simplifications: for example, we handle multiple
+  conflicting physical-register-constrained uses of a vreg in a single
+  instruction by recording a copy to do in a side-table, then removing
+  constraints for the core regalloc. Ion instead has to tweak its
+  definition of minimal bundles and create two liveranges that overlap
+  (!) to represent the two uses.
+
+* Using block parameters rather than phi-nodes significantly
+  simplifies handling of inter-block data movement. IonMonkey had to
+  special-case phis in many ways because they are actually quite
+  weird: their uses happen semantically in other blocks, and their
+  defs happen in parallel at the top of the block. Block parameters
+  naturally and explicitly represent these semantics in a direct way.
+
+* The allocator supports irreducible control flow and arbitrary block
+  ordering (its only CFG requirement is that critical edges are
+  split). It handles loops during live-range computation in a way that
+  is similar in spirit to IonMonkey's allocator -- in a single pass,
+  when we discover a loop, we just mark the whole loop as a liverange
+  for values live at the top of the loop -- but we find the loop body
+  without the fixpoint workqueue loop that IonMonkey uses, instead
+  doing a single linear scan for backedges and finding the minimal
+  extent that covers all intermingled loops. In order to support
+  arbitrary block order and irreducible control flow, we relax the
+  invariant that the first liverange for a vreg always starts at its
+  def; instead, the def can happen anywhere, and a liverange may
+  overapproximate. It turns out this is not too hard to handle and is
+  a more robust invariant. (It also means that non-SSA code *may* not
+  be too hard to adapt to, though I haven't seriously thought about
+  this.)
+
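+As a small illustrative sketch of the "half-move" idea from the list
+above: the types below are invented for exposition (the real scheme keys
+on the edge and the from/to vregs and uses the allocator's own types,
+while this sketch keys on a single vreg for brevity), but the core point
+survives simplification -- one sort plus one linear scan replaces
+per-edge lookups.
+
+```rust
+// Hypothetical, simplified half-move record. Deriving `Ord` gives a
+// lexicographic order over the fields, so after sorting, the source and
+// destination halves for the same (edge, vreg) key are adjacent, with
+// the source half (is_dest == false) first.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+struct HalfMove {
+    from_block: u32, // edge source block
+    to_block: u32,   // edge destination block
+    vreg: u32,       // value flowing across the edge
+    is_dest: bool,   // false = source side, true = destination side
+    alloc: u32,      // allocation chosen on this side of the edge
+}
+
+/// Pair up source/dest halves into (from_alloc, to_alloc) edge moves.
+fn resolve_half_moves(mut half_moves: Vec<HalfMove>) -> Vec<(u32, u32)> {
+    half_moves.sort();
+    let mut moves = vec![];
+    for pair in half_moves.windows(2) {
+        let (src, dst) = (&pair[0], &pair[1]);
+        if (src.from_block, src.to_block, src.vreg)
+            == (dst.from_block, dst.to_block, dst.vreg)
+            && !src.is_dest
+            && dst.is_dest
+        {
+            moves.push((src.alloc, dst.alloc));
+        }
+    }
+    moves
+}
+```
+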
+### Rough Performance Comparison with Regalloc.rs
+
+The allocator has not yet been wired up to a suitable compiler backend
+(such as Cranelift) to perform a true apples-to-apples compile-time
+and runtime comparison. However, we can get some idea of compile speed
+by running suitable test cases through the allocator and measuring
+*throughput*: that is, instructions per second for which registers are
+allocated.
+ +To do so, I measured the `qsort2` benchmark in +[regalloc.rs](https://github.com/bytecodealliance/regalloc.rs), +register-allocated with default options in that crate's backtracking +allocator, using the Criterion benchmark framework to measure ~620K +instructions per second: + + +```plain +benches/0 time: [365.68 us 367.36 us 369.04 us] + thrpt: [617.82 Kelem/s 620.65 Kelem/s 623.49 Kelem/s] +``` + +I then measured three different fuzztest-SSA-generator test cases in +this allocator, `regalloc2`, measuring between 1.05M and 2.3M +instructions per second (closer to the former for larger functions): + +```plain +==== 459 instructions +benches/0 time: [424.46 us 425.65 us 426.59 us] + thrpt: [1.0760 Melem/s 1.0784 Melem/s 1.0814 Melem/s] + +==== 225 instructions +benches/1 time: [213.05 us 213.28 us 213.54 us] + thrpt: [1.0537 Melem/s 1.0549 Melem/s 1.0561 Melem/s] + +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild +==== 21 instructions +benches/2 time: [9.0495 us 9.0571 us 9.0641 us] + thrpt: [2.3168 Melem/s 2.3186 Melem/s 2.3206 Melem/s] + +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +``` + +Though not apples-to-apples (SSA vs. non-SSA, completely different +code only with similar length), this is at least some evidence that +`regalloc2` is likely to lead to at least a compile-time improvement +when used in e.g. Cranelift. + +### License + +Unless otherwise specified, code in this crate is licensed under the Apache 2.0 +License with LLVM Exception. This license text can be found in the file +`LICENSE`. + +Files in the `src/ion/` directory are directly ported from original C++ code in +IonMonkey, a part of the Firefox codebase. Parts of `src/lib.rs` are also +definitions that are directly translated from this original code. As a result, +these files are derivative works and are covered by the Mozilla Public License +(MPL) 2.0, as described in license headers in those files. Please see the +notices in relevant files for links to the original IonMonkey source files from +which they have been translated/derived. The MPL text can be found in +`src/ion/LICENSE`. + +Parts of the code are derived from regalloc.rs: in particular, +`src/checker.rs` and `src/domtree.rs`. This crate has the same license +as regalloc.rs, so the license on these files does not differ. diff --git a/benches/regalloc.rs b/benches/regalloc.rs new file mode 100644 index 00000000..85cee8c5 --- /dev/null +++ b/benches/regalloc.rs @@ -0,0 +1,56 @@ +//! Criterion-based benchmark target that computes insts/second for +//! arbitrary inputs. 
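+//!
+//! In outline: seed a ChaCha8 RNG, fill a byte buffer, build a random
+//! SSA `Func` from it via the `arbitrary` crate (growing the buffer when
+//! `NotEnoughData` is returned), then report Criterion throughput in
+//! elements (instructions) per second over repeated `ion::run` calls.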
+ +use arbitrary::{Arbitrary, Unstructured}; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use regalloc2::fuzzing::func::{machine_env, Func}; +use regalloc2::ion; +use regalloc2::Function; + +fn create_random_func(seed: u64, size: usize) -> Func { + let mut bytes: Vec = vec![]; + bytes.resize(size, 0); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + rng.fill(&mut bytes[..]); + loop { + let mut u = Unstructured::new(&bytes[..]); + match Func::arbitrary(&mut u) { + Ok(f) => { + return f; + } + Err(arbitrary::Error::NotEnoughData) => { + let len = bytes.len(); + bytes.resize(len + 1024, 0); + rng.fill(&mut bytes[len..]); + } + Err(e) => panic!("unexpected error: {:?}", e), + } + } +} + +fn run_regalloc(c: &mut Criterion) { + const SIZE: usize = 1000 * 1000; + env_logger::init(); + let env = machine_env(); + let mut group = c.benchmark_group("benches"); + for iter in 0..3 { + let func = create_random_func(iter, SIZE); + eprintln!("==== {} instructions", func.insts()); + group.throughput(Throughput::Elements(func.insts() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(iter), &iter, |b, _| { + b.iter(|| { + // For fair comparison with regalloc.rs, which needs + // to clone its Func on every alloc, we clone + // too. Seems to make a few percent difference. + let func = func.clone(); + ion::run(&func, &env).expect("regalloc did not succeed"); + }); + }); + } + group.finish(); +} + +criterion_group!(benches, run_regalloc); +criterion_main!(benches); diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 00000000..572e03bd --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ + +target +corpus +artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 00000000..e0eec8da --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,54 @@ + +[package] +name = "regalloc2-fuzz" +version = "0.0.0" +authors = ["Chris Fallin "] +license = "MPL-2.0 AND Apache-2.0 WITH LLVM-exception" +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.3" +arbitrary = { version = "^0.4.6", features = ["derive"] } +log = { version = "0.4.8", default-features = false } +env_logger = "0.8.3" + +[dependencies.regalloc2] +path = ".." 
+ +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "domtree" +path = "fuzz_targets/domtree.rs" +test = false +doc = false + +[[bin]] +name = "ssagen" +path = "fuzz_targets/ssagen.rs" +test = false +doc = false + +[[bin]] +name = "ion" +path = "fuzz_targets/ion.rs" +test = false +doc = false + +[[bin]] +name = "moves" +path = "fuzz_targets/moves.rs" +test = false +doc = false + +[[bin]] +name = "ion_checker" +path = "fuzz_targets/ion_checker.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/domtree.rs b/fuzz/fuzz_targets/domtree.rs new file mode 100644 index 00000000..5923befb --- /dev/null +++ b/fuzz/fuzz_targets/domtree.rs @@ -0,0 +1,128 @@ +#![no_main] +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; +use libfuzzer_sys::fuzz_target; +use std::collections::HashSet; + +use regalloc2::{domtree, postorder, Block}; + +#[derive(Clone, Debug)] +struct CFG { + num_blocks: usize, + preds: Vec>, + succs: Vec>, +} + +impl Arbitrary for CFG { + fn arbitrary(u: &mut Unstructured) -> Result { + let num_blocks = u.int_in_range(1..=1000)?; + let mut succs = vec![]; + for _ in 0..num_blocks { + let mut block_succs = vec![]; + for _ in 0..u.int_in_range(0..=5)? { + block_succs.push(Block::new(u.int_in_range(0..=(num_blocks - 1))?)); + } + succs.push(block_succs); + } + let mut preds = vec![]; + for _ in 0..num_blocks { + preds.push(vec![]); + } + for from in 0..num_blocks { + for succ in &succs[from] { + preds[succ.index()].push(Block::new(from)); + } + } + Ok(CFG { + num_blocks, + preds, + succs, + }) + } +} + +#[derive(Clone, Debug)] +struct Path { + blocks: Vec, +} + +impl Path { + fn choose_from_cfg(cfg: &CFG, u: &mut Unstructured) -> Result { + let succs = u.int_in_range(0..=(2 * cfg.num_blocks))?; + let mut block = Block::new(0); + let mut blocks = vec![]; + blocks.push(block); + for _ in 0..succs { + if cfg.succs[block.index()].is_empty() { + break; + } + block = *u.choose(&cfg.succs[block.index()])?; + blocks.push(block); + } + Ok(Path { blocks }) + } +} + +fn check_idom_violations(idom: &[Block], path: &Path) { + // "a dom b" means that any path from the entry block through the CFG that + // contains a and b will contain a before b. + // + // To test this, for any given block b_i, we have the set S of b_0 .. b_{i-1}, + // and we walk up the domtree from b_i to get all blocks that dominate b_i; + // each such block must appear in S. (Otherwise, we have a counterexample + // for which dominance says it should appear in the path prefix, but it does + // not.) + let mut visited = HashSet::new(); + visited.insert(Block::new(0)); + for block in &path.blocks { + let mut parent = idom[block.index()]; + let mut domset = HashSet::new(); + domset.insert(*block); + loop { + assert!(parent.is_valid()); + assert!(visited.contains(&parent)); + domset.insert(parent); + let next = idom[parent.index()]; + if next == parent { + break; + } + parent = next; + } + // Check that `dominates()` returns true for every block in domset, + // and false for every other block. 
+ for domblock in 0..idom.len() { + let domblock = Block::new(domblock); + assert_eq!(domset.contains(&domblock), domtree::dominates(idom, domblock, *block)); + } + visited.insert(*block); + } +} + +#[derive(Clone, Debug)] +struct TestCase { + cfg: CFG, + path: Path, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + let cfg = CFG::arbitrary(u)?; + let path = Path::choose_from_cfg(&cfg, u)?; + Ok(TestCase { + cfg, + path, + }) + } +} + +fuzz_target!(|testcase: TestCase| { + let postord = postorder::calculate(testcase.cfg.num_blocks, Block::new(0), |block| { + &testcase.cfg.succs[block.index()] + }); + let idom = domtree::calculate( + testcase.cfg.num_blocks, + |block| &testcase.cfg.preds[block.index()], + &postord[..], + Block::new(0), + ); + check_idom_violations(&idom[..], &testcase.path); +}); diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs new file mode 100644 index 00000000..dc4a3423 --- /dev/null +++ b/fuzz/fuzz_targets/ion.rs @@ -0,0 +1,11 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +use regalloc2::fuzzing::func::Func; + +fuzz_target!(|func: Func| { + let _ = env_logger::try_init(); + log::debug!("func:\n{:?}", func); + let env = regalloc2::fuzzing::func::machine_env(); + let _out = regalloc2::ion::run(&func, &env).expect("regalloc did not succeed"); +}); diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs new file mode 100644 index 00000000..e3ce1dc7 --- /dev/null +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -0,0 +1,39 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured, Result}; + +use regalloc2::fuzzing::func::{Func, Options}; +use regalloc2::checker::Checker; + +#[derive(Clone, Debug)] +struct TestCase { + func: Func, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + Ok(TestCase { + func: Func::arbitrary_with_options(u, &Options { + reused_inputs: true, + fixed_regs: true, + clobbers: true, + control_flow: true, + reducible: false, + block_params: true, + always_local_uses: false, + })?, + }) + } +} + +fuzz_target!(|testcase: TestCase| { + let func = testcase.func; + let _ = env_logger::try_init(); + log::debug!("func:\n{:?}", func); + let env = regalloc2::fuzzing::func::machine_env(); + let out = regalloc2::ion::run(&func, &env).expect("regalloc did not succeed"); + + let mut checker = Checker::new(&func); + checker.prepare(&out); + checker.run().expect("checker failed"); +}); diff --git a/fuzz/fuzz_targets/moves.rs b/fuzz/fuzz_targets/moves.rs new file mode 100644 index 00000000..a719f7c4 --- /dev/null +++ b/fuzz/fuzz_targets/moves.rs @@ -0,0 +1,76 @@ +#![no_main] +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; +use libfuzzer_sys::fuzz_target; + +use regalloc2::moves::ParallelMoves; +use regalloc2::{Allocation, PReg, RegClass}; +use std::collections::HashSet; + +#[derive(Clone, Debug)] +struct TestCase { + moves: Vec<(Allocation, Allocation)>, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + let mut ret = TestCase { moves: vec![] }; + let mut written = HashSet::new(); + while bool::arbitrary(u)? 
{ + let reg1 = u.int_in_range(0..=30)?; + let reg2 = u.int_in_range(0..=30)?; + if written.contains(®2) { + break; + } + written.insert(reg2); + ret.moves.push(( + Allocation::reg(PReg::new(reg1, RegClass::Int)), + Allocation::reg(PReg::new(reg2, RegClass::Int)), + )); + } + Ok(ret) + } +} + +fuzz_target!(|testcase: TestCase| { + let _ = env_logger::try_init(); + let scratch = Allocation::reg(PReg::new(31, RegClass::Int)); + let mut par = ParallelMoves::new(scratch); + for &(src, dst) in &testcase.moves { + par.add(src, dst); + } + let moves = par.resolve(); + + // Compute the final source reg for each dest reg in the original + // parallel-move set. + let mut final_src_per_dest: Vec> = vec![None; 32]; + for &(src, dst) in &testcase.moves { + if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) { + final_src_per_dest[preg_dst.hw_enc()] = Some(preg_src.hw_enc()); + } + } + + // Simulate the sequence of moves. + let mut regfile: Vec> = vec![None; 32]; + for i in 0..32 { + regfile[i] = Some(i); + } + for (src, dst) in moves { + if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) { + let data = regfile[preg_src.hw_enc()]; + regfile[preg_dst.hw_enc()] = data; + } else { + panic!("Bad allocation in move list"); + } + } + + // Assert that the expected register-moves occurred. + // N.B.: range up to 31 (not 32) to skip scratch register. + for i in 0..31 { + if let Some(orig_src) = final_src_per_dest[i] { + assert_eq!(regfile[i], Some(orig_src)); + } else { + // Should be untouched. + assert_eq!(regfile[i], Some(i)); + } + } +}); diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs new file mode 100644 index 00000000..e69e71a6 --- /dev/null +++ b/fuzz/fuzz_targets/ssagen.rs @@ -0,0 +1,35 @@ +#![no_main] +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; +use libfuzzer_sys::fuzz_target; + +use regalloc2::cfg::CFGInfo; +use regalloc2::fuzzing::func::{Func, Options}; +use regalloc2::ssa::validate_ssa; + +#[derive(Debug)] +struct TestCase { + f: Func, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + Ok(TestCase { + f: Func::arbitrary_with_options( + u, + &Options { + reused_inputs: true, + fixed_regs: true, + clobbers: true, + control_flow: true, + reducible: false, + always_local_uses: false, + }, + )?, + }) + } +} + +fuzz_target!(|t: TestCase| { + let cfginfo = CFGInfo::new(&t.f); + validate_ssa(&t.f, &cfginfo).expect("invalid SSA"); +}); diff --git a/src/bin/test.rs b/src/bin/test.rs new file mode 100644 index 00000000..6d7c7de6 --- /dev/null +++ b/src/bin/test.rs @@ -0,0 +1,45 @@ +use arbitrary::{Arbitrary, Unstructured}; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use regalloc2::fuzzing::func::{machine_env, Func}; +use regalloc2::ion; +use regalloc2::Function; + +fn create_random_func(seed: u64, size: usize) -> Func { + let mut bytes: Vec = vec![]; + bytes.resize(size, 0); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + rng.fill(&mut bytes[..]); + loop { + let mut u = Unstructured::new(&bytes[..]); + match Func::arbitrary(&mut u) { + Ok(f) => { + return f; + } + Err(arbitrary::Error::NotEnoughData) => { + let len = bytes.len(); + bytes.resize(len + 1024, 0); + rng.fill(&mut bytes[len..]); + } + Err(e) => panic!("unexpected error: {:?}", e), + } + } +} + +fn main() { + const SIZE: usize = 1000 * 1000; + env_logger::init(); + let env = machine_env(); + for iter in 0..3 { + let func = create_random_func(iter, SIZE); + eprintln!("==== {} instructions", func.insts()); + let 
mut stats: ion::Stats = ion::Stats::default(); + for i in 0..1000 { + let out = ion::run(&func, &env).expect("regalloc did not succeed"); + if i == 0 { + stats = out.stats; + } + } + eprintln!("Stats: {:?}", stats); + } +} diff --git a/src/bitvec.rs b/src/bitvec.rs new file mode 100644 index 00000000..ce3be7cf --- /dev/null +++ b/src/bitvec.rs @@ -0,0 +1,139 @@ +//! Bit vectors. + +use smallvec::{smallvec, SmallVec}; + +/// A conceptually infinite-length bitvector that allows bitwise operations and +/// iteration over set bits efficiently. +#[derive(Clone, Debug)] +pub struct BitVec { + bits: SmallVec<[u64; 2]>, +} + +const BITS_PER_WORD: usize = 64; + +impl BitVec { + pub fn new() -> Self { + Self { bits: smallvec![] } + } + + pub fn with_capacity(len: usize) -> Self { + let words = (len + BITS_PER_WORD - 1) / BITS_PER_WORD; + Self { + bits: SmallVec::with_capacity(words), + } + } + + #[inline(never)] + fn ensure_idx(&mut self, word: usize) { + let mut target_len = std::cmp::max(2, self.bits.len()); + while word >= target_len { + target_len *= 2; + } + self.bits.resize(target_len, 0); + } + + #[inline(always)] + pub fn set(&mut self, idx: usize, val: bool) { + let word = idx / BITS_PER_WORD; + let bit = idx % BITS_PER_WORD; + if val { + if word >= self.bits.len() { + self.ensure_idx(word); + } + self.bits[word] |= 1 << bit; + } else { + if word < self.bits.len() { + self.bits[word] &= !(1 << bit); + } + } + } + + #[inline(always)] + pub fn get(&mut self, idx: usize) -> bool { + let word = idx / BITS_PER_WORD; + let bit = idx % BITS_PER_WORD; + if word >= self.bits.len() { + false + } else { + (self.bits[word] & (1 << bit)) != 0 + } + } + + pub fn or(&mut self, other: &Self) { + if other.bits.is_empty() { + return; + } + let last_idx = other.bits.len() - 1; + self.ensure_idx(last_idx); + + for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { + *self_word |= *other_word; + } + } + + pub fn and(&mut self, other: &Self) { + if other.bits.len() < self.bits.len() { + self.bits.truncate(other.bits.len()); + } + + for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { + *self_word &= *other_word; + } + } + + pub fn iter<'a>(&'a self) -> SetBitsIter<'a> { + let cur_word = if self.bits.len() > 0 { self.bits[0] } else { 0 }; + SetBitsIter { + words: &self.bits[..], + word_idx: 0, + cur_word, + } + } +} + +pub struct SetBitsIter<'a> { + words: &'a [u64], + word_idx: usize, + cur_word: u64, +} + +impl<'a> Iterator for SetBitsIter<'a> { + type Item = usize; + fn next(&mut self) -> Option { + while self.cur_word == 0 { + if self.word_idx + 1 >= self.words.len() { + return None; + } + self.word_idx += 1; + self.cur_word = self.words[self.word_idx]; + } + let bitidx = self.cur_word.trailing_zeros(); + self.cur_word &= !(1 << bitidx); + Some(self.word_idx * BITS_PER_WORD + bitidx as usize) + } +} + +#[cfg(test)] +mod test { + use super::BitVec; + + #[test] + fn test_set_bits_iter() { + let mut vec = BitVec::new(); + let mut sum = 0; + for i in 0..1024 { + if i % 17 == 0 { + vec.set(i, true); + sum += i; + } + } + + let mut checksum = 0; + for bit in vec.iter() { + assert!(bit % 17 == 0); + checksum += bit; + } + + assert_eq!(sum, checksum); + } +} diff --git a/src/cfg.rs b/src/cfg.rs new file mode 100644 index 00000000..4c838e78 --- /dev/null +++ b/src/cfg.rs @@ -0,0 +1,110 @@ +//! Lightweight CFG analyses. 
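+//!
+//! `CFGInfo` gathers, for a `Function`: the postorder, the dominator
+//! tree, each instruction's containing block, each vreg's def site
+//! (instruction or blockparam), block entry/exit program points, and
+//! each block's position in its successor's predecessor list. A rough
+//! usage sketch (the function and block names are illustrative only):
+//!
+//! ```ignore
+//! let cfginfo = CFGInfo::new(&my_func);
+//! if cfginfo.dominates(block_a, block_b) {
+//!     // block_a dominates block_b in the CFG
+//! }
+//! ```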
+ +use crate::{domtree, postorder, Block, Function, Inst, OperandKind, ProgPoint}; + +#[derive(Clone, Debug)] +pub struct CFGInfo { + /// Postorder traversal of blocks. + pub postorder: Vec, + /// Domtree parents, indexed by block. + pub domtree: Vec, + /// For each instruction, the block it belongs to. + pub insn_block: Vec, + /// For each vreg, the instruction that defines it, if any. + pub vreg_def_inst: Vec, + /// For each vreg, the block that defines it as a blockparam, if + /// any. (Every vreg must have a valid entry in either + /// `vreg_def_inst` or `vreg_def_blockparam`.) + pub vreg_def_blockparam: Vec<(Block, u32)>, + /// For each block, the first instruction. + pub block_entry: Vec, + /// For each block, the last instruction. + pub block_exit: Vec, + /// For each block, what is its position in its successor's preds, + /// if it has a single successor? + /// + /// (Because we require split critical edges, we always either have a single + /// successor (which itself may have multiple preds), or we have multiple + /// successors but each successor itself has only one pred; so we can store + /// just one value per block and always know any block's position in its + /// successors' preds lists.) + pub pred_pos: Vec, +} + +impl CFGInfo { + pub fn new(f: &F) -> CFGInfo { + let postorder = + postorder::calculate(f.blocks(), f.entry_block(), |block| f.block_succs(block)); + let domtree = domtree::calculate( + f.blocks(), + |block| f.block_preds(block), + &postorder[..], + f.entry_block(), + ); + let mut insn_block = vec![Block::invalid(); f.insts()]; + let mut vreg_def_inst = vec![Inst::invalid(); f.num_vregs()]; + let mut vreg_def_blockparam = vec![(Block::invalid(), 0); f.num_vregs()]; + let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; + let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; + let mut pred_pos = vec![0; f.blocks()]; + + for block in 0..f.blocks() { + let block = Block::new(block); + for (i, param) in f.block_params(block).iter().enumerate() { + vreg_def_blockparam[param.vreg()] = (block, i as u32); + } + for inst in f.block_insns(block).iter() { + insn_block[inst.index()] = block; + for operand in f.inst_operands(inst) { + match operand.kind() { + OperandKind::Def => { + vreg_def_inst[operand.vreg().vreg()] = inst; + } + _ => {} + } + } + } + block_entry[block.index()] = ProgPoint::before(f.block_insns(block).first()); + block_exit[block.index()] = ProgPoint::after(f.block_insns(block).last()); + + if f.block_preds(block).len() > 1 { + for (i, &pred) in f.block_preds(block).iter().enumerate() { + // Assert critical edge condition. + assert_eq!( + f.block_succs(pred).len(), + 1, + "Edge {} -> {} is critical", + pred.index(), + block.index(), + ); + pred_pos[pred.index()] = i; + } + } + } + + CFGInfo { + postorder, + domtree, + insn_block, + vreg_def_inst, + vreg_def_blockparam, + block_entry, + block_exit, + pred_pos, + } + } + + pub fn dominates(&self, a: Block, b: Block) -> bool { + domtree::dominates(&self.domtree[..], a, b) + } + + /// Return the position of this block in its successor's predecessor list. + /// + /// Because the CFG must have split critical edges, we actually do not need + /// to know *which* successor: if there is more than one, then each + /// successor has only one predecessor (that's this block), so the answer is + /// `0` no matter which successor we are considering. 
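+    ///
+    /// For example: if block `B`'s single successor `S` has predecessors
+    /// `[A, B, C]`, then `pred_position(B)` is 1; if `B` instead has
+    /// several successors, each of those successors has only `B` as a
+    /// predecessor, so the position is 0.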
+ pub fn pred_position(&self, block: Block) -> usize { + self.pred_pos[block.index()] + } +} diff --git a/src/checker.rs b/src/checker.rs new file mode 100644 index 00000000..5cdcb602 --- /dev/null +++ b/src/checker.rs @@ -0,0 +1,615 @@ +/* + * The following code is derived from `lib/src/checker.rs` in the + * regalloc.rs project + * (https://github.com/bytecodealliance/regalloc.rs). regalloc.rs is + * also licensed under Apache-2.0 with the LLVM exception, as the rest + * of regalloc2's non-Ion-derived code is. + */ + +//! Checker: verifies that spills/reloads/moves retain equivalent +//! dataflow to original, VReg-based code. +//! +//! The basic idea is that we track symbolic values as they flow +//! through spills and reloads. The symbolic values represent +//! particular virtual registers in the original function body +//! presented to the register allocator. Any instruction in the +//! original function body (i.e., not added by the allocator) +//! conceptually generates a symbolic value "Vn" when storing to (or +//! modifying) a virtual register. +//! +//! Operand policies (fixed register, register, any) are also checked +//! at each operand. +//! +//! The dataflow analysis state at each program point is: +//! +//! - map of: Allocation -> lattice value (top > Vn symbols (unordered) > bottom) +//! +//! And the transfer functions for instructions are: +//! +//! - `Edit::Move` inserted by RA: [ alloc_d := alloc_s ] +//! +//! A[alloc_d] := A[alloc_s] +//! +//! - phi-node [ V_i := phi block_j:V_j, block_k:V_k, ... ] +//! with allocations [ A_i := phi block_j:A_j, block_k:A_k, ... ] +//! (N.B.: phi-nodes are not semantically present in the final +//! machine code, but we include their allocations so that this +//! checker can work) +//! +//! A[A_i] := meet(A_j, A_k, ...) +//! +//! - statement in pre-regalloc function [ V_i := op V_j, V_k, ... ] +//! with allocated form [ A_i := op A_j, A_k, ... ] +//! +//! A[A_i] := `V_i` +//! +//! In other words, a statement, even after allocation, generates +//! a symbol that corresponds to its original virtual-register +//! def. +//! +//! (N.B.: moves in pre-regalloc function fall into this last case +//! -- they are "just another operation" and generate a new +//! symbol) +//! +//! At control-flow join points, the symbols meet using a very simple +//! lattice meet-function: two different symbols in the same +//! allocation meet to "conflicted"; otherwise, the symbol meets with +//! itself to produce itself (reflexivity). +//! +//! To check correctness, we first find the dataflow fixpoint with the +//! above lattice and transfer/meet functions. Then, at each op, we +//! examine the dataflow solution at the preceding program point, and +//! check that the allocation for each op arg (input/use) contains the +//! symbol corresponding to the original virtual register specified +//! for this arg. + +#![allow(dead_code)] + +use crate::{ + Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, Operand, OperandKind, + OperandPolicy, OperandPos, Output, ProgPoint, VReg, +}; + +use std::collections::{HashMap, VecDeque}; +use std::default::Default; +use std::hash::Hash; +use std::result::Result; + +use log::debug; + +/// A set of errors detected by the regalloc checker. +#[derive(Clone, Debug)] +pub struct CheckerErrors { + errors: Vec, +} + +/// A single error detected by the regalloc checker. 
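+///
+/// Each variant records the instruction and operand involved, and (where
+/// applicable) the offending allocation, so a failure can be traced back
+/// to a specific program point.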
+#[derive(Clone, Debug)] +pub enum CheckerError { + MissingAllocation { + inst: Inst, + op: Operand, + }, + UnknownValueInAllocation { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + ConflictedValueInAllocation { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + IncorrectValueInAllocation { + inst: Inst, + op: Operand, + alloc: Allocation, + actual: VReg, + }, + PolicyViolated { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + AllocationIsNotReg { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + AllocationIsNotFixedReg { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + AllocationIsNotReuse { + inst: Inst, + op: Operand, + alloc: Allocation, + expected_alloc: Allocation, + }, +} + +/// Abstract state for an allocation. +/// +/// Forms a lattice with \top (`Unknown`), \bot (`Conflicted`), and a +/// number of mutually unordered value-points in between, one per real +/// or virtual register. Any two different registers meet to \bot. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum CheckerValue { + /// "top" value: this storage slot has no known value. + Unknown, + /// "bottom" value: this storage slot has a conflicted value. + Conflicted, + /// Reg: this storage slot has a value that originated as a def + /// into the given virtual register. + /// + /// The boolean flag indicates whether the value is + /// reference-typed. + Reg(VReg, bool), +} + +impl Default for CheckerValue { + fn default() -> CheckerValue { + CheckerValue::Unknown + } +} + +impl CheckerValue { + /// Meet function of the abstract-interpretation value lattice. + fn meet(&self, other: &CheckerValue) -> CheckerValue { + match (self, other) { + (&CheckerValue::Unknown, _) => *other, + (_, &CheckerValue::Unknown) => *self, + (&CheckerValue::Conflicted, _) => *self, + (_, &CheckerValue::Conflicted) => *other, + (&CheckerValue::Reg(r1, ref1), &CheckerValue::Reg(r2, ref2)) if r1 == r2 => { + CheckerValue::Reg(r1, ref1 || ref2) + } + _ => { + log::debug!("{:?} and {:?} meet to Conflicted", self, other); + CheckerValue::Conflicted + } + } + } +} + +/// State that steps through program points as we scan over the instruction stream. +#[derive(Clone, Debug, PartialEq, Eq)] +struct CheckerState { + allocations: HashMap, +} + +impl Default for CheckerState { + fn default() -> CheckerState { + CheckerState { + allocations: HashMap::new(), + } + } +} + +impl std::fmt::Display for CheckerValue { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + CheckerValue::Unknown => write!(f, "?"), + CheckerValue::Conflicted => write!(f, "!"), + CheckerValue::Reg(r, _) => write!(f, "{}", r), + } + } +} + +fn merge_map( + into: &mut HashMap, + from: &HashMap, +) { + for (k, v) in from { + let into_v = into.entry(*k).or_insert(Default::default()); + let merged = into_v.meet(v); + *into_v = merged; + } +} + +impl CheckerState { + /// Create a new checker state. + fn new() -> CheckerState { + Default::default() + } + + /// Merge this checker state with another at a CFG join-point. 
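+    ///
+    /// For example: if one incoming state has `r0 := v1` and another has
+    /// `r0 := v2`, the merged state maps `r0` to `Conflicted`; if both
+    /// have `r0 := v1`, the merge leaves it as `v1`.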
+ fn meet_with(&mut self, other: &CheckerState) { + merge_map(&mut self.allocations, &other.allocations); + } + + fn check_val( + &self, + inst: Inst, + op: Operand, + alloc: Allocation, + val: CheckerValue, + allocs: &[Allocation], + ) -> Result<(), CheckerError> { + if alloc == Allocation::none() { + return Err(CheckerError::MissingAllocation { inst, op }); + } + + match val { + CheckerValue::Unknown => { + return Err(CheckerError::UnknownValueInAllocation { inst, op, alloc }); + } + CheckerValue::Conflicted => { + return Err(CheckerError::ConflictedValueInAllocation { inst, op, alloc }); + } + CheckerValue::Reg(r, _) if r != op.vreg() => { + return Err(CheckerError::IncorrectValueInAllocation { + inst, + op, + alloc, + actual: r, + }); + } + _ => {} + } + + self.check_policy(inst, op, alloc, allocs)?; + + Ok(()) + } + + /// Check an instruction against this state. This must be called + /// twice: once with `InstPosition::Before`, and once with + /// `InstPosition::After` (after updating state with defs). + fn check(&self, pos: InstPosition, checkinst: &CheckerInst) -> Result<(), CheckerError> { + match checkinst { + &CheckerInst::Op { + inst, + ref operands, + ref allocs, + .. + } => { + // Skip Use-checks at the After point if there are any + // reused inputs: the Def which reuses the input + // happens early. + let has_reused_input = operands + .iter() + .any(|op| matches!(op.policy(), OperandPolicy::Reuse(_))); + if has_reused_input && pos == InstPosition::After { + return Ok(()); + } + + // For each operand, check (i) that the allocation + // contains the expected vreg, and (ii) that it meets + // the requirements of the OperandPolicy. + for (op, alloc) in operands.iter().zip(allocs.iter()) { + let is_here = match (op.pos(), pos) { + (OperandPos::Before, InstPosition::Before) + | (OperandPos::Both, InstPosition::Before) => true, + (OperandPos::After, InstPosition::After) + | (OperandPos::Both, InstPosition::After) => true, + _ => false, + }; + if !is_here { + continue; + } + if op.kind() == OperandKind::Def { + continue; + } + + let val = self + .allocations + .get(alloc) + .cloned() + .unwrap_or(Default::default()); + debug!( + "checker: checkinst {:?}: op {:?}, alloc {:?}, checker value {:?}", + checkinst, op, alloc, val + ); + self.check_val(inst, *op, *alloc, val, allocs)?; + } + } + _ => {} + } + Ok(()) + } + + /// Update according to instruction. + fn update(&mut self, checkinst: &CheckerInst) { + match checkinst { + &CheckerInst::Move { into, from } => { + let val = self + .allocations + .get(&from) + .cloned() + .unwrap_or(Default::default()); + debug!( + "checker: checkinst {:?} updating: move {:?} -> {:?} val {:?}", + checkinst, from, into, val + ); + self.allocations.insert(into, val); + } + &CheckerInst::Op { + ref operands, + ref allocs, + .. + } => { + for (op, alloc) in operands.iter().zip(allocs.iter()) { + if op.kind() != OperandKind::Def { + continue; + } + self.allocations + .insert(*alloc, CheckerValue::Reg(op.vreg(), false)); + } + } + &CheckerInst::BlockParams { + ref vregs, + ref allocs, + .. 
+ } => { + for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { + self.allocations + .insert(*alloc, CheckerValue::Reg(*vreg, false)); + } + } + } + } + + fn check_policy( + &self, + inst: Inst, + op: Operand, + alloc: Allocation, + allocs: &[Allocation], + ) -> Result<(), CheckerError> { + match op.policy() { + OperandPolicy::Any => {} + OperandPolicy::Reg => { + if alloc.kind() != AllocationKind::Reg { + return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); + } + } + OperandPolicy::FixedReg(preg) => { + if alloc != Allocation::reg(preg) { + return Err(CheckerError::AllocationIsNotFixedReg { inst, op, alloc }); + } + } + OperandPolicy::Reuse(idx) => { + if alloc.kind() != AllocationKind::Reg { + return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); + } + if alloc != allocs[idx] { + return Err(CheckerError::AllocationIsNotReuse { + inst, + op, + alloc, + expected_alloc: allocs[idx], + }); + } + } + } + Ok(()) + } +} + +/// An instruction representation in the checker's BB summary. +#[derive(Clone, Debug)] +pub(crate) enum CheckerInst { + /// A move between allocations (these could be registers or + /// spillslots). + Move { into: Allocation, from: Allocation }, + + /// A regular instruction with fixed use and def slots. Contains + /// both the original operands (as given to the regalloc) and the + /// allocation results. + Op { + inst: Inst, + operands: Vec, + allocs: Vec, + }, + + /// The top of a block with blockparams. We define the given vregs + /// into the given allocations. + BlockParams { + block: Block, + vregs: Vec, + allocs: Vec, + }, +} + +#[derive(Debug)] +pub struct Checker<'a, F: Function> { + f: &'a F, + bb_in: HashMap, + bb_insts: HashMap>, +} + +impl<'a, F: Function> Checker<'a, F> { + /// Create a new checker for the given function, initializing CFG + /// info immediately. The client should call the `add_*()` + /// methods to add abstract instructions to each BB before + /// invoking `run()` to check for errors. + pub fn new(f: &'a F) -> Checker<'a, F> { + let mut bb_in = HashMap::new(); + let mut bb_insts = HashMap::new(); + + for block in 0..f.blocks() { + let block = Block::new(block); + bb_in.insert(block, Default::default()); + bb_insts.insert(block, vec![]); + } + + Checker { f, bb_in, bb_insts } + } + + /// Build the list of checker instructions based on the given func + /// and allocation results. + pub fn prepare(&mut self, out: &Output) { + debug!("checker: out = {:?}", out); + // For each original instruction, create an `Op`. + let mut last_inst = None; + let mut insert_idx = 0; + for block in 0..self.f.blocks() { + let block = Block::new(block); + for inst in self.f.block_insns(block).iter() { + assert!(last_inst.is_none() || inst > last_inst.unwrap()); + last_inst = Some(inst); + + // Any inserted edits before instruction. + self.handle_edits(block, out, &mut insert_idx, ProgPoint::before(inst)); + + // Instruction itself. + let operands: Vec<_> = self.f.inst_operands(inst).iter().cloned().collect(); + let allocs: Vec<_> = out.inst_allocs(inst).iter().cloned().collect(); + let checkinst = CheckerInst::Op { + inst, + operands, + allocs, + }; + debug!("checker: adding inst {:?}", checkinst); + self.bb_insts.get_mut(&block).unwrap().push(checkinst); + + // Any inserted edits after instruction. 
+ self.handle_edits(block, out, &mut insert_idx, ProgPoint::after(inst)); + } + } + } + + fn handle_edits(&mut self, block: Block, out: &Output, idx: &mut usize, pos: ProgPoint) { + while *idx < out.edits.len() && out.edits[*idx].0 <= pos { + let &(edit_pos, ref edit) = &out.edits[*idx]; + *idx += 1; + if edit_pos < pos { + continue; + } + debug!("checker: adding edit {:?} at pos {:?}", edit, pos); + match edit { + &Edit::Move { from, to, .. } => { + self.bb_insts + .get_mut(&block) + .unwrap() + .push(CheckerInst::Move { into: to, from }); + } + &Edit::BlockParams { + ref vregs, + ref allocs, + } => { + let inst = CheckerInst::BlockParams { + block, + vregs: vregs.clone(), + allocs: allocs.clone(), + }; + self.bb_insts.get_mut(&block).unwrap().push(inst); + } + } + } + } + + /// Perform the dataflow analysis to compute checker state at each BB entry. + fn analyze(&mut self) { + let mut queue = VecDeque::new(); + queue.push_back(self.f.entry_block()); + + while !queue.is_empty() { + let block = queue.pop_front().unwrap(); + let mut state = self.bb_in.get(&block).cloned().unwrap(); + debug!("analyze: block {} has state {:?}", block.index(), state); + for inst in self.bb_insts.get(&block).unwrap() { + state.update(inst); + debug!("analyze: inst {:?} -> state {:?}", inst, state); + } + + for &succ in self.f.block_succs(block) { + let cur_succ_in = self.bb_in.get(&succ).unwrap(); + let mut new_state = state.clone(); + new_state.meet_with(cur_succ_in); + let changed = &new_state != cur_succ_in; + if changed { + debug!( + "analyze: block {} state changed from {:?} to {:?}; pushing onto queue", + succ.index(), + cur_succ_in, + new_state + ); + self.bb_in.insert(succ, new_state); + queue.push_back(succ); + } + } + } + } + + /// Using BB-start state computed by `analyze()`, step the checker state + /// through each BB and check each instruction's register allocations + /// for errors. + fn find_errors(&self) -> Result<(), CheckerErrors> { + let mut errors = vec![]; + for (block, input) in &self.bb_in { + let mut state = input.clone(); + for inst in self.bb_insts.get(block).unwrap() { + if let Err(e) = state.check(InstPosition::Before, inst) { + debug!("Checker error: {:?}", e); + errors.push(e); + } + state.update(inst); + if let Err(e) = state.check(InstPosition::After, inst) { + debug!("Checker error: {:?}", e); + errors.push(e); + } + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(CheckerErrors { errors }) + } + } + + /// Find any errors, returning `Err(CheckerErrors)` with all errors found + /// or `Ok(())` otherwise. + pub fn run(mut self) -> Result<(), CheckerErrors> { + self.analyze(); + let result = self.find_errors(); + + debug!("=== CHECKER RESULT ==="); + fn print_state(state: &CheckerState) { + let mut s = vec![]; + for (alloc, state) in &state.allocations { + s.push(format!("{} := {}", alloc, state)); + } + debug!(" {{ {} }}", s.join(", ")) + } + for bb in 0..self.f.blocks() { + let bb = Block::new(bb); + debug!("block{}:", bb.index()); + let insts = self.bb_insts.get(&bb).unwrap(); + let mut state = self.bb_in.get(&bb).unwrap().clone(); + print_state(&state); + for inst in insts { + match inst { + &CheckerInst::Op { + inst, + ref operands, + ref allocs, + } => { + debug!(" inst{}: {:?} ({:?})", inst.index(), operands, allocs); + } + &CheckerInst::Move { from, into } => { + debug!(" {} -> {}", from, into); + } + &CheckerInst::BlockParams { + ref vregs, + ref allocs, + .. 
+ } => { + let mut args = vec![]; + for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { + args.push(format!("{}:{}", vreg, alloc)); + } + debug!(" blockparams: {}", args.join(", ")); + } + } + state.update(inst); + print_state(&state); + } + } + + result + } +} diff --git a/src/domtree.rs b/src/domtree.rs new file mode 100644 index 00000000..7677583f --- /dev/null +++ b/src/domtree.rs @@ -0,0 +1,118 @@ +/* + * Derives from the dominator tree implementation in regalloc.rs, which is + * licensed under the Apache Public License 2.0 with LLVM Exception. See: + * https://github.com/bytecodealliance/regalloc.rs + */ + +// This is an implementation of the algorithm described in +// +// A Simple, Fast Dominance Algorithm +// Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy +// Department of Computer Science, Rice University, Houston, Texas, USA +// TR-06-33870 +// https://www.cs.rice.edu/~keith/EMBED/dom.pdf + +use crate::Block; + +// Helper +fn merge_sets( + idom: &[Block], // map from Block to Block + block_to_rpo: &[Option], + mut node1: Block, + mut node2: Block, +) -> Block { + while node1 != node2 { + if node1.is_invalid() || node2.is_invalid() { + return Block::invalid(); + } + let rpo1 = block_to_rpo[node1.index()].unwrap(); + let rpo2 = block_to_rpo[node2.index()].unwrap(); + if rpo1 > rpo2 { + node1 = idom[node1.index()]; + } else if rpo2 > rpo1 { + node2 = idom[node2.index()]; + } + } + assert!(node1 == node2); + node1 +} + +pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>( + num_blocks: usize, + preds: PredFn, + post_ord: &[Block], + start: Block, +) -> Vec { + // We have post_ord, which is the postorder sequence. + + // Compute maps from RPO to block number and vice-versa. + let mut block_to_rpo = vec![None; num_blocks]; + block_to_rpo.resize(num_blocks, None); + for (i, rpo_block) in post_ord.iter().rev().enumerate() { + block_to_rpo[rpo_block.index()] = Some(i as u32); + } + + let mut idom = vec![Block::invalid(); num_blocks]; + + // The start node must have itself as a parent. + idom[start.index()] = start; + + let mut changed = true; + while changed { + changed = false; + // Consider blocks in reverse postorder. Skip any that are unreachable. + for &node in post_ord.iter().rev() { + let rponum = block_to_rpo[node.index()].unwrap(); + + let mut parent = Block::invalid(); + for &pred in preds(node).iter() { + let pred_rpo = match block_to_rpo[pred.index()] { + Some(r) => r, + None => { + // Skip unreachable preds. 
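                        // Worked example of `merge_sets` above (the two-finger
                        // intersection from Cooper-Harvey-Kennedy), on a diamond
                        // CFG b0 -> {b1, b2} -> b3 with RPO numbers b0=0, b1=1,
                        // b2=2, b3=3 and idom[b1] = idom[b2] = b0:
                        //
                        //     merge_sets(idom, rpo, b1, b2)
                        //       rpo(b2) > rpo(b1)  =>  node2 = idom[b2] = b0
                        //       rpo(b1) > rpo(b0)  =>  node1 = idom[b1] = b0
                        //       node1 == node2     =>  returns b0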
+ continue; + } + }; + if pred_rpo < rponum { + parent = pred; + break; + } + } + + if parent.is_valid() { + for &pred in preds(node).iter() { + if pred == parent { + continue; + } + if idom[pred.index()].is_invalid() { + continue; + } + parent = merge_sets(&idom, &block_to_rpo[..], parent, pred); + } + } + + if parent.is_valid() && parent != idom[node.index()] { + idom[node.index()] = parent; + changed = true; + } + } + } + + idom +} + +pub fn dominates(idom: &[Block], a: Block, mut b: Block) -> bool { + loop { + if a == b { + return true; + } + if b.is_invalid() { + return false; + } + let parent = idom[b.index()]; + if b == parent { + return false; + } + b = idom[b.index()]; + } +} diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs new file mode 100644 index 00000000..ba38e985 --- /dev/null +++ b/src/fuzzing/func.rs @@ -0,0 +1,542 @@ +use crate::{ + domtree, postorder, Allocation, Block, Function, Inst, InstRange, MachineEnv, Operand, + OperandKind, OperandPolicy, OperandPos, PReg, RegClass, VReg, +}; + +use arbitrary::Result as ArbitraryResult; +use arbitrary::{Arbitrary, Unstructured}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum InstOpcode { + Phi, + Op, + Call, + Ret, + Branch, +} + +#[derive(Clone, Debug)] +pub struct InstData { + op: InstOpcode, + operands: Vec, + clobbers: Vec, +} + +impl InstData { + pub fn op(def: usize, uses: &[usize]) -> InstData { + let mut operands = vec![Operand::reg_def(VReg::new(def, RegClass::Int))]; + for &u in uses { + operands.push(Operand::reg_use(VReg::new(u, RegClass::Int))); + } + InstData { + op: InstOpcode::Op, + operands, + clobbers: vec![], + } + } + pub fn branch(uses: &[usize]) -> InstData { + let mut operands = vec![]; + for &u in uses { + operands.push(Operand::reg_use(VReg::new(u, RegClass::Int))); + } + InstData { + op: InstOpcode::Branch, + operands, + clobbers: vec![], + } + } + pub fn ret() -> InstData { + InstData { + op: InstOpcode::Ret, + operands: vec![], + clobbers: vec![], + } + } +} + +#[derive(Clone)] +pub struct Func { + insts: Vec, + blocks: Vec, + block_preds: Vec>, + block_succs: Vec>, + block_params: Vec>, + num_vregs: usize, +} + +impl Function for Func { + fn insts(&self) -> usize { + self.insts.len() + } + + fn blocks(&self) -> usize { + self.blocks.len() + } + + fn entry_block(&self) -> Block { + assert!(self.blocks.len() > 0); + Block::new(0) + } + + fn block_insns(&self, block: Block) -> InstRange { + self.blocks[block.index()] + } + + fn block_succs(&self, block: Block) -> &[Block] { + &self.block_succs[block.index()][..] + } + + fn block_preds(&self, block: Block) -> &[Block] { + &self.block_preds[block.index()][..] + } + + fn block_params(&self, block: Block) -> &[VReg] { + &self.block_params[block.index()][..] + } + + fn is_call(&self, insn: Inst) -> bool { + self.insts[insn.index()].op == InstOpcode::Call + } + + fn is_ret(&self, insn: Inst) -> bool { + self.insts[insn.index()].op == InstOpcode::Ret + } + + fn is_branch(&self, insn: Inst) -> bool { + self.insts[insn.index()].op == InstOpcode::Branch + } + + fn is_safepoint(&self, _: Inst) -> bool { + false + } + + fn is_move(&self, _: Inst) -> Option<(VReg, VReg)> { + None + } + + fn inst_operands(&self, insn: Inst) -> &[Operand] { + &self.insts[insn.index()].operands[..] + } + + fn inst_clobbers(&self, insn: Inst) -> &[PReg] { + &self.insts[insn.index()].clobbers[..] 
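    // Illustration: how the `postorder` and `domtree` modules added earlier in
    // this patch fit together (this mirrors `FuncBuilder::compute_doms` later in
    // this file; `succs` and `preds` are placeholder closures returning slices
    // of successor/predecessor blocks):
    //
    //     let post_ord = postorder::calculate(num_blocks, Block::new(0), succs);
    //     let idom = domtree::calculate(num_blocks, preds, &post_ord[..], Block::new(0));
    //     // domtree::dominates(&idom, a, b) then walks b's idom chain and
    //     // reports whether `a` dominates `b`.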
+ } + + fn num_vregs(&self) -> usize { + self.num_vregs + } + + fn spillslot_size(&self, regclass: RegClass, _: VReg) -> usize { + match regclass { + RegClass::Int => 1, + RegClass::Float => 2, + } + } +} + +struct FuncBuilder { + postorder: Vec, + idom: Vec, + f: Func, + insts_per_block: Vec>, +} + +impl FuncBuilder { + fn new() -> Self { + FuncBuilder { + postorder: vec![], + idom: vec![], + f: Func { + block_preds: vec![], + block_succs: vec![], + block_params: vec![], + insts: vec![], + blocks: vec![], + num_vregs: 0, + }, + insts_per_block: vec![], + } + } + + pub fn add_block(&mut self) -> Block { + let b = Block::new(self.f.blocks.len()); + self.f + .blocks + .push(InstRange::forward(Inst::new(0), Inst::new(0))); + self.f.block_preds.push(vec![]); + self.f.block_succs.push(vec![]); + self.f.block_params.push(vec![]); + self.insts_per_block.push(vec![]); + b + } + + pub fn add_inst(&mut self, block: Block, data: InstData) { + self.insts_per_block[block.index()].push(data); + } + + pub fn add_edge(&mut self, from: Block, to: Block) { + self.f.block_succs[from.index()].push(to); + self.f.block_preds[to.index()].push(from); + } + + pub fn set_block_params(&mut self, block: Block, params: &[VReg]) { + self.f.block_params[block.index()] = params.iter().cloned().collect(); + } + + fn compute_doms(&mut self) { + self.postorder = postorder::calculate(self.f.blocks.len(), Block::new(0), |block| { + &self.f.block_succs[block.index()][..] + }); + self.idom = domtree::calculate( + self.f.blocks.len(), + |block| &self.f.block_preds[block.index()][..], + &self.postorder[..], + Block::new(0), + ); + } + + fn finalize(mut self) -> Func { + for (blocknum, blockrange) in self.f.blocks.iter_mut().enumerate() { + let begin_inst = self.f.insts.len(); + for inst in &self.insts_per_block[blocknum] { + self.f.insts.push(inst.clone()); + } + let end_inst = self.f.insts.len(); + *blockrange = InstRange::forward(Inst::new(begin_inst), Inst::new(end_inst)); + } + + self.f + } +} + +impl Arbitrary for OperandPolicy { + fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { + Ok(*u.choose(&[OperandPolicy::Any, OperandPolicy::Reg])?) + } +} + +fn choose_dominating_block( + idom: &[Block], + mut block: Block, + allow_self: bool, + u: &mut Unstructured, +) -> ArbitraryResult { + assert!(block.is_valid()); + let orig_block = block; + loop { + if (allow_self || block != orig_block) && bool::arbitrary(u)? { + break; + } + if idom[block.index()] == block { + break; + } + block = idom[block.index()]; + assert!(block.is_valid()); + } + let block = if block != orig_block || allow_self { + block + } else { + Block::invalid() + }; + Ok(block) +} + +#[derive(Clone, Copy, Debug)] +pub struct Options { + pub reused_inputs: bool, + pub fixed_regs: bool, + pub clobbers: bool, + pub control_flow: bool, + pub reducible: bool, + pub block_params: bool, + pub always_local_uses: bool, +} + +impl std::default::Default for Options { + fn default() -> Self { + Options { + reused_inputs: false, + fixed_regs: false, + clobbers: false, + control_flow: true, + reducible: false, + block_params: true, + always_local_uses: false, + } + } +} + +impl Arbitrary for Func { + fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { + Func::arbitrary_with_options(u, &Options::default()) + } +} + +impl Func { + pub fn arbitrary_with_options(u: &mut Unstructured, opts: &Options) -> ArbitraryResult { + // General strategy: + // 1. Create an arbitrary CFG. + // 2. Create a list of vregs to define in each block. + // 3. 
Define some of those vregs in each block as blockparams.f. + // 4. Populate blocks with ops that define the rest of the vregs. + // - For each use, choose an available vreg: either one + // already defined (via blockparam or inst) in this block, + // or one defined in a dominating block. + + let mut builder = FuncBuilder::new(); + for _ in 0..u.int_in_range(1..=100)? { + builder.add_block(); + } + let num_blocks = builder.f.blocks.len(); + + // Generate a CFG. Create a "spine" of either single blocks, + // with links to the next; or fork patterns, with the left + // fork linking to the next and the right fork in `out_blocks` + // to be connected below. This creates an arbitrary CFG with + // split critical edges, which is a property that we require + // for the regalloc. + let mut from = 0; + let mut out_blocks = vec![]; + let mut in_blocks = vec![]; + // For reducibility, if selected: enforce strict nesting of backedges + let mut max_backedge_src = 0; + let mut min_backedge_dest = num_blocks; + while from < num_blocks { + in_blocks.push(from); + if num_blocks > 3 && from < num_blocks - 3 && bool::arbitrary(u)? && opts.control_flow { + // To avoid critical edges, we use from+1 as an edge + // block, and advance `from` an extra block; `from+2` + // will be the next normal iteration. + builder.add_edge(Block::new(from), Block::new(from + 1)); + builder.add_edge(Block::new(from), Block::new(from + 2)); + builder.add_edge(Block::new(from + 2), Block::new(from + 3)); + out_blocks.push(from + 1); + from += 2; + } else if from < num_blocks - 1 { + builder.add_edge(Block::new(from), Block::new(from + 1)); + } + from += 1; + } + for pred in out_blocks { + let mut succ = *u.choose(&in_blocks[..])?; + if opts.reducible && (pred >= succ) { + if pred < max_backedge_src || succ > min_backedge_dest { + // If the chosen edge would result in an + // irreducible CFG, just make this a diamond + // instead. + succ = pred + 2; + } else { + max_backedge_src = pred; + min_backedge_dest = succ; + } + } + builder.add_edge(Block::new(pred), Block::new(succ)); + } + + builder.compute_doms(); + + for block in 0..num_blocks { + builder.f.block_preds[block].clear(); + } + for block in 0..num_blocks { + for &succ in &builder.f.block_succs[block] { + builder.f.block_preds[succ.index()].push(Block::new(block)); + } + } + + builder.compute_doms(); + + let mut vregs_by_block = vec![]; + let mut vregs_by_block_to_be_defined = vec![]; + let mut block_params = vec![vec![]; num_blocks]; + for block in 0..num_blocks { + let mut vregs = vec![]; + for _ in 0..u.int_in_range(5..=15)? { + let vreg = VReg::new(builder.f.num_vregs, RegClass::Int); + builder.f.num_vregs += 1; + vregs.push(vreg); + } + vregs_by_block.push(vregs.clone()); + vregs_by_block_to_be_defined.push(vec![]); + let mut max_block_params = u.int_in_range(0..=std::cmp::min(3, vregs.len() / 3))?; + for &vreg in &vregs { + if block > 0 && opts.block_params && bool::arbitrary(u)? 
&& max_block_params > 0 { + block_params[block].push(vreg); + max_block_params -= 1; + } else { + vregs_by_block_to_be_defined.last_mut().unwrap().push(vreg); + } + } + vregs_by_block_to_be_defined.last_mut().unwrap().reverse(); + builder.set_block_params(Block::new(block), &block_params[block][..]); + } + + for block in 0..num_blocks { + let mut avail = block_params[block].clone(); + let mut remaining_nonlocal_uses = u.int_in_range(0..=3)?; + while let Some(vreg) = vregs_by_block_to_be_defined[block].pop() { + let def_policy = OperandPolicy::arbitrary(u)?; + let def_pos = if bool::arbitrary(u)? { + OperandPos::Before + } else { + OperandPos::After + }; + let mut operands = vec![Operand::new(vreg, def_policy, OperandKind::Def, def_pos)]; + let mut allocations = vec![Allocation::none()]; + for _ in 0..u.int_in_range(0..=3)? { + let vreg = if avail.len() > 0 + && (opts.always_local_uses + || remaining_nonlocal_uses == 0 + || bool::arbitrary(u)?) + { + *u.choose(&avail[..])? + } else if !opts.always_local_uses { + let def_block = choose_dominating_block( + &builder.idom[..], + Block::new(block), + /* allow_self = */ false, + u, + )?; + if !def_block.is_valid() { + // No vregs already defined, and no pred blocks that dominate us + // (perhaps we are the entry block): just stop generating inputs. + break; + } + remaining_nonlocal_uses -= 1; + *u.choose(&vregs_by_block[def_block.index()])? + } else { + break; + }; + let use_policy = OperandPolicy::arbitrary(u)?; + operands.push(Operand::new( + vreg, + use_policy, + OperandKind::Use, + OperandPos::Before, + )); + allocations.push(Allocation::none()); + } + let mut clobbers: Vec = vec![]; + if operands.len() > 1 && opts.reused_inputs && bool::arbitrary(u)? { + // Make the def a reused input. + let op = operands[0]; + assert_eq!(op.kind(), OperandKind::Def); + let reused = u.int_in_range(1..=(operands.len() - 1))?; + operands[0] = Operand::new( + op.vreg(), + OperandPolicy::Reuse(reused), + op.kind(), + OperandPos::After, + ); + } else if opts.fixed_regs && bool::arbitrary(u)? { + // Pick an operand and make it a fixed reg. + let fixed_reg = PReg::new(u.int_in_range(0..=30)?, RegClass::Int); + let i = u.int_in_range(0..=(operands.len() - 1))?; + let op = operands[i]; + operands[i] = Operand::new( + op.vreg(), + OperandPolicy::FixedReg(fixed_reg), + op.kind(), + op.pos(), + ); + } else if opts.clobbers && bool::arbitrary(u)? { + for _ in 0..u.int_in_range(0..=5)? { + let reg = u.int_in_range(0..=30)?; + if clobbers.iter().any(|r| r.hw_enc() == reg) { + break; + } + clobbers.push(PReg::new(reg, RegClass::Int)); + } + } + let op = *u.choose(&[InstOpcode::Op, InstOpcode::Call])?; + builder.add_inst( + Block::new(block), + InstData { + op, + operands, + clobbers, + }, + ); + avail.push(vreg); + } + + // Define the branch with blockparam args that must end + // the block. + if builder.f.block_succs[block].len() > 0 { + let mut args = vec![]; + for &succ in &builder.f.block_succs[block] { + for _ in 0..builder.f.block_params[succ.index()].len() { + let dom_block = choose_dominating_block( + &builder.idom[..], + Block::new(block), + false, + u, + )?; + let vreg = if dom_block.is_valid() && bool::arbitrary(u)? { + u.choose(&vregs_by_block[dom_block.index()][..])? + } else { + u.choose(&avail[..])? 
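                    // Illustration: how this generator is typically driven from a
                    // fuzz target (a minimal sketch; `raw_bytes` stands in for the
                    // byte buffer supplied by the fuzzing harness):
                    //
                    //     use arbitrary::Unstructured;
                    //     let mut u = Unstructured::new(raw_bytes);
                    //     let opts = Options { reducible: true, ..Options::default() };
                    //     let func = Func::arbitrary_with_options(&mut u, &opts)?;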
+ }; + args.push(vreg.vreg()); + } + } + builder.add_inst(Block::new(block), InstData::branch(&args[..])); + } else { + builder.add_inst(Block::new(block), InstData::ret()); + } + } + + Ok(builder.finalize()) + } +} + +impl std::fmt::Debug for Func { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{{\n")?; + for (i, blockrange) in self.blocks.iter().enumerate() { + let succs = self.block_succs[i] + .iter() + .map(|b| b.index()) + .collect::>(); + let preds = self.block_preds[i] + .iter() + .map(|b| b.index()) + .collect::>(); + let params = self.block_params[i] + .iter() + .map(|v| format!("v{}", v.vreg())) + .collect::>() + .join(", "); + write!( + f, + " block{}({}): # succs:{:?} preds:{:?}\n", + i, params, succs, preds + )?; + for inst in blockrange.iter() { + write!( + f, + " inst{}: {:?} ops:{:?} clobber:{:?}\n", + inst.index(), + self.insts[inst.index()].op, + self.insts[inst.index()].operands, + self.insts[inst.index()].clobbers + )?; + } + } + write!(f, "}}\n")?; + Ok(()) + } +} + +pub fn machine_env() -> MachineEnv { + // Reg 31 is the scratch reg. + let regs: Vec = (0..31).map(|i| PReg::new(i, RegClass::Int)).collect(); + let regs_by_class: Vec> = vec![regs.clone(), vec![]]; + let scratch_by_class: Vec = + vec![PReg::new(31, RegClass::Int), PReg::new(0, RegClass::Float)]; + MachineEnv { + regs, + regs_by_class, + scratch_by_class, + } +} diff --git a/src/fuzzing/mod.rs b/src/fuzzing/mod.rs new file mode 100644 index 00000000..8aecdabd --- /dev/null +++ b/src/fuzzing/mod.rs @@ -0,0 +1,3 @@ +//! Utilities for fuzzing. + +pub mod func; diff --git a/src/index.rs b/src/index.rs new file mode 100644 index 00000000..1fe1b604 --- /dev/null +++ b/src/index.rs @@ -0,0 +1,176 @@ +#[macro_export] +macro_rules! define_index { + ($ix:ident) => { + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $ix(pub u32); + impl $ix { + #[inline(always)] + pub fn new(i: usize) -> Self { + Self(i as u32) + } + #[inline(always)] + pub fn index(self) -> usize { + assert!(self.is_valid()); + self.0 as usize + } + #[inline(always)] + pub fn invalid() -> Self { + Self(u32::MAX) + } + #[inline(always)] + pub fn is_invalid(self) -> bool { + self == Self::invalid() + } + #[inline(always)] + pub fn is_valid(self) -> bool { + self != Self::invalid() + } + #[inline(always)] + pub fn next(self) -> $ix { + assert!(self.is_valid()); + Self(self.0 + 1) + } + #[inline(always)] + pub fn prev(self) -> $ix { + assert!(self.is_valid()); + Self(self.0 - 1) + } + } + + impl crate::index::ContainerIndex for $ix {} + }; +} + +pub trait ContainerIndex: Clone + Copy + std::fmt::Debug + PartialEq + Eq {} + +pub trait ContainerComparator { + type Ix: ContainerIndex; + fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering; +} + +define_index!(Inst); +define_index!(Block); + +#[derive(Clone, Copy, Debug)] +pub struct InstRange(Inst, Inst, bool); + +impl InstRange { + #[inline(always)] + pub fn forward(from: Inst, to: Inst) -> Self { + assert!(from.index() <= to.index()); + InstRange(from, to, true) + } + + #[inline(always)] + pub fn backward(from: Inst, to: Inst) -> Self { + assert!(from.index() >= to.index()); + InstRange(to, from, false) + } + + #[inline(always)] + pub fn first(self) -> Inst { + assert!(self.len() > 0); + if self.is_forward() { + self.0 + } else { + self.1.prev() + } + } + + #[inline(always)] + pub fn last(self) -> Inst { + assert!(self.len() > 0); + if self.is_forward() { + self.1.prev() + } else { + self.0 + } + } + + #[inline(always)] + pub 
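    // Illustration: `define_index!` above generates small u32-backed index
    // newtypes, so the `Inst` and `Block` types defined above behave like this:
    //
    //     let i = Inst::new(5);
    //     assert_eq!(i.next().index(), 6);
    //     assert_eq!(i.prev().index(), 4);
    //     assert!(Inst::invalid().is_invalid());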
fn rest(self) -> InstRange { + assert!(self.len() > 0); + if self.is_forward() { + InstRange::forward(self.0.next(), self.1) + } else { + InstRange::backward(self.1.prev(), self.0) + } + } + + #[inline(always)] + pub fn len(self) -> usize { + self.1.index() - self.0.index() + } + + #[inline(always)] + pub fn is_forward(self) -> bool { + self.2 + } + + #[inline(always)] + pub fn rev(self) -> Self { + Self(self.0, self.1, !self.2) + } + + #[inline(always)] + pub fn iter(self) -> InstRangeIter { + InstRangeIter(self) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct InstRangeIter(InstRange); + +impl Iterator for InstRangeIter { + type Item = Inst; + #[inline(always)] + fn next(&mut self) -> Option { + if self.0.len() == 0 { + None + } else { + let ret = self.0.first(); + self.0 = self.0.rest(); + Some(ret) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_inst_range() { + let range = InstRange::forward(Inst::new(0), Inst::new(0)); + assert_eq!(range.len(), 0); + + let range = InstRange::forward(Inst::new(0), Inst::new(5)); + assert_eq!(range.first().index(), 0); + assert_eq!(range.last().index(), 4); + assert_eq!(range.len(), 5); + assert_eq!( + range.iter().collect::>(), + vec![ + Inst::new(0), + Inst::new(1), + Inst::new(2), + Inst::new(3), + Inst::new(4) + ] + ); + let range = range.rev(); + assert_eq!(range.first().index(), 4); + assert_eq!(range.last().index(), 0); + assert_eq!(range.len(), 5); + assert_eq!( + range.iter().collect::>(), + vec![ + Inst::new(4), + Inst::new(3), + Inst::new(2), + Inst::new(1), + Inst::new(0) + ] + ); + } +} diff --git a/src/ion/LICENSE b/src/ion/LICENSE new file mode 100644 index 00000000..14e2f777 --- /dev/null +++ b/src/ion/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. 
"Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. 
You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. 
No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. 
+ +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/src/ion/mod.rs b/src/ion/mod.rs new file mode 100644 index 00000000..78d42dca --- /dev/null +++ b/src/ion/mod.rs @@ -0,0 +1,3763 @@ +/* + * The following license applies to this file, which has been largely + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +//! Backtracking register allocator on SSA code ported from IonMonkey's +//! BacktrackingAllocator. + +/* + * TODO: + * + * - tune heuristics: + * - splits: + * - safepoints? + * - split just before uses with fixed regs and/or just after defs + * with fixed regs? + * - try-any-reg allocate loop should randomly probe in caller-save + * ("preferred") regs first -- have a notion of "preferred regs" in + * MachineEnv? + * - measure average liverange length / number of splits / ... + * + * - reused-input reg: don't allocate register for input that is reused. + * + * - more fuzzing: + * - test with *multiple* fixed-reg constraints on one vreg (same + * inst, different insts) + * + * - modify CL to generate SSA VCode + * - lower blockparams to blockparams directly + * - use temps properly (`alloc_tmp()` vs `alloc_reg()`) + * + * - produce stackmaps + * - stack constraint (also: unify this with stack-args? spillslot vs user stackslot?) + * - vreg reffyness + * - if reffy vreg, add to stackmap lists during reification scan + */ + +#![allow(dead_code, unused_imports)] + +use crate::bitvec::BitVec; +use crate::cfg::CFGInfo; +use crate::index::ContainerComparator; +use crate::moves::ParallelMoves; +use crate::{ + define_index, domtree, Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, + MachineEnv, Operand, OperandKind, OperandPolicy, OperandPos, Output, PReg, ProgPoint, + RegAllocError, RegClass, SpillSlot, VReg, +}; +use log::debug; +use smallvec::{smallvec, SmallVec}; +use std::cmp::Ordering; +use std::collections::{BTreeMap, BinaryHeap}; +use std::fmt::Debug; + +#[cfg(not(debug))] +fn validate_ssa(_: &F, _: &CFGInfo) -> Result<(), RegAllocError> { + Ok(()) +} + +#[cfg(debug)] +fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> { + crate::validate_ssa(f, cfginfo) +} + +/// A range from `from` (inclusive) to `to` (exclusive). 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct CodeRange { + from: ProgPoint, + to: ProgPoint, +} + +impl CodeRange { + pub fn is_empty(&self) -> bool { + self.from == self.to + } + pub fn contains(&self, other: &Self) -> bool { + other.from >= self.from && other.to <= self.to + } + pub fn contains_point(&self, other: ProgPoint) -> bool { + other >= self.from && other < self.to + } + pub fn overlaps(&self, other: &Self) -> bool { + other.to > self.from && other.from < self.to + } + pub fn len(&self) -> usize { + self.to.inst.index() - self.from.inst.index() + } +} + +impl std::cmp::PartialOrd for CodeRange { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl std::cmp::Ord for CodeRange { + fn cmp(&self, other: &Self) -> Ordering { + if self.to <= other.from { + Ordering::Less + } else if self.from >= other.to { + Ordering::Greater + } else { + Ordering::Equal + } + } +} + +define_index!(LiveBundleIndex); +define_index!(LiveRangeIndex); +define_index!(SpillSetIndex); +define_index!(UseIndex); +define_index!(DefIndex); +define_index!(VRegIndex); +define_index!(PRegIndex); +define_index!(SpillSlotIndex); + +type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>; + +#[derive(Clone, Debug)] +struct LiveRange { + range: CodeRange, + vreg: VRegIndex, + bundle: LiveBundleIndex, + uses_spill_weight: u32, + num_fixed_uses_and_flags: u32, + + first_use: UseIndex, + last_use: UseIndex, + def: DefIndex, + + next_in_bundle: LiveRangeIndex, + next_in_reg: LiveRangeIndex, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u32)] +enum LiveRangeFlag { + Minimal = 1, + Fixed = 2, +} + +impl LiveRange { + #[inline(always)] + pub fn num_fixed_uses(&self) -> u32 { + self.num_fixed_uses_and_flags & ((1 << 24) - 1) + } + #[inline(always)] + pub fn set_num_fixed_uses(&mut self, count: u32) { + debug_assert!(count < (1 << 24)); + self.num_fixed_uses_and_flags = (self.num_fixed_uses_and_flags & !((1 << 24) - 1)) | count; + } + #[inline(always)] + pub fn inc_num_fixed_uses(&mut self) { + debug_assert!(self.num_fixed_uses_and_flags & ((1 << 24) - 1) < ((1 << 24) - 1)); + self.num_fixed_uses_and_flags += 1; + } + #[inline(always)] + pub fn dec_num_fixed_uses(&mut self) { + debug_assert!(self.num_fixed_uses_and_flags & ((1 << 24) - 1) > 0); + self.num_fixed_uses_and_flags -= 1; + } + #[inline(always)] + pub fn set_flag(&mut self, flag: LiveRangeFlag) { + self.num_fixed_uses_and_flags |= (flag as u32) << 24; + } + #[inline(always)] + pub fn clear_flag(&mut self, flag: LiveRangeFlag) { + self.num_fixed_uses_and_flags &= !((flag as u32) << 24); + } + #[inline(always)] + pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { + self.num_fixed_uses_and_flags & ((flag as u32) << 24) != 0 + } +} + +#[derive(Clone, Debug)] +struct Use { + operand: Operand, + pos: ProgPoint, + slot: usize, + next_use: UseIndex, +} + +#[derive(Clone, Debug)] +struct Def { + operand: Operand, + pos: ProgPoint, + slot: usize, +} + +#[derive(Clone, Debug)] +struct LiveBundle { + first_range: LiveRangeIndex, + last_range: LiveRangeIndex, + spillset: SpillSetIndex, + allocation: Allocation, + prio: u32, // recomputed after every bulk update + spill_weight_and_props: u32, +} + +impl LiveBundle { + #[inline(always)] + fn set_cached_spill_weight_and_props(&mut self, spill_weight: u32, minimal: bool, fixed: bool) { + debug_assert!(spill_weight < ((1 << 30) - 1)); + self.spill_weight_and_props = + spill_weight | (if minimal { 1 << 31 } else { 0 }) | (if fixed { 1 << 30 } else { 0 }); + } + + 
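    // Aside on `CodeRange`'s `Ord` above (and `LiveRangeKey` below): two ranges
    // that overlap deliberately compare as `Ordering::Equal`, so a sorted
    // container keyed by ranges can be probed with a query range and will land
    // on a conflicting entry if one exists. A small sketch:
    //
    //     let a = CodeRange { from: ProgPoint::before(Inst::new(0)), to: ProgPoint::before(Inst::new(4)) };
    //     let b = CodeRange { from: ProgPoint::before(Inst::new(2)), to: ProgPoint::before(Inst::new(6)) };
    //     assert!(a.overlaps(&b));
    //     assert_eq!(a.cmp(&b), Ordering::Equal);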
#[inline(always)] + fn cached_minimal(&self) -> bool { + self.spill_weight_and_props & (1 << 31) != 0 + } + + #[inline(always)] + fn cached_fixed(&self) -> bool { + self.spill_weight_and_props & (1 << 30) != 0 + } + + #[inline(always)] + fn cached_spill_weight(&self) -> u32 { + self.spill_weight_and_props & !((1 << 30) - 1) + } +} + +#[derive(Clone, Debug)] +struct SpillSet { + bundles: LiveBundleVec, + size: u32, + class: RegClass, + slot: SpillSlotIndex, + reg_hint: Option, +} + +#[derive(Clone, Debug)] +struct VRegData { + reg: VReg, + def: DefIndex, + blockparam: Block, + first_range: LiveRangeIndex, +} + +#[derive(Clone, Debug)] +struct PRegData { + reg: PReg, + allocations: LiveRangeSet, +} + +/* + * Environment setup: + * + * We have seven fundamental objects: LiveRange, LiveBundle, SpillSet, Use, Def, VReg, PReg. + * + * The relationship is as follows: + * + * LiveRange --(vreg)--> shared(VReg) + * LiveRange --(bundle)--> shared(LiveBundle) + * LiveRange --(def)--> owns(Def) + * LiveRange --(use) --> list(Use) + * + * Use --(vreg)--> shared(VReg) + * + * Def --(vreg) --> owns(VReg) + * + * LiveBundle --(range)--> list(LiveRange) + * LiveBundle --(spillset)--> shared(SpillSet) + * LiveBundle --(parent)--> parent(LiveBundle) + * + * SpillSet --(parent)--> parent(SpillSet) + * SpillSet --(bundles)--> list(LiveBundle) + * + * VReg --(range)--> list(LiveRange) + * + * PReg --(ranges)--> set(LiveRange) + */ + +#[derive(Clone, Debug)] +struct Env<'a, F: Function> { + func: &'a F, + env: &'a MachineEnv, + cfginfo: CFGInfo, + liveins: Vec, + /// Blockparam outputs: from-vreg, (end of) from-block, (start of) + /// to-block, to-vreg. The field order is significant: these are sorted so + /// that a scan over vregs, then blocks in each range, can scan in + /// order through this (sorted) list and add allocs to the + /// half-move list. + blockparam_outs: Vec<(VRegIndex, Block, Block, VRegIndex)>, + /// Blockparam inputs: to-vreg, (start of) to-block, (end of) + /// from-block. As above for `blockparam_outs`, field order is + /// significant. + blockparam_ins: Vec<(VRegIndex, Block, Block)>, + /// Blockparam allocs: block, idx, vreg, alloc. Info to describe + /// blockparam locations at block entry, for metadata purposes + /// (e.g. for the checker). + blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>, + + ranges: Vec, + bundles: Vec, + spillsets: Vec, + uses: Vec, + defs: Vec, + vregs: Vec, + pregs: Vec, + allocation_queue: PrioQueue, + hot_code: LiveRangeSet, + clobbers: Vec, // Sorted list of insts with clobbers. + + spilled_bundles: Vec, + spillslots: Vec, + slots_by_size: Vec, + + // When multiple fixed-register constraints are present on a + // single VReg at a single program point (this can happen for, + // e.g., call args that use the same value multiple times), we + // remove all but one of the fixed-register constraints, make a + // note here, and add a clobber with that PReg instread to keep + // the register available. When we produce the final edit-list, we + // will insert a copy from wherever the VReg's primary allocation + // was to the approprate PReg. + // + // (progpoint, copy-from-preg, copy-to-preg) + multi_fixed_reg_fixups: Vec<(ProgPoint, PRegIndex, PRegIndex)>, + + inserted_moves: Vec, + + // Output: + edits: Vec<(u32, InsertMovePrio, Edit)>, + allocs: Vec, + inst_alloc_offsets: Vec, + num_spillslots: u32, + + stats: Stats, + + // For debug output only: a list of textual annotations at every + // ProgPoint to insert into the final allocated program listing. 
+ debug_annotations: std::collections::HashMap>, +} + +#[derive(Clone, Debug)] +struct SpillSlotData { + ranges: LiveRangeSet, + class: RegClass, + size: u32, + alloc: Allocation, + next_spillslot: SpillSlotIndex, +} + +#[derive(Clone, Debug)] +struct SpillSlotList { + first_spillslot: SpillSlotIndex, + last_spillslot: SpillSlotIndex, +} + +#[derive(Clone, Debug)] +struct PrioQueue { + heap: std::collections::BinaryHeap, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +struct PrioQueueEntry { + prio: u32, + bundle: LiveBundleIndex, +} + +#[derive(Clone, Debug)] +struct LiveRangeSet { + btree: BTreeMap, +} + +#[derive(Clone, Copy, Debug)] +struct LiveRangeKey { + from: u32, + to: u32, +} + +impl LiveRangeKey { + fn from_range(range: &CodeRange) -> Self { + Self { + from: range.from.to_index(), + to: range.to.to_index(), + } + } +} + +impl std::cmp::PartialEq for LiveRangeKey { + fn eq(&self, other: &Self) -> bool { + self.to > other.from && self.from < other.to + } +} +impl std::cmp::Eq for LiveRangeKey {} +impl std::cmp::PartialOrd for LiveRangeKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl std::cmp::Ord for LiveRangeKey { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.to <= other.from { + std::cmp::Ordering::Less + } else if self.from >= other.to { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Equal + } + } +} + +struct PrioQueueComparator<'a> { + prios: &'a [usize], +} +impl<'a> ContainerComparator for PrioQueueComparator<'a> { + type Ix = LiveBundleIndex; + fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering { + self.prios[a.index()].cmp(&self.prios[b.index()]) + } +} + +impl PrioQueue { + fn new() -> Self { + PrioQueue { + heap: std::collections::BinaryHeap::new(), + } + } + + fn insert(&mut self, bundle: LiveBundleIndex, prio: usize) { + self.heap.push(PrioQueueEntry { + prio: prio as u32, + bundle, + }); + } + + fn is_empty(self) -> bool { + self.heap.is_empty() + } + + fn pop(&mut self) -> Option { + self.heap.pop().map(|entry| entry.bundle) + } +} + +impl LiveRangeSet { + pub(crate) fn new() -> Self { + Self { + btree: BTreeMap::new(), + } + } +} + +fn spill_weight_from_policy(policy: OperandPolicy) -> u32 { + match policy { + OperandPolicy::Any => 1000, + OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000, + _ => 0, + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum Requirement { + Fixed(PReg), + Register(RegClass), + Any(RegClass), +} +impl Requirement { + fn class(self) -> RegClass { + match self { + Requirement::Fixed(preg) => preg.class(), + Requirement::Register(class) | Requirement::Any(class) => class, + } + } + + fn merge(self, other: Requirement) -> Option { + if self.class() != other.class() { + return None; + } + match (self, other) { + (other, Requirement::Any(_)) | (Requirement::Any(_), other) => Some(other), + (Requirement::Register(_), Requirement::Fixed(preg)) + | (Requirement::Fixed(preg), Requirement::Register(_)) => { + Some(Requirement::Fixed(preg)) + } + (Requirement::Register(_), Requirement::Register(_)) => Some(self), + (Requirement::Fixed(a), Requirement::Fixed(b)) if a == b => Some(self), + _ => None, + } + } + fn from_operand(op: Operand) -> Requirement { + match op.policy() { + OperandPolicy::FixedReg(preg) => Requirement::Fixed(preg), + OperandPolicy::Reg | OperandPolicy::Reuse(_) => Requirement::Register(op.class()), + _ => Requirement::Any(op.class()), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum 
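// Illustration of `Requirement::merge` above: requirements gathered from the
// uses of a bundle combine toward the most constrained form, and incompatible
// requirements (mismatched classes, or two different fixed registers) merge to
// `None`:
//
//     let any = Requirement::Any(RegClass::Int);
//     let reg = Requirement::Register(RegClass::Int);
//     let fixed = Requirement::Fixed(PReg::new(3, RegClass::Int));
//     assert_eq!(reg.merge(any), Some(reg));
//     assert_eq!(reg.merge(fixed), Some(fixed));
//     assert_eq!(fixed.merge(Requirement::Fixed(PReg::new(4, RegClass::Int))), None);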
AllocRegResult { + Allocated(Allocation), + Conflict(LiveBundleVec), + ConflictWithFixed, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct BundleProperties { + minimal: bool, + fixed: bool, +} + +#[derive(Clone, Debug)] +struct InsertedMove { + pos: ProgPoint, + prio: InsertMovePrio, + from_alloc: Allocation, + to_alloc: Allocation, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum InsertMovePrio { + InEdgeMoves, + BlockParam, + Regular, + MultiFixedReg, + ReusedInput, + OutEdgeMoves, +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct Stats { + initial_liverange_count: usize, + merged_bundle_count: usize, + process_bundle_count: usize, + process_bundle_reg_probes_fixed: usize, + process_bundle_reg_success_fixed: usize, + process_bundle_reg_probes_any: usize, + process_bundle_reg_success_any: usize, + evict_bundle_event: usize, + evict_bundle_count: usize, + splits: usize, + splits_clobbers: usize, + splits_hot: usize, + splits_conflicts: usize, + splits_all: usize, + final_liverange_count: usize, + final_bundle_count: usize, + spill_bundle_count: usize, + spill_bundle_reg_probes: usize, + spill_bundle_reg_success: usize, + blockparam_ins_count: usize, + blockparam_outs_count: usize, + blockparam_allocs_count: usize, + halfmoves_count: usize, + edits_count: usize, +} + +impl<'a, F: Function> Env<'a, F> { + pub(crate) fn new(func: &'a F, env: &'a MachineEnv, cfginfo: CFGInfo) -> Self { + Self { + func, + env, + cfginfo, + + liveins: vec![], + blockparam_outs: vec![], + blockparam_ins: vec![], + blockparam_allocs: vec![], + bundles: vec![], + ranges: vec![], + spillsets: vec![], + uses: vec![], + defs: vec![], + vregs: vec![], + pregs: vec![], + allocation_queue: PrioQueue::new(), + clobbers: vec![], + hot_code: LiveRangeSet::new(), + spilled_bundles: vec![], + spillslots: vec![], + slots_by_size: vec![], + + multi_fixed_reg_fixups: vec![], + inserted_moves: vec![], + edits: vec![], + allocs: vec![], + inst_alloc_offsets: vec![], + num_spillslots: 0, + + stats: Stats::default(), + + debug_annotations: std::collections::HashMap::new(), + } + } + + fn create_pregs_and_vregs(&mut self) { + // Create RRegs from the RealRegUniverse. + for &preg in &self.env.regs { + self.pregs.push(PRegData { + reg: preg, + allocations: LiveRangeSet::new(), + }); + } + // Create VRegs from the vreg count. + for idx in 0..self.func.num_vregs() { + // We'll fill in the real details when we see the def. + let reg = VReg::new(idx, RegClass::Int); + self.add_vreg(VRegData { + reg, + def: DefIndex::invalid(), + first_range: LiveRangeIndex::invalid(), + blockparam: Block::invalid(), + }); + } + // Create allocations too. 
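        // The loop below lays out a single flat `allocs` array, recording each
        // instruction's starting offset in `inst_alloc_offsets`, so the final
        // allocations for instruction `i` can later be recovered as a contiguous
        // slice (the same layout that `Output::inst_allocs`, used by the checker
        // above, is expected to read back). Sketch of the lookup, with `i`,
        // `allocs`, and `inst_alloc_offsets` as local placeholder names:
        //
        //     let start = inst_alloc_offsets[i] as usize;
        //     let len = func.inst_operands(Inst::new(i)).len();
        //     let allocs_for_inst = &allocs[start..start + len];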
+ for inst in 0..self.func.insts() { + let start = self.allocs.len() as u32; + self.inst_alloc_offsets.push(start); + for _ in 0..self.func.inst_operands(Inst::new(inst)).len() { + self.allocs.push(Allocation::none()); + } + } + } + + fn add_vreg(&mut self, data: VRegData) -> VRegIndex { + let idx = self.vregs.len(); + self.vregs.push(data); + VRegIndex::new(idx) + } + + fn create_liverange(&mut self, range: CodeRange) -> LiveRangeIndex { + let idx = self.ranges.len(); + self.ranges.push(LiveRange { + range, + vreg: VRegIndex::invalid(), + bundle: LiveBundleIndex::invalid(), + uses_spill_weight: 0, + num_fixed_uses_and_flags: 0, + first_use: UseIndex::invalid(), + last_use: UseIndex::invalid(), + def: DefIndex::invalid(), + next_in_bundle: LiveRangeIndex::invalid(), + next_in_reg: LiveRangeIndex::invalid(), + }); + LiveRangeIndex::new(idx) + } + + /// Mark `range` as live for the given `vreg`. `num_ranges` is used to prevent + /// excessive coalescing on pathological inputs. + /// + /// Returns the liverange that contains the given range. + fn add_liverange_to_vreg( + &mut self, + vreg: VRegIndex, + range: CodeRange, + num_ranges: &mut usize, + ) -> LiveRangeIndex { + log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); + const COALESCE_LIMIT: usize = 100_000; + + // Look for a single or contiguous sequence of existing live ranges that overlap with the + // given range. + + let mut insert_after = LiveRangeIndex::invalid(); + let mut merged = LiveRangeIndex::invalid(); + let mut iter = self.vregs[vreg.index()].first_range; + let mut prev = LiveRangeIndex::invalid(); + while iter.is_valid() { + let existing = &mut self.ranges[iter.index()]; + log::debug!(" -> existing range: {:?}", existing); + if range.from >= existing.range.to && *num_ranges < COALESCE_LIMIT { + // New range comes fully after this one -- record it as a lower bound. + insert_after = iter; + prev = iter; + iter = existing.next_in_reg; + log::debug!(" -> lower bound"); + continue; + } + if range.to <= existing.range.from { + // New range comes fully before this one -- we're found our spot. + log::debug!(" -> upper bound (break search loop)"); + break; + } + // If we're here, then we overlap with at least one endpoint of the range. + log::debug!(" -> must overlap"); + debug_assert!(range.overlaps(&existing.range)); + if merged.is_invalid() { + // This is the first overlapping range. Extend to simply cover the new range. + merged = iter; + if range.from < existing.range.from { + existing.range.from = range.from; + } + if range.to > existing.range.to { + existing.range.to = range.to; + } + log::debug!( + " -> extended range of existing range to {:?}", + existing.range + ); + // Continue; there may be more ranges to merge with. + prev = iter; + iter = existing.next_in_reg; + continue; + } + // We overlap but we've already extended the first overlapping existing liverange, so + // we need to do a true merge instead. + log::debug!(" -> merging {:?} into {:?}", iter, merged); + log::debug!( + " -> before: merged {:?}: {:?}", + merged, + self.ranges[merged.index()] + ); + debug_assert!( + self.ranges[iter.index()].range.from >= self.ranges[merged.index()].range.from + ); // Because we see LRs in order. 
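            // Worked example of this merging loop: if the vreg already has
            // ranges [i0, i4) and [i6, i8) (abbreviating program points by
            // instruction), adding [i3, i7) first extends [i0, i4) in place to
            // [i0, i7), then absorbs [i6, i8) into it, leaving a single range
            // [i0, i8); the absorbed range's uses are re-attached by
            // `distribute_liverange_uses` below and the range is unlinked from
            // the vreg's range list.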
+ if self.ranges[iter.index()].range.to > self.ranges[merged.index()].range.to { + self.ranges[merged.index()].range.to = self.ranges[iter.index()].range.to; + } + if self.ranges[iter.index()].def.is_valid() { + self.ranges[merged.index()].def = self.ranges[iter.index()].def; + } + self.distribute_liverange_uses(vreg, iter, merged); + log::debug!( + " -> after: merged {:?}: {:?}", + merged, + self.ranges[merged.index()] + ); + + // Remove from list of liveranges for this vreg. + let next = self.ranges[iter.index()].next_in_reg; + if prev.is_valid() { + self.ranges[prev.index()].next_in_reg = next; + } else { + self.vregs[vreg.index()].first_range = next; + } + // `prev` remains the same (we deleted current range). + iter = next; + } + + // If we get here and did not merge into an existing liverange or liveranges, then we need + // to create a new one. + if merged.is_invalid() { + let lr = self.create_liverange(range); + self.ranges[lr.index()].vreg = vreg; + if insert_after.is_valid() { + let next = self.ranges[insert_after.index()].next_in_reg; + self.ranges[lr.index()].next_in_reg = next; + self.ranges[insert_after.index()].next_in_reg = lr; + } else { + self.ranges[lr.index()].next_in_reg = self.vregs[vreg.index()].first_range; + self.vregs[vreg.index()].first_range = lr; + } + *num_ranges += 1; + lr + } else { + merged + } + } + + fn distribute_liverange_uses( + &mut self, + vreg: VRegIndex, + from: LiveRangeIndex, + into: LiveRangeIndex, + ) { + log::debug!("distribute from {:?} to {:?}", from, into); + assert_eq!( + self.ranges[from.index()].vreg, + self.ranges[into.index()].vreg + ); + let from_range = self.ranges[from.index()].range; + let into_range = self.ranges[into.index()].range; + // For every use in `from`... + let mut prev = UseIndex::invalid(); + let mut iter = self.ranges[from.index()].first_use; + while iter.is_valid() { + let usedata = &mut self.uses[iter.index()]; + // If we have already passed `into`, we're done. + if usedata.pos >= into_range.to { + break; + } + // If this use is within the range of `into`, move it over. + if into_range.contains_point(usedata.pos) { + log::debug!(" -> moving {:?}", iter); + let next = usedata.next_use; + if prev.is_valid() { + self.uses[prev.index()].next_use = next; + } else { + self.ranges[from.index()].first_use = next; + } + if iter == self.ranges[from.index()].last_use { + self.ranges[from.index()].last_use = prev; + } + // `prev` remains the same. + self.update_liverange_stats_on_remove_use(from, iter); + // This may look inefficient but because we are always merging + // non-overlapping LiveRanges, all uses will be at the beginning + // or end of the existing use-list; both cases are optimized. + self.insert_use_into_liverange_and_update_stats(into, iter); + iter = next; + } else { + prev = iter; + iter = usedata.next_use; + } + } + + // Distribute def too if `from` has a def and the def is in range of `into_range`. 
+ if self.ranges[from.index()].def.is_valid() { + let def_idx = self.vregs[vreg.index()].def; + if from_range.contains_point(self.defs[def_idx.index()].pos) { + self.ranges[into.index()].def = def_idx; + } + } + } + + fn update_liverange_stats_on_remove_use(&mut self, from: LiveRangeIndex, u: UseIndex) { + log::debug!("remove use {:?} from lr {:?}", u, from); + debug_assert!(u.is_valid()); + let usedata = &self.uses[u.index()]; + let lrdata = &mut self.ranges[from.index()]; + if let OperandPolicy::FixedReg(_) = usedata.operand.policy() { + lrdata.dec_num_fixed_uses(); + } + log::debug!( + " -> subtract {} from uses_spill_weight {}; now {}", + spill_weight_from_policy(usedata.operand.policy()), + lrdata.uses_spill_weight, + lrdata.uses_spill_weight - spill_weight_from_policy(usedata.operand.policy()), + ); + + lrdata.uses_spill_weight -= spill_weight_from_policy(usedata.operand.policy()); + } + + fn insert_use_into_liverange_and_update_stats(&mut self, into: LiveRangeIndex, u: UseIndex) { + let insert_pos = self.uses[u.index()].pos; + let first = self.ranges[into.index()].first_use; + self.uses[u.index()].next_use = UseIndex::invalid(); + if first.is_invalid() { + // Empty list. + self.ranges[into.index()].first_use = u; + self.ranges[into.index()].last_use = u; + } else if insert_pos > self.uses[self.ranges[into.index()].last_use.index()].pos { + // After tail. + let tail = self.ranges[into.index()].last_use; + self.uses[tail.index()].next_use = u; + self.ranges[into.index()].last_use = u; + } else { + // Otherwise, scan linearly to find insertion position. + let mut prev = UseIndex::invalid(); + let mut iter = first; + while iter.is_valid() { + if self.uses[iter.index()].pos > insert_pos { + break; + } + prev = iter; + iter = self.uses[iter.index()].next_use; + } + self.uses[u.index()].next_use = iter; + if prev.is_valid() { + self.uses[prev.index()].next_use = u; + } else { + self.ranges[into.index()].first_use = u; + } + if iter.is_invalid() { + self.ranges[into.index()].last_use = u; + } + } + + // Update stats. + let policy = self.uses[u.index()].operand.policy(); + if let OperandPolicy::FixedReg(_) = policy { + self.ranges[into.index()].inc_num_fixed_uses(); + } + log::debug!( + "insert use {:?} into lr {:?} with weight {}", + u, + into, + spill_weight_from_policy(policy) + ); + self.ranges[into.index()].uses_spill_weight += spill_weight_from_policy(policy); + log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight); + } + + fn find_vreg_liverange_for_pos( + &self, + vreg: VRegIndex, + pos: ProgPoint, + ) -> Option { + let mut range = self.vregs[vreg.index()].first_range; + while range.is_valid() { + if self.ranges[range.index()].range.contains_point(pos) { + return Some(range); + } + range = self.ranges[range.index()].next_in_reg; + } + None + } + + fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { + let preg_idx = PRegIndex::new(reg.index()); + let lr = self.create_liverange(range); + self.pregs[preg_idx.index()] + .allocations + .btree + .insert(LiveRangeKey::from_range(&range), lr); + } + + fn compute_liveness(&mut self) { + // Create initial LiveIn bitsets. + for _ in 0..self.func.blocks() { + self.liveins.push(BitVec::new()); + } + + let num_vregs = self.func.num_vregs(); + + let mut num_ranges = 0; + + // Create Uses and Defs referring to VRegs, and place the Uses + // in LiveRanges. + // + // We iterate backward, so as long as blocks are well-ordered + // (in RPO), we see uses before defs. 
+ // + // Because of this, we can construct live ranges in one pass, + // i.e., considering each block once, propagating live + // registers backward across edges to a bitset at each block + // exit point, gen'ing at uses, kill'ing at defs, and meeting + // with a union. + let mut block_to_postorder: SmallVec<[Option; 16]> = + smallvec![None; self.func.blocks()]; + for i in 0..self.cfginfo.postorder.len() { + let block = self.cfginfo.postorder[i]; + block_to_postorder[block.index()] = Some(i as u32); + } + + // Track current LiveRange for each vreg. + let mut vreg_ranges: Vec = + vec![LiveRangeIndex::invalid(); self.func.num_vregs()]; + + for i in 0..self.cfginfo.postorder.len() { + // (avoid borrowing `self`) + let block = self.cfginfo.postorder[i]; + block_to_postorder[block.index()] = Some(i as u32); + + // Init live-set to union of liveins from successors + // (excluding backedges; those are handled below). + let mut live = BitVec::with_capacity(num_vregs); + for &succ in self.func.block_succs(block) { + live.or(&self.liveins[succ.index()]); + } + + // Initially, registers are assumed live for the whole block. + for vreg in live.iter() { + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: self.cfginfo.block_exit[block.index()].next(), + }; + log::debug!( + "vreg {:?} live at end of block --> create range {:?}", + VRegIndex::new(vreg), + range + ); + let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range, &mut num_ranges); + vreg_ranges[vreg] = lr; + } + + // Create vreg data for blockparams. + for param in self.func.block_params(block) { + self.vregs[param.vreg()].reg = *param; + self.vregs[param.vreg()].blockparam = block; + } + + let insns = self.func.block_insns(block); + + // If the last instruction is a branch (rather than + // return), create blockparam_out entries. + if self.func.is_branch(insns.last()) { + let operands = self.func.inst_operands(insns.last()); + let mut i = 0; + for &succ in self.func.block_succs(block) { + for &blockparam in self.func.block_params(succ) { + let from_vreg = VRegIndex::new(operands[i].vreg().vreg()); + let blockparam_vreg = VRegIndex::new(blockparam.vreg()); + self.blockparam_outs + .push((from_vreg, block, succ, blockparam_vreg)); + i += 1; + } + } + } + + // For each instruction, in reverse order, process + // operands and clobbers. + for inst in insns.rev().iter() { + if self.func.inst_clobbers(inst).len() > 0 { + self.clobbers.push(inst); + } + // Mark clobbers with CodeRanges on PRegs. + for i in 0..self.func.inst_clobbers(inst).len() { + // don't borrow `self` + let clobber = self.func.inst_clobbers(inst)[i]; + let range = CodeRange { + from: ProgPoint::before(inst), + to: ProgPoint::before(inst.next()), + }; + self.add_liverange_to_preg(range, clobber); + } + + // Does the instruction have any input-reusing + // outputs? This is important below to establish + // proper interference wrt other inputs. + let mut reused_input = None; + for op in self.func.inst_operands(inst) { + if let OperandPolicy::Reuse(i) = op.policy() { + reused_input = Some(i); + break; + } + } + + // Process defs and uses. + for i in 0..self.func.inst_operands(inst).len() { + // don't borrow `self` + let operand = self.func.inst_operands(inst)[i]; + match operand.kind() { + OperandKind::Def => { + // Create the Def object. 
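+ // Note on operand positions (a reading of the match below, not a
+ // normative spec): a def at pos `Both` occupies its register across the
+ // whole instruction, so its live range is anchored at the before-point;
+ // symmetrically, a use at `Both` is anchored at the after-point in the
+ // Use arm further down. E.g. (hypothetical), a Both-def at inst 7 starts
+ // its range at ProgPoint::before(inst 7).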
+ let pos = match operand.pos() { + OperandPos::Before | OperandPos::Both => ProgPoint::before(inst), + OperandPos::After => ProgPoint::after(inst), + }; + let def = DefIndex(self.defs.len() as u32); + self.defs.push(Def { + operand, + pos, + slot: i, + }); + + log::debug!("Def of {} at {:?}", operand.vreg(), pos); + + // Fill in vreg's actual data. + debug_assert!(self.vregs[operand.vreg().vreg()].def.is_invalid()); + self.vregs[operand.vreg().vreg()].reg = operand.vreg(); + self.vregs[operand.vreg().vreg()].def = def; + + // Trim the range for this vreg to start + // at `pos` if it previously ended at the + // start of this block (i.e. was not + // merged into some larger LiveRange due + // to out-of-order blocks). + let mut lr = vreg_ranges[operand.vreg().vreg()]; + log::debug!(" -> has existing LR {:?}", lr); + // If there was no liverange (dead def), create a trivial one. + if lr.is_invalid() { + lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + CodeRange { + from: pos, + to: pos.next(), + }, + &mut num_ranges, + ); + log::debug!(" -> invalid; created {:?}", lr); + } + if self.ranges[lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!(" -> started at block start; trimming to {:?}", pos); + self.ranges[lr.index()].range.from = pos; + } + // Note that the liverange contains a def. + self.ranges[lr.index()].def = def; + // Remove from live-set. + live.set(operand.vreg().vreg(), false); + vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); + } + OperandKind::Use => { + // Establish where the use occurs. + let mut pos = match operand.pos() { + OperandPos::Before => ProgPoint::before(inst), + OperandPos::Both | OperandPos::After => ProgPoint::after(inst), + }; + // If there are any reused inputs in this + // instruction, and this is *not* the + // reused input, force `pos` to + // `After`. (See note below for why; it's + // very subtle!) + if reused_input.is_some() && reused_input.unwrap() != i { + pos = ProgPoint::after(inst); + } + // If this is a branch, extend `pos` to + // the end of the block. (Branch uses are + // blockparams and need to be live at the + // end of the block. + if self.func.is_branch(inst) { + pos = self.cfginfo.block_exit[block.index()]; + } + + // Create the actual use object. + let u = UseIndex(self.uses.len() as u32); + self.uses.push(Use { + operand, + pos, + slot: i, + next_use: UseIndex::invalid(), + }); + + // Create/extend the LiveRange and add the use to the range. + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + let lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + range, + &mut num_ranges, + ); + vreg_ranges[operand.vreg().vreg()] = lr; + + log::debug!("Use of {:?} at {:?} -> {:?} -> {:?}", operand, pos, u, lr); + + self.insert_use_into_liverange_and_update_stats(lr, u); + + // Add to live-set. + live.set(operand.vreg().vreg(), true); + } + } + } + } + + // Block parameters define vregs at the very beginning of + // the block. Remove their live vregs from the live set + // here. + for vreg in self.func.block_params(block) { + if live.get(vreg.vreg()) { + live.set(vreg.vreg(), false); + } else { + // Create trivial liverange if blockparam is dead. + let start = self.cfginfo.block_entry[block.index()]; + self.add_liverange_to_vreg( + VRegIndex::new(vreg.vreg()), + CodeRange { + from: start, + to: start.next(), + }, + &mut num_ranges, + ); + } + // add `blockparam_ins` entries. 
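+ // One entry per incoming edge. E.g. (hypothetical block/vreg numbers): a
+ // blockparam v7 of block3 with preds {block1, block2} yields the entries
+ // (v7, block3, block1) and (v7, block3, block2); these are matched up
+ // with `blockparam_outs` later so that moves can be inserted on each edge.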
+ let vreg_idx = VRegIndex::new(vreg.vreg()); + for &pred in self.func.block_preds(block) { + self.blockparam_ins.push((vreg_idx, block, pred)); + } + } + + // Loop-handling: to handle backedges, rather than running + // a fixpoint loop, we add a live-range for every value + // live at the beginning of the loop over the whole loop + // body. + // + // To determine what the "loop body" consists of, we find + // the transitively minimum-reachable traversal index in + // our traversal order before the current block + // index. When we discover a backedge, *all* block indices + // within the traversal range are considered part of the + // loop body. This is guaranteed correct (though perhaps + // an overapproximation) even for irreducible control + // flow, because it will find all blocks to which the + // liveness could flow backward over which we've already + // scanned, and it should give good results for reducible + // control flow with properly ordered blocks. + let mut min_pred = i; + let mut loop_scan = i; + log::debug!( + "looking for loops from postorder#{} (block{})", + i, + self.cfginfo.postorder[i].index() + ); + while loop_scan >= min_pred { + let block = self.cfginfo.postorder[loop_scan]; + log::debug!( + " -> scan at postorder#{} (block{})", + loop_scan, + block.index() + ); + for &pred in self.func.block_preds(block) { + log::debug!( + " -> pred block{} (postorder#{})", + pred.index(), + block_to_postorder[pred.index()].unwrap_or(min_pred as u32) + ); + min_pred = std::cmp::min( + min_pred, + block_to_postorder[pred.index()].unwrap_or(min_pred as u32) as usize, + ); + log::debug!(" -> min_pred = {}", min_pred); + } + if loop_scan == 0 { + break; + } + loop_scan -= 1; + } + + if min_pred < i { + // We have one or more backedges, and the loop body is + // (conservatively) postorder[min_pred..i]. Find a + // range that covers all of those blocks. + let loop_blocks = &self.cfginfo.postorder[min_pred..=i]; + let loop_begin = loop_blocks + .iter() + .map(|b| self.cfginfo.block_entry[b.index()]) + .min() + .unwrap(); + let loop_end = loop_blocks + .iter() + .map(|b| self.cfginfo.block_exit[b.index()]) + .max() + .unwrap(); + let loop_range = CodeRange { + from: loop_begin, + to: loop_end, + }; + log::debug!( + "found backedge wrt postorder: postorder#{}..postorder#{}", + min_pred, + i + ); + log::debug!(" -> loop range {:?}", loop_range); + for &loopblock in loop_blocks { + self.liveins[loopblock.index()].or(&live); + } + for vreg in live.iter() { + log::debug!( + "vreg {:?} live at top of loop (block {:?}) -> range {:?}", + VRegIndex::new(vreg), + block, + loop_range, + ); + self.add_liverange_to_vreg(VRegIndex::new(vreg), loop_range, &mut num_ranges); + } + } + + log::debug!("liveins at block {:?} = {:?}", block, live); + self.liveins[block.index()] = live; + } + + // Do a cleanup pass: if there are any LiveRanges with + // multiple uses (or defs) at the same ProgPoint and there is + // more than one FixedReg constraint at that ProgPoint, we + // need to record all but one of them in a special fixup list + // and handle them later; otherwise, bundle-splitting to + // create minimal bundles becomes much more complex (we would + // have to split the multiple uses at the same progpoint into + // different bundles, which breaks invariants related to + // disjoint ranges and bundles). 
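+ // Illustrative example (hypothetical registers/vregs): if v3 appears twice
+ // at the same ProgPoint with constraints FixedReg(r1) and FixedReg(r2),
+ // the loop below rewrites the second occurrence to a plain Reg constraint,
+ // records a fixup (pos, r1, r2) in `multi_fixed_reg_fixups` so a move can
+ // satisfy the second constraint later, and additionally reserves r2 as a
+ // clobber at that instruction.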
+ for vreg in 0..self.vregs.len() { + let mut iter = self.vregs[vreg].first_range; + while iter.is_valid() { + log::debug!( + "multi-fixed-reg cleanup: vreg {:?} range {:?}", + VRegIndex::new(vreg), + iter + ); + let mut last_point = None; + let mut seen_fixed_for_vreg: SmallVec<[VReg; 16]> = smallvec![]; + let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; + let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; + let mut fixup_multi_fixed_vregs = |pos: ProgPoint, + op: &mut Operand, + fixups: &mut Vec<( + ProgPoint, + PRegIndex, + PRegIndex, + )>| { + if last_point.is_some() && Some(pos) != last_point { + seen_fixed_for_vreg.clear(); + first_preg.clear(); + } + last_point = Some(pos); + + if let OperandPolicy::FixedReg(preg) = op.policy() { + let vreg_idx = VRegIndex::new(op.vreg().vreg()); + let preg_idx = PRegIndex::new(preg.index()); + log::debug!( + "at pos {:?}, vreg {:?} has fixed constraint to preg {:?}", + pos, + vreg_idx, + preg_idx + ); + if let Some(idx) = seen_fixed_for_vreg.iter().position(|r| *r == op.vreg()) + { + let orig_preg = first_preg[idx]; + log::debug!(" -> duplicate; switching to policy Reg"); + fixups.push((pos, orig_preg, preg_idx)); + *op = Operand::new(op.vreg(), OperandPolicy::Reg, op.kind(), op.pos()); + extra_clobbers.push((preg, pos.inst)); + } else { + seen_fixed_for_vreg.push(op.vreg()); + first_preg.push(preg_idx); + } + } + }; + + if self.ranges[iter.index()].def.is_valid() { + let def_idx = self.vregs[vreg].def; + let pos = self.defs[def_idx.index()].pos; + fixup_multi_fixed_vregs( + pos, + &mut self.defs[def_idx.index()].operand, + &mut self.multi_fixed_reg_fixups, + ); + } + + let mut use_iter = self.ranges[iter.index()].first_use; + while use_iter.is_valid() { + let pos = self.uses[use_iter.index()].pos; + fixup_multi_fixed_vregs( + pos, + &mut self.uses[use_iter.index()].operand, + &mut self.multi_fixed_reg_fixups, + ); + use_iter = self.uses[use_iter.index()].next_use; + } + + for (clobber, inst) in extra_clobbers { + let range = CodeRange { + from: ProgPoint::before(inst), + to: ProgPoint::before(inst.next()), + }; + self.add_liverange_to_preg(range, clobber); + } + + iter = self.ranges[iter.index()].next_in_reg; + } + } + + self.clobbers.sort(); + self.blockparam_ins.sort(); + self.blockparam_outs.sort(); + + self.stats.initial_liverange_count = self.ranges.len(); + self.stats.blockparam_ins_count = self.blockparam_ins.len(); + self.stats.blockparam_outs_count = self.blockparam_outs.len(); + } + + fn compute_hot_code(&mut self) { + // Initialize hot_code to contain inner loops only. + let mut header = Block::invalid(); + let mut backedge = Block::invalid(); + for block in 0..self.func.blocks() { + let block = Block::new(block); + let max_backedge = self + .func + .block_preds(block) + .iter() + .filter(|b| b.index() >= block.index()) + .max(); + if let Some(&b) = max_backedge { + header = block; + backedge = b; + } + if block == backedge { + // We've traversed a loop body without finding a deeper loop. Mark the whole body + // as hot. 
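+ // For example (hypothetical block numbers): if block5 has a predecessor
+ // block9, then 9 >= 5 marks it as a backedge, setting header = block5 and
+ // backedge = block9; when the scan later reaches block9 without having
+ // found a deeper loop, the span from block5's entry to block9's exit is
+ // inserted into `hot_code` below.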
+ let from = self.cfginfo.block_entry[header.index()]; + let to = self.cfginfo.block_exit[backedge.index()].next(); + let range = CodeRange { from, to }; + let lr = self.create_liverange(range); + self.hot_code + .btree + .insert(LiveRangeKey::from_range(&range), lr); + } + } + } + + fn create_bundle(&mut self) -> LiveBundleIndex { + let bundle = self.bundles.len(); + self.bundles.push(LiveBundle { + allocation: Allocation::none(), + first_range: LiveRangeIndex::invalid(), + last_range: LiveRangeIndex::invalid(), + spillset: SpillSetIndex::invalid(), + prio: 0, + spill_weight_and_props: 0, + }); + LiveBundleIndex::new(bundle) + } + + fn try_merge_reused_register(&mut self, from: VRegIndex, to: VRegIndex) { + log::debug!("try_merge_reused_register: from {:?} to {:?}", from, to); + let def_idx = self.vregs[to.index()].def; + log::debug!(" -> def_idx = {:?}", def_idx); + debug_assert!(def_idx.is_valid()); + let def = &mut self.defs[def_idx.index()]; + let def_point = def.pos; + log::debug!(" -> def_point = {:?}", def_point); + + // Can't merge if def happens at use-point. + if def_point.pos == InstPosition::Before { + return; + } + + // Find the corresponding liverange for the use at the def-point. + let use_lr_at_def = self.find_vreg_liverange_for_pos(from, def_point); + log::debug!(" -> use_lr_at_def = {:?}", use_lr_at_def); + + // If the use is not live at the def (i.e. this inst is its last use), we can merge. + if use_lr_at_def.is_none() { + // Find the bundles and merge. Note that bundles have not been split + // yet so every liverange in the vreg will have the same bundle (so + // no need to look up the proper liverange here). + let from_bundle = self.ranges[self.vregs[from.index()].first_range.index()].bundle; + let to_bundle = self.ranges[self.vregs[to.index()].first_range.index()].bundle; + log::debug!(" -> merging from {:?} to {:?}", from_bundle, to_bundle); + self.merge_bundles(from_bundle, to_bundle); + return; + } + + log::debug!(" -> no merge"); + + // Note: there may be other cases where it would benefit us to split the + // LiveRange and bundle for the input at the def-point, allowing us to + // avoid a copy. However, the cases where this helps in IonMonkey (only + // memory uses after the definition, seemingly) appear to be marginal at + // best. + } + + fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool { + if from == to { + // Merge bundle into self -- trivial merge. + return true; + } + log::debug!( + "merging from bundle{} to bundle{}", + from.index(), + to.index() + ); + + let vreg_from = self.ranges[self.bundles[from.index()].first_range.index()].vreg; + let vreg_to = self.ranges[self.bundles[to.index()].first_range.index()].vreg; + // Both bundles must deal with the same RegClass. All vregs in a bundle + // have to have the same regclass (because bundles start with one vreg + // and all merging happens here) so we can just sample the first vreg of + // each bundle. + if self.vregs[vreg_from.index()].reg.class() != self.vregs[vreg_to.index()].reg.class() { + return false; + } + + // Check for overlap in LiveRanges. + let mut iter0 = self.bundles[from.index()].first_range; + let mut iter1 = self.bundles[to.index()].first_range; + let mut range_count = 0; + while iter0.is_valid() && iter1.is_valid() { + range_count += 1; + if range_count > 200 { + // Limit merge complexity. 
+ return false; + } + + if self.ranges[iter0.index()].range.from >= self.ranges[iter1.index()].range.to { + iter1 = self.ranges[iter1.index()].next_in_bundle; + } else if self.ranges[iter1.index()].range.from >= self.ranges[iter0.index()].range.to { + iter0 = self.ranges[iter0.index()].next_in_bundle; + } else { + // Overlap -- cannot merge. + return false; + } + } + + // If we reach here, then the bundles do not overlap -- merge them! + // We do this with a merge-sort-like scan over both chains, removing + // from `to` (`iter1`) and inserting into `from` (`iter0`). + let mut iter0 = self.bundles[from.index()].first_range; + let mut iter1 = self.bundles[to.index()].first_range; + if iter0.is_invalid() { + // `from` bundle is empty -- trivial merge. + return true; + } + if iter1.is_invalid() { + // `to` bundle is empty -- just move head/tail pointers over from + // `from` and set `bundle` up-link on all ranges. + let head = self.bundles[from.index()].first_range; + let tail = self.bundles[from.index()].last_range; + self.bundles[to.index()].first_range = head; + self.bundles[to.index()].last_range = tail; + self.bundles[from.index()].first_range = LiveRangeIndex::invalid(); + self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); + while iter0.is_valid() { + self.ranges[iter0.index()].bundle = from; + iter0 = self.ranges[iter0.index()].next_in_bundle; + } + return true; + } + + // Two non-empty chains of LiveRanges: traverse both simultaneously and + // merge links into `from`. + let mut prev = LiveRangeIndex::invalid(); + while iter0.is_valid() || iter1.is_valid() { + // Pick the next range. + let next_range_iter = if iter0.is_valid() { + if iter1.is_valid() { + if self.ranges[iter0.index()].range.from + <= self.ranges[iter1.index()].range.from + { + &mut iter0 + } else { + &mut iter1 + } + } else { + &mut iter0 + } + } else { + &mut iter1 + }; + let next = *next_range_iter; + *next_range_iter = self.ranges[next.index()].next_in_bundle; + + // link from prev. + if prev.is_valid() { + self.ranges[prev.index()].next_in_bundle = next; + } else { + self.bundles[to.index()].first_range = next; + } + self.bundles[to.index()].last_range = next; + self.ranges[next.index()].bundle = to; + prev = next; + } + self.bundles[from.index()].first_range = LiveRangeIndex::invalid(); + self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); + + true + } + + fn insert_liverange_into_bundle(&mut self, bundle: LiveBundleIndex, lr: LiveRangeIndex) { + self.ranges[lr.index()].next_in_bundle = LiveRangeIndex::invalid(); + self.ranges[lr.index()].bundle = bundle; + if self.bundles[bundle.index()].first_range.is_invalid() { + // Empty bundle. + self.bundles[bundle.index()].first_range = lr; + self.bundles[bundle.index()].last_range = lr; + } else if self.ranges[self.bundles[bundle.index()].first_range.index()] + .range + .to + <= self.ranges[lr.index()].range.from + { + // After last range in bundle. + let last = self.bundles[bundle.index()].last_range; + self.ranges[last.index()].next_in_bundle = lr; + self.bundles[bundle.index()].last_range = lr; + } else { + // Find location to insert. 
+ let mut iter = self.bundles[bundle.index()].first_range; + let mut insert_after = LiveRangeIndex::invalid(); + let insert_range = self.ranges[lr.index()].range; + while iter.is_valid() { + debug_assert!(!self.ranges[iter.index()].range.overlaps(&insert_range)); + if self.ranges[iter.index()].range.to <= insert_range.from { + break; + } + insert_after = iter; + iter = self.ranges[iter.index()].next_in_bundle; + } + if insert_after.is_valid() { + self.ranges[insert_after.index()].next_in_bundle = lr; + if self.bundles[bundle.index()].last_range == insert_after { + self.bundles[bundle.index()].last_range = lr; + } + } else { + let next = self.bundles[bundle.index()].first_range; + self.ranges[lr.index()].next_in_bundle = next; + self.bundles[bundle.index()].first_range = lr; + } + } + } + + fn merge_vreg_bundles(&mut self) { + // Create a bundle for every vreg, initially. + log::debug!("merge_vreg_bundles: creating vreg bundles"); + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + if self.vregs[vreg.index()].first_range.is_invalid() { + continue; + } + let bundle = self.create_bundle(); + let mut range = self.vregs[vreg.index()].first_range; + while range.is_valid() { + self.insert_liverange_into_bundle(bundle, range); + range = self.ranges[range.index()].next_in_reg; + } + log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); + } + + for inst in 0..self.func.insts() { + let inst = Inst::new(inst); + + // Attempt to merge Reuse-policy operand outputs with the corresponding + // inputs. + for operand_idx in 0..self.func.inst_operands(inst).len() { + let operand = self.func.inst_operands(inst)[operand_idx]; + if let OperandPolicy::Reuse(input_idx) = operand.policy() { + log::debug!( + "trying to merge use and def at reused-op {} on inst{}", + operand_idx, + inst.index() + ); + assert_eq!(operand.kind(), OperandKind::Def); + assert_eq!(operand.pos(), OperandPos::After); + let input_vreg = + VRegIndex::new(self.func.inst_operands(inst)[input_idx].vreg().vreg()); + let output_vreg = VRegIndex::new(operand.vreg().vreg()); + self.try_merge_reused_register(input_vreg, output_vreg); + } + } + + // Attempt to merge move srcs and dests. + if let Some((src_vreg, dst_vreg)) = self.func.is_move(inst) { + log::debug!("trying to merge move src {} to dst {}", src_vreg, dst_vreg); + let src_bundle = + self.ranges[self.vregs[src_vreg.vreg()].first_range.index()].bundle; + assert!(src_bundle.is_valid()); + let dest_bundle = + self.ranges[self.vregs[dst_vreg.vreg()].first_range.index()].bundle; + assert!(dest_bundle.is_valid()); + self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); + } + } + + // Attempt to merge blockparams with their inputs. + for i in 0..self.blockparam_outs.len() { + let (from_vreg, _, _, to_vreg) = self.blockparam_outs[i]; + log::debug!( + "trying to merge blockparam v{} with input v{}", + to_vreg.index(), + from_vreg.index() + ); + let to_bundle = self.ranges[self.vregs[to_vreg.index()].first_range.index()].bundle; + assert!(to_bundle.is_valid()); + let from_bundle = self.ranges[self.vregs[from_vreg.index()].first_range.index()].bundle; + assert!(from_bundle.is_valid()); + log::debug!( + " -> from bundle{} to bundle{}", + from_bundle.index(), + to_bundle.index() + ); + self.merge_bundles(from_bundle, to_bundle); + } + + log::debug!("done merging bundles"); + } + + fn compute_bundle_prio(&self, bundle: LiveBundleIndex) -> u32 { + // The priority is simply the total "length" -- the number of + // instructions covered by all LiveRanges. 
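+ // For example (hypothetical ranges, assuming CodeRange::len() counts
+ // whole instructions): a bundle whose ranges cover insts [0..4) and
+ // [10..13) gets priority 4 + 3 = 7; longer bundles are dequeued and
+ // allocated earlier.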
+ let mut iter = self.bundles[bundle.index()].first_range; + let mut total = 0; + while iter.is_valid() { + total += self.ranges[iter.index()].range.len() as u32; + iter = self.ranges[iter.index()].next_in_bundle; + } + total + } + + fn queue_bundles(&mut self) { + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + let mut lr = self.vregs[vreg.index()].first_range; + while lr.is_valid() { + let bundle = self.ranges[lr.index()].bundle; + if self.bundles[bundle.index()].first_range == lr { + // First time seeing `bundle`: allocate a spillslot for it, + // compute its priority, and enqueue it. + let ssidx = SpillSetIndex::new(self.spillsets.len()); + let reg = self.vregs[vreg.index()].reg; + let size = self.func.spillslot_size(reg.class(), reg) as u32; + self.spillsets.push(SpillSet { + bundles: smallvec![], + slot: SpillSlotIndex::invalid(), + size, + class: reg.class(), + reg_hint: None, + }); + self.bundles[bundle.index()].spillset = ssidx; + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue.insert(bundle, prio as usize); + } + + // Keep going even if we handled one bundle for this vreg above: + // if we split a vreg's liveranges into multiple bundles, we + // need to hit all the bundles. + lr = self.ranges[lr.index()].next_in_bundle; + } + } + + self.stats.merged_bundle_count = self.allocation_queue.heap.len(); + } + + fn process_bundles(&mut self) { + let mut count = 0; + while let Some(bundle) = self.allocation_queue.pop() { + self.stats.process_bundle_count += 1; + self.process_bundle(bundle); + count += 1; + if count > self.func.insts() * 50 { + self.dump_state(); + panic!("Infinite loop!"); + } + } + self.stats.final_liverange_count = self.ranges.len(); + self.stats.final_bundle_count = self.bundles.len(); + self.stats.spill_bundle_count = self.spilled_bundles.len(); + } + + fn dump_state(&self) { + log::debug!("Bundles:"); + for (i, b) in self.bundles.iter().enumerate() { + log::debug!( + "bundle{}: first_range={:?} last_range={:?} spillset={:?} alloc={:?}", + i, + b.first_range, + b.last_range, + b.spillset, + b.allocation + ); + } + log::debug!("VRegs:"); + for (i, v) in self.vregs.iter().enumerate() { + log::debug!("vreg{}: def={:?} first_range={:?}", i, v.def, v.first_range,); + } + log::debug!("Ranges:"); + for (i, r) in self.ranges.iter().enumerate() { + log::debug!( + concat!( + "range{}: range={:?} vreg={:?} bundle={:?} ", + "weight={} fixed={} first_use={:?} last_use={:?} ", + "def={:?} next_in_bundle={:?} next_in_reg={:?}" + ), + i, + r.range, + r.vreg, + r.bundle, + r.uses_spill_weight, + r.num_fixed_uses(), + r.first_use, + r.last_use, + r.def, + r.next_in_bundle, + r.next_in_reg + ); + } + log::debug!("Uses:"); + for (i, u) in self.uses.iter().enumerate() { + log::debug!( + "use{}: op={:?} pos={:?} slot={} next_use={:?}", + i, + u.operand, + u.pos, + u.slot, + u.next_use + ); + } + log::debug!("Defs:"); + for (i, d) in self.defs.iter().enumerate() { + log::debug!("def{}: op={:?} pos={:?}", i, d.operand, d.pos,); + } + } + + fn compute_requirement(&self, bundle: LiveBundleIndex) -> Option { + let class = self.vregs[self.ranges[self.bundles[bundle.index()].first_range.index()] + .vreg + .index()] + .reg + .class(); + let mut needed = Requirement::Any(class); + + log::debug!("compute_requirement: bundle {:?} class {:?}", bundle, class); + + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = 
&self.ranges[iter.index()]; + log::debug!(" -> range {:?}", range.range); + if range.def.is_valid() { + let def_op = self.defs[range.def.index()].operand; + let def_req = Requirement::from_operand(def_op); + log::debug!( + " -> def {:?} op {:?} req {:?}", + range.def.index(), + def_op, + def_req + ); + needed = needed.merge(def_req)?; + log::debug!(" -> needed {:?}", needed); + } + let mut use_iter = range.first_use; + while use_iter.is_valid() { + let usedata = &self.uses[use_iter.index()]; + let use_op = usedata.operand; + let use_req = Requirement::from_operand(use_op); + log::debug!(" -> use {:?} op {:?} req {:?}", use_iter, use_op, use_req); + needed = needed.merge(use_req)?; + log::debug!(" -> needed {:?}", needed); + use_iter = usedata.next_use; + } + iter = range.next_in_bundle; + } + + log::debug!(" -> final needed: {:?}", needed); + Some(needed) + } + + fn try_to_allocate_bundle_to_reg( + &mut self, + bundle: LiveBundleIndex, + reg: PRegIndex, + ) -> AllocRegResult { + log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); + let mut conflicts = smallvec![]; + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = &self.ranges[iter.index()]; + log::debug!(" -> range {:?}", range); + // Note that the comparator function here tests for *overlap*, so we + // are checking whether the BTree contains any preg range that + // *overlaps* with range `iter`, not literally the range `iter`. + if let Some(preg_range) = self.pregs[reg.index()] + .allocations + .btree + .get(&LiveRangeKey::from_range(&range.range)) + { + log::debug!(" -> btree contains range {:?} that overlaps", preg_range); + if self.ranges[preg_range.index()].vreg.is_valid() { + log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); + // range from an allocated bundle: find the bundle and add to + // conflicts list. + let conflict_bundle = self.ranges[preg_range.index()].bundle; + log::debug!(" -> conflict bundle {:?}", conflict_bundle); + if !conflicts.iter().any(|b| *b == conflict_bundle) { + conflicts.push(conflict_bundle); + } + } else { + log::debug!(" -> conflict with fixed reservation"); + // range from a direct use of the PReg (due to clobber). + return AllocRegResult::ConflictWithFixed; + } + } + iter = range.next_in_bundle; + } + + if conflicts.len() > 0 { + return AllocRegResult::Conflict(conflicts); + } + + // We can allocate! Add our ranges to the preg's BTree. + let preg = self.pregs[reg.index()].reg; + log::debug!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); + self.bundles[bundle.index()].allocation = Allocation::reg(preg); + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = &self.ranges[iter.index()]; + self.pregs[reg.index()] + .allocations + .btree + .insert(LiveRangeKey::from_range(&range.range), iter); + iter = range.next_in_bundle; + } + + AllocRegResult::Allocated(Allocation::reg(preg)) + } + + fn evict_bundle(&mut self, bundle: LiveBundleIndex) { + log::debug!( + "evicting bundle {:?}: alloc {:?}", + bundle, + self.bundles[bundle.index()].allocation + ); + let preg = match self.bundles[bundle.index()].allocation.as_reg() { + Some(preg) => preg, + None => { + log::debug!( + " -> has no allocation! 
{:?}", + self.bundles[bundle.index()].allocation + ); + return; + } + }; + let preg_idx = PRegIndex::new(preg.index()); + self.bundles[bundle.index()].allocation = Allocation::none(); + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + log::debug!(" -> removing LR {:?} from reg {:?}", iter, preg_idx); + self.pregs[preg_idx.index()] + .allocations + .btree + .remove(&LiveRangeKey::from_range(&self.ranges[iter.index()].range)); + iter = self.ranges[iter.index()].next_in_bundle; + } + let prio = self.bundles[bundle.index()].prio; + log::debug!(" -> prio {}; back into queue", prio); + self.allocation_queue.insert(bundle, prio as usize); + } + + fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { + self.bundles[bundle.index()].cached_spill_weight() + } + + fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { + bundles + .iter() + .map(|&b| self.bundles[b.index()].cached_spill_weight()) + .max() + .unwrap_or(0) + } + + fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { + let minimal; + let mut fixed = false; + let bundledata = &self.bundles[bundle.index()]; + let first_range = &self.ranges[bundledata.first_range.index()]; + + if first_range.vreg.is_invalid() { + minimal = true; + fixed = true; + } else { + if first_range.def.is_valid() { + let def_data = &self.defs[first_range.def.index()]; + if let OperandPolicy::FixedReg(_) = def_data.operand.policy() { + fixed = true; + } + } + let mut use_iter = first_range.first_use; + while use_iter.is_valid() { + let use_data = &self.uses[use_iter.index()]; + if let OperandPolicy::FixedReg(_) = use_data.operand.policy() { + fixed = true; + break; + } + use_iter = use_data.next_use; + } + // Minimal if this is the only range in the bundle, and if + // the range covers only one instruction. Note that it + // could cover just one ProgPoint, i.e. X.Before..X.After, + // or two ProgPoints, i.e. X.Before..X+1.Before. + minimal = first_range.next_in_bundle.is_invalid() + && first_range.range.from.inst == first_range.range.to.prev().inst; + } + + let spill_weight = if minimal { + if fixed { + log::debug!(" -> fixed and minimal: 2000000"); + 2_000_000 + } else { + log::debug!(" -> non-fixed and minimal: 1000000"); + 1_000_000 + } + } else { + let mut total = 0; + let mut range = self.bundles[bundle.index()].first_range; + while range.is_valid() { + let range_data = &self.ranges[range.index()]; + if range_data.def.is_valid() { + log::debug!(" -> has def (2000)"); + total += 2000; + } + log::debug!(" -> uses spill weight: {}", range_data.uses_spill_weight); + total += range_data.uses_spill_weight; + range = range_data.next_in_bundle; + } + + if self.bundles[bundle.index()].prio > 0 { + total / self.bundles[bundle.index()].prio + } else { + total + } + }; + + self.bundles[bundle.index()].set_cached_spill_weight_and_props( + spill_weight, + minimal, + fixed, + ); + } + + fn minimal_bundle(&mut self, bundle: LiveBundleIndex) -> bool { + self.bundles[bundle.index()].cached_minimal() + } + + fn find_split_points( + &mut self, + bundle: LiveBundleIndex, + conflicting: LiveBundleIndex, + ) -> SmallVec<[ProgPoint; 4]> { + // Scan the bundle's ranges once. We want to record: + // - Does the bundle contain any ranges in "hot" code and/or "cold" code? 
+ // If so, record the transition points that are fully included in + // `bundle`: the first ProgPoint in a hot range if the prior cold + // point is also in the bundle; and the first ProgPoint in a cold + // range if the prior hot point is also in the bundle. + // - Does the bundle cross any clobbering insts? + // If so, record the ProgPoint before each such instruction. + // - Is there a register use before the conflicting bundle? + // If so, record the ProgPoint just after the last one. + // - Is there a register use after the conflicting bundle? + // If so, record the ProgPoint just before the last one. + // + // Then choose one of the above kinds of splits, in priority order. + + let mut cold_hot_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; + let mut clobber_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; + let mut last_before_conflict: Option = None; + let mut first_after_conflict: Option = None; + + log::debug!( + "find_split_points: bundle {:?} conflicting {:?}", + bundle, + conflicting + ); + + // We simultaneously scan the sorted list of LiveRanges in our bundle + // and the sorted list of call instruction locations. We also take the + // total range (start of first range to end of last range) of the + // conflicting bundle, if any, so we can find the last use before it and + // first use after it. Each loop iteration handles one range in our + // bundle. Calls are scanned up until they advance past the current + // range. + let mut our_iter = self.bundles[bundle.index()].first_range; + let (conflict_from, conflict_to) = if conflicting.is_valid() { + ( + Some( + self.ranges[self.bundles[conflicting.index()].first_range.index()] + .range + .from, + ), + Some( + self.ranges[self.bundles[conflicting.index()].last_range.index()] + .range + .to, + ), + ) + } else { + (None, None) + }; + + let bundle_start = if self.bundles[bundle.index()].first_range.is_valid() { + self.ranges[self.bundles[bundle.index()].first_range.index()] + .range + .from + } else { + ProgPoint::before(Inst::new(0)) + }; + let bundle_end = if self.bundles[bundle.index()].last_range.is_valid() { + self.ranges[self.bundles[bundle.index()].last_range.index()] + .range + .to + } else { + ProgPoint::before(Inst::new(self.func.insts())) + }; + + log::debug!(" -> conflict from {:?} to {:?}", conflict_from, conflict_to); + let mut clobberidx = 0; + while our_iter.is_valid() { + // Probe the hot-code tree. + let our_range = self.ranges[our_iter.index()].range; + log::debug!(" -> range {:?}", our_range); + if let Some(hot_range_idx) = self + .hot_code + .btree + .get(&LiveRangeKey::from_range(&our_range)) + { + // `hot_range_idx` is a range that *overlaps* with our range. + + // There may be cold code in our range on either side of the hot + // range. Record the transition points if so. + let hot_range = self.ranges[hot_range_idx.index()].range; + log::debug!(" -> overlaps with hot-code range {:?}", hot_range); + let start_cold = our_range.from < hot_range.from; + let end_cold = our_range.to > hot_range.to; + if start_cold { + log::debug!( + " -> our start is cold; potential split at cold->hot transition {:?}", + hot_range.from, + ); + // First ProgPoint in hot range. + cold_hot_splits.push(hot_range.from); + } + if end_cold { + log::debug!( + " -> our end is cold; potential split at hot->cold transition {:?}", + hot_range.to, + ); + // First ProgPoint in cold range (after hot range). 
+ cold_hot_splits.push(hot_range.to); + } + } + + // Scan through clobber-insts from last left-off position until the first + // clobbering inst past this range. Record all clobber sites as potential + // splits. + while clobberidx < self.clobbers.len() { + let cur_clobber = self.clobbers[clobberidx]; + let pos = ProgPoint::before(cur_clobber); + if pos >= our_range.to { + break; + } + clobberidx += 1; + if pos < our_range.from { + continue; + } + if pos > bundle_start { + log::debug!(" -> potential clobber split at {:?}", pos); + clobber_splits.push(pos); + } + } + + // Update last-before-conflict and first-before-conflict positions. + + let mut update_with_pos = |pos: ProgPoint| { + let before_inst = ProgPoint::before(pos.inst); + let before_next_inst = before_inst.next().next(); + if before_inst > bundle_start + && (conflict_from.is_none() || before_inst < conflict_from.unwrap()) + && (last_before_conflict.is_none() + || before_inst > last_before_conflict.unwrap()) + { + last_before_conflict = Some(before_inst); + } + if before_next_inst < bundle_end + && (conflict_to.is_none() || pos >= conflict_to.unwrap()) + && (first_after_conflict.is_none() || pos > first_after_conflict.unwrap()) + { + first_after_conflict = Some(ProgPoint::before(pos.inst.next())); + } + }; + + if self.ranges[our_iter.index()].def.is_valid() { + let def_data = &self.defs[self.ranges[our_iter.index()].def.index()]; + log::debug!(" -> range has def at {:?}", def_data.pos); + update_with_pos(def_data.pos); + } + let mut use_idx = self.ranges[our_iter.index()].first_use; + while use_idx.is_valid() { + let use_data = &self.uses[use_idx.index()]; + log::debug!(" -> range has use at {:?}", use_data.pos); + update_with_pos(use_data.pos); + use_idx = use_data.next_use; + } + + our_iter = self.ranges[our_iter.index()].next_in_bundle; + } + log::debug!( + " -> first use/def after conflict range: {:?}", + first_after_conflict, + ); + log::debug!( + " -> last use/def before conflict range: {:?}", + last_before_conflict, + ); + + // Based on the above, we can determine which split strategy we are taking at this + // iteration: + // - If we span both hot and cold code, split into separate "hot" and "cold" bundles. + // - Otherwise, if we span any calls, split just before every call instruction. + // - Otherwise, if there is a register use after the conflicting bundle, + // split at that use-point ("split before first use"). + // - Otherwise, if there is a register use before the conflicting + // bundle, split at that use-point ("split after last use"). + // - Otherwise, split at every use, to form minimal bundles. 
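+ // Illustrative example (hypothetical): a bundle spanning insts 0..20 that
+ // overlaps a hot inner loop covering insts 8..12 takes the first strategy
+ // and returns the two transition points at the hot range's `from` and
+ // `to`; the conflict-relative and per-use strategies below apply only if
+ // no hot/cold or clobber split points were found.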
+ + if cold_hot_splits.len() > 0 { + log::debug!(" going with cold/hot splits: {:?}", cold_hot_splits); + self.stats.splits_hot += 1; + cold_hot_splits + } else if clobber_splits.len() > 0 { + log::debug!(" going with clobber splits: {:?}", clobber_splits); + self.stats.splits_clobbers += 1; + clobber_splits + } else if first_after_conflict.is_some() { + self.stats.splits_conflicts += 1; + log::debug!(" going with first after conflict"); + smallvec![first_after_conflict.unwrap()] + } else if last_before_conflict.is_some() { + self.stats.splits_conflicts += 1; + log::debug!(" going with last before conflict"); + smallvec![last_before_conflict.unwrap()] + } else { + self.stats.splits_all += 1; + log::debug!(" splitting at all uses"); + self.find_all_use_split_points(bundle) + } + } + + fn find_all_use_split_points(&self, bundle: LiveBundleIndex) -> SmallVec<[ProgPoint; 4]> { + let mut splits = smallvec![]; + let mut iter = self.bundles[bundle.index()].first_range; + log::debug!("finding all use/def splits for {:?}", bundle); + let (bundle_start, bundle_end) = if iter.is_valid() { + ( + self.ranges[iter.index()].range.from, + self.ranges[self.bundles[bundle.index()].last_range.index()] + .range + .to, + ) + } else { + ( + ProgPoint::before(Inst::new(0)), + ProgPoint::after(Inst::new(self.func.insts() - 1)), + ) + }; + // N.B.: a minimal bundle must include only ProgPoints in a + // single instruction, but can include both (can include two + // ProgPoints). We split here, taking care to never split *in + // the middle* of an instruction, because we would not be able + // to insert moves to reify such an assignment. + while iter.is_valid() { + let rangedata = &self.ranges[iter.index()]; + log::debug!(" -> range {:?}: {:?}", iter, rangedata.range); + if rangedata.def.is_valid() { + // Split both before and after def (make it a minimal bundle). + let def_pos = self.defs[rangedata.def.index()].pos; + let def_end = ProgPoint::before(def_pos.inst.next()); + log::debug!( + " -> splitting before and after def: {:?} and {:?}", + def_pos, + def_end, + ); + if def_pos > bundle_start { + splits.push(def_pos); + } + if def_end < bundle_end { + splits.push(def_end); + } + } + let mut use_idx = rangedata.first_use; + while use_idx.is_valid() { + let use_data = &self.uses[use_idx.index()]; + let before_use_inst = ProgPoint::before(use_data.pos.inst); + let after_use_inst = before_use_inst.next().next(); + log::debug!( + " -> splitting before and after use: {:?} and {:?}", + before_use_inst, + after_use_inst, + ); + if before_use_inst > bundle_start { + splits.push(before_use_inst); + } + splits.push(after_use_inst); + use_idx = use_data.next_use; + } + + iter = rangedata.next_in_bundle; + } + splits.sort(); + log::debug!(" -> final splits: {:?}", splits); + splits + } + + fn split_and_requeue_bundle( + &mut self, + bundle: LiveBundleIndex, + first_conflicting_bundle: LiveBundleIndex, + ) { + self.stats.splits += 1; + // Try splitting: (i) across hot code; (ii) across all calls, + // if we had a fixed-reg conflict; (iii) before first reg use; + // (iv) after reg use; (v) around all register uses. After + // each type of split, check for conflict with conflicting + // bundle(s); stop when no conflicts. In all cases, re-queue + // the split bundles on the allocation queue. + // + // The critical property here is that we must eventually split + // down to minimal bundles, which consist just of live ranges + // around each individual def/use (this is step (v) + // above). This ensures termination eventually. 
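+ // Worked example (hypothetical): a bundle whose single range covers
+ // [inst2-Before .. inst20-Before) with split points {inst5-Before,
+ // inst12-Before} is rewritten by the loop below into three bundles with
+ // ranges [2..5), [5..12), and [12..20) (all at the Before point): the
+ // original LiveRange is trimmed in place to the first piece and new
+ // LiveRanges/bundles are created for the rest.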
+ + let split_points = self.find_split_points(bundle, first_conflicting_bundle); + log::debug!( + "split bundle {:?} (conflict {:?}): split points {:?}", + bundle, + first_conflicting_bundle, + split_points + ); + + // Split `bundle` at every ProgPoint in `split_points`, + // creating new LiveRanges and bundles (and updating vregs' + // linked lists appropriately), and enqueue the new bundles. + // + // We uphold several basic invariants here: + // - The LiveRanges in every vreg, and in every bundle, are disjoint + // - Every bundle for a given vreg is disjoint + // + // To do so, we make one scan in program order: all ranges in + // the bundle, and the def/all uses in each range. We track + // the currently active bundle. For each range, we distribute + // its uses among one or more ranges, depending on whether it + // crosses any split points. If we had to split a range, then + // we need to insert the new subparts in its vreg as + // well. N.B.: to avoid the need to *remove* ranges from vregs + // (which we could not do without a lookup, since we use + // singly-linked lists and the bundle may contain multiple + // vregs so we cannot simply scan a single vreg simultaneously + // to the main scan), we instead *trim* the existing range + // into its first subpart, and then create the new + // subparts. Note that shrinking a LiveRange is always legal + // (as long as one replaces the shrunk space with new + // LiveRanges). + // + // Note that the original IonMonkey splitting code is quite a + // bit more complex and has some subtle invariants. We stick + // to the above invariants to keep this code maintainable. + + let mut split_idx = 0; + + // Fast-forward past any splits that occur before or exactly + // at the start of the first range in the bundle. + let first_range = self.bundles[bundle.index()].first_range; + let bundle_start = if first_range.is_valid() { + self.ranges[first_range.index()].range.from + } else { + ProgPoint::before(Inst::new(0)) + }; + while split_idx < split_points.len() && split_points[split_idx] <= bundle_start { + split_idx += 1; + } + + let mut new_bundles: LiveBundleVec = smallvec![]; + let mut cur_bundle = bundle; + let mut iter = self.bundles[bundle.index()].first_range; + self.bundles[bundle.index()].first_range = LiveRangeIndex::invalid(); + self.bundles[bundle.index()].last_range = LiveRangeIndex::invalid(); + while iter.is_valid() { + // Read `next` link now and then clear it -- we rebuild the list below. + let next = self.ranges[iter.index()].next_in_bundle; + self.ranges[iter.index()].next_in_bundle = LiveRangeIndex::invalid(); + + let mut range = self.ranges[iter.index()].range; + log::debug!(" -> has range {:?} (LR {:?})", range, iter); + + // If any splits occur before this range, create a new + // bundle, then advance to the first split within the + // range. + if split_idx < split_points.len() && split_points[split_idx] <= range.from { + log::debug!(" -> split before a range; creating new bundle"); + cur_bundle = self.create_bundle(); + self.bundles[cur_bundle.index()].spillset = self.bundles[bundle.index()].spillset; + new_bundles.push(cur_bundle); + split_idx += 1; + } + while split_idx < split_points.len() && split_points[split_idx] <= range.from { + split_idx += 1; + } + + // Link into current bundle. 
+ self.ranges[iter.index()].bundle = cur_bundle; + if self.bundles[cur_bundle.index()].first_range.is_valid() { + self.ranges[self.bundles[cur_bundle.index()].last_range.index()].next_in_bundle = + iter; + } else { + self.bundles[cur_bundle.index()].first_range = iter; + } + self.bundles[cur_bundle.index()].last_range = iter; + + // While the next split point is beyond the start of the + // range and before the end, shorten the current LiveRange + // (this is always legal) and create a new Bundle and + // LiveRange for the remainder. Truncate the old bundle + // (set last_range). Insert the LiveRange into the vreg + // and into the new bundle. Then move the use-chain over, + // splitting at the appropriate point. + // + // We accumulate the use stats (fixed-use count and spill + // weight) as we scan through uses, recomputing the values + // for the truncated initial LiveRange and taking the + // remainders for the split "rest" LiveRange. + + while split_idx < split_points.len() && split_points[split_idx] < range.to { + let split_point = split_points[split_idx]; + split_idx += 1; + + // Skip forward to the current range. + if split_point <= range.from { + continue; + } + + log::debug!( + " -> processing split point {:?} with iter {:?}", + split_point, + iter + ); + + // We split into `first` and `rest`. `rest` may be + // further subdivided in subsequent iterations; we + // only do one split per iteration. + debug_assert!(range.from < split_point && split_point < range.to); + let rest_range = CodeRange { + from: split_point, + to: self.ranges[iter.index()].range.to, + }; + self.ranges[iter.index()].range.to = split_point; + range = rest_range; + log::debug!( + " -> range of {:?} now {:?}", + iter, + self.ranges[iter.index()].range + ); + + // Create the rest-range and insert it into the vreg's + // range list. (Note that the vreg does not keep a + // tail-pointer so we do not need to update that.) + let rest_lr = self.create_liverange(rest_range); + self.ranges[rest_lr.index()].vreg = self.ranges[iter.index()].vreg; + self.ranges[rest_lr.index()].next_in_reg = self.ranges[iter.index()].next_in_reg; + self.ranges[iter.index()].next_in_reg = rest_lr; + + log::debug!( + " -> split tail to new LR {:?} with range {:?}", + rest_lr, + rest_range + ); + + // Scan over uses, accumulating stats for those that + // stay in the first range, finding the first use that + // moves to the rest range. + let mut last_use_in_first_range = UseIndex::invalid(); + let mut use_iter = self.ranges[iter.index()].first_use; + let mut num_fixed_uses = 0; + let mut uses_spill_weight = 0; + while use_iter.is_valid() { + if self.uses[use_iter.index()].pos >= split_point { + break; + } + last_use_in_first_range = use_iter; + let policy = self.uses[use_iter.index()].operand.policy(); + log::debug!( + " -> use {:?} before split point; policy {:?}", + use_iter, + policy + ); + if let OperandPolicy::FixedReg(_) = policy { + num_fixed_uses += 1; + } + uses_spill_weight += spill_weight_from_policy(policy); + log::debug!(" -> use {:?} remains in orig", use_iter); + use_iter = self.uses[use_iter.index()].next_use; + } + + // Move over `rest`'s uses and update stats on first + // and rest LRs. 
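+ // E.g. (hypothetical counts): if the original range carried 5 uses with
+ // total spill weight 7000, 2 of them fixed, and 3 uses (weight 4000, 1
+ // fixed) lie before the split point, then the trimmed range keeps
+ // (3 uses, weight 4000, 1 fixed) and `rest_lr` receives the remainder
+ // (2 uses, weight 3000, 1 fixed).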
+ if use_iter.is_valid() { + log::debug!( + " -> moving uses over the split starting at {:?}", + use_iter + ); + self.ranges[rest_lr.index()].first_use = use_iter; + self.ranges[rest_lr.index()].last_use = self.ranges[iter.index()].last_use; + + self.ranges[iter.index()].last_use = last_use_in_first_range; + if last_use_in_first_range.is_valid() { + self.uses[last_use_in_first_range.index()].next_use = UseIndex::invalid(); + } else { + self.ranges[iter.index()].first_use = UseIndex::invalid(); + } + + let rest_fixed_uses = + self.ranges[iter.index()].num_fixed_uses() - num_fixed_uses; + self.ranges[rest_lr.index()].set_num_fixed_uses(rest_fixed_uses); + self.ranges[rest_lr.index()].uses_spill_weight = + self.ranges[iter.index()].uses_spill_weight - uses_spill_weight; + self.ranges[iter.index()].set_num_fixed_uses(num_fixed_uses); + self.ranges[iter.index()].uses_spill_weight = uses_spill_weight; + } + + // Move over def, if appropriate. + if self.ranges[iter.index()].def.is_valid() { + let def_idx = self.ranges[iter.index()].def; + let def_pos = self.defs[def_idx.index()].pos; + log::debug!(" -> range {:?} has def at {:?}", iter, def_pos); + if def_pos >= split_point { + log::debug!(" -> transferring def bit to {:?}", rest_lr); + self.ranges[iter.index()].def = DefIndex::invalid(); + self.ranges[rest_lr.index()].def = def_idx; + } + } + + log::debug!( + " -> range {:?} next-in-bundle is {:?}", + iter, + self.ranges[iter.index()].next_in_bundle + ); + + // Create a new bundle to hold the rest-range. + let rest_bundle = self.create_bundle(); + cur_bundle = rest_bundle; + new_bundles.push(rest_bundle); + self.bundles[rest_bundle.index()].first_range = rest_lr; + self.bundles[rest_bundle.index()].last_range = rest_lr; + self.bundles[rest_bundle.index()].spillset = self.bundles[bundle.index()].spillset; + self.ranges[rest_lr.index()].bundle = rest_bundle; + log::debug!(" -> new bundle {:?} for LR {:?}", rest_bundle, rest_lr); + + iter = rest_lr; + } + + iter = next; + } + + // Enqueue all split-bundles on the allocation queue. + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue.insert(bundle, prio as usize); + for b in new_bundles { + let prio = self.compute_bundle_prio(b); + self.bundles[b.index()].prio = prio; + self.recompute_bundle_properties(b); + self.allocation_queue.insert(b, prio as usize); + } + } + + fn process_bundle(&mut self, bundle: LiveBundleIndex) { + // Find any requirements: for every LR, for every def/use, gather + // requirements (fixed-reg, any-reg, any) and merge them. + let req = self.compute_requirement(bundle); + // Grab a hint from our spillset, if any. + let hint_reg = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; + log::debug!( + "process_bundle: bundle {:?} requirement {:?} hint {:?}", + bundle, + req, + hint_reg, + ); + + // Try to allocate! + let mut attempts = 0; + let mut first_conflicting_bundle; + loop { + attempts += 1; + debug_assert!(attempts < 100 * self.func.insts()); + first_conflicting_bundle = None; + let req = match req { + Some(r) => r, + // `None` means conflicting requirements, hence impossible to + // allocate. 
+ None => break, + }; + + let conflicting_bundles = match req { + Requirement::Fixed(preg) => { + let preg_idx = PRegIndex::new(preg.index()); + self.stats.process_bundle_reg_probes_fixed += 1; + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_fixed += 1; + log::debug!(" -> allocated to fixed {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = Some(alloc.as_reg().unwrap()); + return; + } + AllocRegResult::Conflict(bundles) => bundles, + AllocRegResult::ConflictWithFixed => { + // Empty conflicts set: there's nothing we can + // evict, because fixed conflicts cannot be moved. + smallvec![] + } + } + } + Requirement::Register(class) => { + // Scan all pregs and attempt to allocate. + let mut lowest_cost_conflict_set: Option = None; + let n_regs = self.env.regs_by_class[class as u8 as usize].len(); + let loop_count = if hint_reg.is_some() { + n_regs + 1 + } else { + n_regs + }; + for i in 0..loop_count { + // The order in which we try registers is somewhat complex: + // - First, if there is a hint, we try that. + // - Then, we try registers in a traversal + // order that is based on the bundle index, + // spreading pressure evenly among registers + // to reduce commitment-map + // contention. (TODO: account for + // caller-save vs. callee-saves here too.) + // Note that we avoid retrying the hint_reg; + // this is why the loop count is n_regs + 1 + // if there is a hint reg, because we always + // skip one iteration. + let preg = match (i, hint_reg) { + (0, Some(hint_reg)) => hint_reg, + (i, Some(hint_reg)) => { + let reg = self.env.regs_by_class[class as u8 as usize] + [(i - 1 + bundle.index()) % n_regs]; + if reg == hint_reg { + continue; + } + reg + } + (i, None) => { + self.env.regs_by_class[class as u8 as usize] + [(i + bundle.index()) % n_regs] + } + }; + + self.stats.process_bundle_reg_probes_any += 1; + let preg_idx = PRegIndex::new(preg.index()); + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_any += 1; + log::debug!(" -> allocated to any {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = Some(alloc.as_reg().unwrap()); + return; + } + AllocRegResult::Conflict(bundles) => { + if lowest_cost_conflict_set.is_none() { + lowest_cost_conflict_set = Some(bundles); + } else if self.maximum_spill_weight_in_bundle_set(&bundles) + < self.maximum_spill_weight_in_bundle_set( + lowest_cost_conflict_set.as_ref().unwrap(), + ) + { + lowest_cost_conflict_set = Some(bundles); + } + } + AllocRegResult::ConflictWithFixed => { + // Simply don't consider as an option. + } + } + } + + // Otherwise, we *require* a register, but didn't fit into + // any with current bundle assignments. Hence, we will need + // to either split or attempt to evict some bundles. Return + // the conflicting bundles to evict and retry. Empty list + // means nothing to try (due to fixed conflict) so we must + // split instead. + lowest_cost_conflict_set.unwrap_or(smallvec![]) + } + + Requirement::Any(_) => { + // If a register is not *required*, spill now (we'll retry + // allocation on spilled bundles later). 
+ log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); + self.spilled_bundles.push(bundle); + return; + } + }; + + log::debug!(" -> conflict set {:?}", conflicting_bundles); + + // If we have already tried evictions once before and are still unsuccessful, give up + // and move on to splitting as long as this is not a minimal bundle. + if attempts >= 2 && !self.minimal_bundle(bundle) { + break; + } + + // If we hit a fixed conflict, give up and move on to splitting. + if conflicting_bundles.is_empty() { + break; + } + + first_conflicting_bundle = Some(conflicting_bundles[0]); + + // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill + // weight, then don't evict. + if self.maximum_spill_weight_in_bundle_set(&conflicting_bundles) + >= self.bundle_spill_weight(bundle) + { + log::debug!(" -> we're already the cheapest bundle to spill -- going to split"); + break; + } + + // Evict all bundles in `conflicting bundles` and try again. + self.stats.evict_bundle_event += 1; + for &bundle in &conflicting_bundles { + log::debug!(" -> evicting {:?}", bundle); + self.evict_bundle(bundle); + self.stats.evict_bundle_count += 1; + } + } + + // A minimal bundle cannot be split. + if self.minimal_bundle(bundle) { + self.dump_state(); + } + debug_assert!(!self.minimal_bundle(bundle)); + + self.split_and_requeue_bundle( + bundle, + first_conflicting_bundle.unwrap_or(LiveBundleIndex::invalid()), + ); + } + + fn try_allocating_regs_for_spilled_bundles(&mut self) { + for i in 0..self.spilled_bundles.len() { + let bundle = self.spilled_bundles[i]; // don't borrow self + let any_vreg = self.vregs[self.ranges + [self.bundles[bundle.index()].first_range.index()] + .vreg + .index()] + .reg; + let class = any_vreg.class(); + let mut success = false; + self.stats.spill_bundle_reg_probes += 1; + let nregs = self.env.regs_by_class[class as u8 as usize].len(); + for i in 0..nregs { + let i = (i + bundle.index()) % nregs; + let preg = self.env.regs_by_class[class as u8 as usize][i]; // don't borrow self + let preg_idx = PRegIndex::new(preg.index()); + if let AllocRegResult::Allocated(_) = + self.try_to_allocate_bundle_to_reg(bundle, preg_idx) + { + self.stats.spill_bundle_reg_success += 1; + success = true; + break; + } + } + if !success { + log::debug!( + "spilling bundle {:?} to spillset bundle list {:?}", + bundle, + self.bundles[bundle.index()].spillset + ); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .bundles + .push(bundle); + } + } + } + + fn spillslot_can_fit_spillset( + &mut self, + spillslot: SpillSlotIndex, + spillset: SpillSetIndex, + ) -> bool { + for &bundle in &self.spillsets[spillset.index()].bundles { + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = self.ranges[iter.index()].range; + if self.spillslots[spillslot.index()] + .ranges + .btree + .contains_key(&LiveRangeKey::from_range(&range)) + { + return false; + } + iter = self.ranges[iter.index()].next_in_bundle; + } + } + true + } + + fn allocate_spillset_to_spillslot( + &mut self, + spillset: SpillSetIndex, + spillslot: SpillSlotIndex, + ) { + self.spillsets[spillset.index()].slot = spillslot; + for i in 0..self.spillsets[spillset.index()].bundles.len() { + // don't borrow self + let bundle = self.spillsets[spillset.index()].bundles[i]; + log::debug!( + "spillslot {:?} alloc'ed to spillset {:?}: bundle {:?}", + spillslot, + spillset, + bundle + ); + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + 
log::debug!( + "spillslot {:?} getting range {:?} from bundle {:?}: {:?}", + spillslot, + iter, + bundle, + self.ranges[iter.index()].range + ); + let range = self.ranges[iter.index()].range; + self.spillslots[spillslot.index()] + .ranges + .btree + .insert(LiveRangeKey::from_range(&range), iter); + iter = self.ranges[iter.index()].next_in_bundle; + } + } + } + + fn allocate_spillslots(&mut self) { + for spillset in 0..self.spillsets.len() { + log::debug!("allocate spillslot: {}", spillset); + let spillset = SpillSetIndex::new(spillset); + if self.spillsets[spillset.index()].bundles.is_empty() { + continue; + } + // Get or create the spillslot list for this size. + let size = self.spillsets[spillset.index()].size as usize; + if size >= self.slots_by_size.len() { + self.slots_by_size.resize( + size + 1, + SpillSlotList { + first_spillslot: SpillSlotIndex::invalid(), + last_spillslot: SpillSlotIndex::invalid(), + }, + ); + } + // Try a few existing spillslots. + let mut spillslot_iter = self.slots_by_size[size].first_spillslot; + let mut first_slot = SpillSlotIndex::invalid(); + let mut prev = SpillSlotIndex::invalid(); + let mut success = false; + for _attempt in 0..10 { + if spillslot_iter.is_invalid() { + break; + } + if spillslot_iter == first_slot { + // We've started looking at slots we placed at the end; end search. + break; + } + if first_slot.is_invalid() { + first_slot = spillslot_iter; + } + + if self.spillslot_can_fit_spillset(spillslot_iter, spillset) { + self.allocate_spillset_to_spillslot(spillset, spillslot_iter); + success = true; + break; + } + // Remove the slot and place it at the end of the respective list. + let next = self.spillslots[spillslot_iter.index()].next_spillslot; + if prev.is_valid() { + self.spillslots[prev.index()].next_spillslot = next; + } else { + self.slots_by_size[size].first_spillslot = next; + } + if !next.is_valid() { + self.slots_by_size[size].last_spillslot = prev; + } + + let last = self.slots_by_size[size].last_spillslot; + if last.is_valid() { + self.spillslots[last.index()].next_spillslot = spillslot_iter; + } else { + self.slots_by_size[size].first_spillslot = spillslot_iter; + } + self.slots_by_size[size].last_spillslot = spillslot_iter; + + prev = spillslot_iter; + spillslot_iter = next; + } + + if !success { + // Allocate a new spillslot. + let spillslot = SpillSlotIndex::new(self.spillslots.len()); + let next = self.slots_by_size[size].first_spillslot; + self.spillslots.push(SpillSlotData { + ranges: LiveRangeSet::new(), + next_spillslot: next, + size: size as u32, + alloc: Allocation::none(), + class: self.spillsets[spillset.index()].class, + }); + self.slots_by_size[size].first_spillslot = spillslot; + if !next.is_valid() { + self.slots_by_size[size].last_spillslot = spillslot; + } + + self.allocate_spillset_to_spillslot(spillset, spillslot); + } + } + + // Assign actual slot indices to spillslots. + let mut offset: u32 = 0; + for data in &mut self.spillslots { + // Align up to `size`. 
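+            // For example (illustrative numbers): with offset = 6 and a
+            // slot of size 4, `(6 + 4 - 1) & !(4 - 1)` rounds the offset
+            // up to 8.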
+ debug_assert!(data.size.is_power_of_two()); + offset = (offset + data.size - 1) & !(data.size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + data.size - 1 + } else { + offset + }; + data.alloc = Allocation::stack(SpillSlot::new(slot as usize, data.class)); + offset += data.size; + } + self.num_spillslots = offset; + + log::debug!("spillslot allocator done"); + } + + fn is_start_of_block(&self, pos: ProgPoint) -> bool { + let block = self.cfginfo.insn_block[pos.inst.index()]; + pos == self.cfginfo.block_entry[block.index()] + } + fn is_end_of_block(&self, pos: ProgPoint) -> bool { + let block = self.cfginfo.insn_block[pos.inst.index()]; + pos == self.cfginfo.block_exit[block.index()] + } + + fn insert_move( + &mut self, + pos: ProgPoint, + prio: InsertMovePrio, + from_alloc: Allocation, + to_alloc: Allocation, + ) { + debug!( + "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}", + pos, prio, from_alloc, to_alloc + ); + self.inserted_moves.push(InsertedMove { + pos, + prio, + from_alloc, + to_alloc, + }); + } + + fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation { + let inst_allocs = &self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; + inst_allocs[slot] + } + + fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) { + let inst_allocs = &mut self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; + inst_allocs[slot] = alloc; + } + + fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { + let bundledata = &self.bundles[self.ranges[range.index()].bundle.index()]; + if bundledata.allocation != Allocation::none() { + bundledata.allocation + } else { + self.spillslots[self.spillsets[bundledata.spillset.index()].slot.index()].alloc + } + } + + fn apply_allocations_and_insert_moves(&mut self) { + log::debug!("blockparam_ins: {:?}", self.blockparam_ins); + log::debug!("blockparam_outs: {:?}", self.blockparam_outs); + + /// We create "half-moves" in order to allow a single-scan + /// strategy with a subsequent sort. Basically, the key idea + /// is that as our single scan through a range for a vreg hits + /// upon the source or destination of an edge-move, we emit a + /// "half-move". These half-moves are carefully keyed in a + /// particular sort order (the field order below is + /// significant!) so that all half-moves on a given (from, to) + /// block-edge appear contiguously, and then all moves from a + /// given vreg appear contiguously. Within a given from-vreg, + /// pick the first `Source` (there should only be one, but + /// imprecision in liveranges due to loop handling sometimes + /// means that a blockparam-out is also recognized as a normal-out), + /// and then for each `Dest`, copy the source-alloc to that + /// dest-alloc. 
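+        ///
+        /// As an illustrative sketch (hypothetical blocks/vregs): if v7
+        /// crosses the edge block1 -> block3 in two different ranges, the
+        /// scan of block1's end emits
+        ///   (block1, block3, v7, Source) with v7's alloc at block1's exit,
+        /// and the scan of block3's start emits
+        ///   (block1, block3, v7, Dest) with v7's alloc at block3's entry.
+        /// The sort makes these adjacent (Source first), and the resolution
+        /// loop below then emits one move on that edge.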
+ #[derive(Clone, Debug, PartialEq, Eq)] + struct HalfMove { + key: u64, + alloc: Allocation, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] + #[repr(u8)] + enum HalfMoveKind { + Source = 0, + Dest = 1, + } + fn half_move_key( + from_block: Block, + to_block: Block, + to_vreg: VRegIndex, + kind: HalfMoveKind, + ) -> u64 { + assert!(from_block.index() < 1 << 21); + assert!(to_block.index() < 1 << 21); + assert!(to_vreg.index() < 1 << 21); + ((from_block.index() as u64) << 43) + | ((to_block.index() as u64) << 22) + | ((to_vreg.index() as u64) << 1) + | (kind as u8 as u64) + } + impl HalfMove { + fn from_block(&self) -> Block { + Block::new(((self.key >> 43) & ((1 << 21) - 1)) as usize) + } + fn to_block(&self) -> Block { + Block::new(((self.key >> 22) & ((1 << 21) - 1)) as usize) + } + fn to_vreg(&self) -> VRegIndex { + VRegIndex::new(((self.key >> 1) & ((1 << 21) - 1)) as usize) + } + fn kind(&self) -> HalfMoveKind { + if self.key & 1 == 1 { + HalfMoveKind::Dest + } else { + HalfMoveKind::Source + } + } + } + + let mut half_moves: Vec = vec![]; + + let mut reuse_input_insts = vec![]; + + let mut blockparam_in_idx = 0; + let mut blockparam_out_idx = 0; + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + let defidx = self.vregs[vreg.index()].def; + let defining_block = if defidx.is_valid() { + self.cfginfo.insn_block[self.defs[defidx.index()].pos.inst.index()] + } else if self.vregs[vreg.index()].blockparam.is_valid() { + self.vregs[vreg.index()].blockparam + } else { + Block::invalid() + }; + + // For each range in each vreg, insert moves or + // half-moves. We also scan over `blockparam_ins` and + // `blockparam_outs`, which are sorted by (block, vreg). + let mut iter = self.vregs[vreg.index()].first_range; + let mut prev = LiveRangeIndex::invalid(); + while iter.is_valid() { + let alloc = self.get_alloc_for_range(iter); + let range = self.ranges[iter.index()].range; + log::debug!( + "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", + vreg, + iter, + range, + alloc + ); + debug_assert!(alloc != Allocation::none()); + + if log::log_enabled!(log::Level::Debug) { + self.annotate( + range.from, + format!( + " <<< start v{} in {} (LR {})", + vreg.index(), + alloc, + iter.index() + ), + ); + self.annotate( + range.to, + format!( + " end v{} in {} (LR {}) >>>", + vreg.index(), + alloc, + iter.index() + ), + ); + } + + // Does this range follow immediately after a prior + // range in the same block? If so, insert a move (if + // the allocs differ). We do this directly rather than + // with half-moves because we eagerly know both sides + // already (and also, half-moves are specific to + // inter-block transfers). + // + // Note that we do *not* do this if there is also a + // def exactly at `range.from`: it's possible that an + // old liverange covers the Before pos of an inst, a + // new liverange covers the After pos, and the def + // also happens at After. In this case we don't want + // to an insert a move after the instruction copying + // the old liverange. + // + // Note also that we assert that the new range has to + // start at the Before-point of an instruction; we + // can't insert a move that logically happens just + // before After (i.e. in the middle of a single + // instruction). 
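+                // For example (hypothetical allocations): if v3's previous
+                // range ends at inst5-Before in p1i and this range starts
+                // at inst5-Before in stack0, we insert a move p1i -> stack0
+                // at inst5-Before, unless that point is a block start or
+                // v3's def is exactly there.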
+ if prev.is_valid() { + let prev_alloc = self.get_alloc_for_range(prev); + let prev_range = self.ranges[prev.index()].range; + let def_idx = self.ranges[iter.index()].def; + let def_pos = if def_idx.is_valid() { + Some(self.defs[def_idx.index()].pos) + } else { + None + }; + debug_assert!(prev_alloc != Allocation::none()); + if prev_range.to == range.from + && !self.is_start_of_block(range.from) + && def_pos != Some(range.from) + { + log::debug!( + "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", + prev.index(), + iter.index(), + prev_alloc, + alloc, + vreg.index() + ); + assert_eq!(range.from.pos, InstPosition::Before); + self.insert_move(range.from, InsertMovePrio::Regular, prev_alloc, alloc); + } + } + + // Scan over blocks whose ends are covered by this + // range. For each, for each successor that is not + // already in this range (hence guaranteed to have the + // same allocation) and if the vreg is live, add a + // Source half-move. + let mut block = self.cfginfo.insn_block[range.from.inst.index()]; + while block.is_valid() && block.index() < self.func.blocks() { + if range.to < self.cfginfo.block_exit[block.index()].next() { + break; + } + log::debug!("examining block with end in range: block{}", block.index()); + for &succ in self.func.block_succs(block) { + log::debug!( + " -> has succ block {} with entry {:?}", + succ.index(), + self.cfginfo.block_entry[succ.index()] + ); + if range.contains_point(self.cfginfo.block_entry[succ.index()]) { + continue; + } + log::debug!(" -> out of this range, requires half-move if live"); + if self.liveins[succ.index()].get(vreg.index()) { + log::debug!(" -> live at input to succ, adding halfmove"); + half_moves.push(HalfMove { + key: half_move_key(block, succ, vreg, HalfMoveKind::Source), + alloc, + }); + } + } + + // Scan forward in `blockparam_outs`, adding all + // half-moves for outgoing values to blockparams + // in succs. + log::debug!( + "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", + vreg.index(), + block.index(), + blockparam_out_idx, + ); + while blockparam_out_idx < self.blockparam_outs.len() { + let (from_vreg, from_block, to_block, to_vreg) = + self.blockparam_outs[blockparam_out_idx]; + if (from_vreg, from_block) > (vreg, block) { + break; + } + if (from_vreg, from_block) == (vreg, block) { + log::debug!( + " -> found: from v{} block{} to v{} block{}", + from_vreg.index(), + from_block.index(), + to_vreg.index(), + to_vreg.index() + ); + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Source, + ), + alloc, + }); + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_exit[block.index()], + format!( + "blockparam-out: block{} to block{}: v{} to v{} in {}", + from_block.index(), + to_block.index(), + from_vreg.index(), + to_vreg.index(), + alloc + ), + ); + } + } + blockparam_out_idx += 1; + } + + block = block.next(); + } + + // Scan over blocks whose beginnings are covered by + // this range and for which the vreg is live at the + // start of the block, and for which the def of the + // vreg is not in this block. For each, for each + // predecessor, add a Dest half-move. + // + // N.B.: why "def of this vreg is not in this block"? 
+ // Because live-range computation can over-approximate + // (due to the way that we handle loops in a single + // pass), especially if the program has irreducible + // control flow and/or if blocks are not in RPO, it + // may be the case that (i) the vreg is not *actually* + // live into this block, but is *defined* in this + // block. If the value is defined in this block, + // because this is SSA, the value cannot be used + // before the def and so we are not concerned about + // any incoming allocation for it. + let mut block = self.cfginfo.insn_block[range.from.inst.index()]; + if self.cfginfo.block_entry[block.index()] < range.from { + block = block.next(); + } + while block.is_valid() && block.index() < self.func.blocks() { + if self.cfginfo.block_entry[block.index()] >= range.to { + break; + } + + // Add half-moves for blockparam inputs. + log::debug!( + "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", + vreg.index(), + block.index(), + blockparam_in_idx + ); + while blockparam_in_idx < self.blockparam_ins.len() { + let (to_vreg, to_block, from_block) = + self.blockparam_ins[blockparam_in_idx]; + if (to_vreg, to_block) > (vreg, block) { + break; + } + if (to_vreg, to_block) == (vreg, block) { + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Dest, + ), + alloc, + }); + log::debug!( + "match: blockparam_in: v{} in block{} from block{} into {}", + to_vreg.index(), + to_block.index(), + from_block.index(), + alloc, + ); + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_entry[block.index()], + format!( + "blockparam-in: block{} to block{}:into v{} in {}", + from_block.index(), + to_block.index(), + to_vreg.index(), + alloc + ), + ); + } + } + blockparam_in_idx += 1; + } + + // The below (range incoming into block) must be + // skipped if the def is in this block, as noted + // above. + if block == defining_block || !self.liveins[block.index()].get(vreg.index()) { + block = block.next(); + continue; + } + + log::debug!( + "scanning preds at vreg {} block {} for ends outside the range", + vreg.index(), + block.index() + ); + + // Now find any preds whose ends are not in the + // same range, and insert appropriate moves. + for &pred in self.func.block_preds(block) { + log::debug!( + "pred block {} has exit {:?}", + pred.index(), + self.cfginfo.block_exit[pred.index()] + ); + if range.contains_point(self.cfginfo.block_exit[pred.index()]) { + continue; + } + log::debug!(" -> requires half-move"); + half_moves.push(HalfMove { + key: half_move_key(pred, block, vreg, HalfMoveKind::Dest), + alloc, + }); + } + + block = block.next(); + } + + // If this is a blockparam vreg and the start of block + // is in this range, add to blockparam_allocs. + let (blockparam_block, blockparam_idx) = + self.cfginfo.vreg_def_blockparam[vreg.index()]; + if blockparam_block.is_valid() + && range.contains_point(self.cfginfo.block_entry[blockparam_block.index()]) + { + self.blockparam_allocs + .push((blockparam_block, blockparam_idx, vreg, alloc)); + } + + // Scan over def/uses and apply allocations. 
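+                // (The def and each use covered by this range get `alloc`
+                // written into their instruction's allocation slots; defs
+                // with a reuse policy are additionally remembered for the
+                // reused-input fixup pass further below.)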
+ if self.ranges[iter.index()].def.is_valid() { + let defdata = &self.defs[self.ranges[iter.index()].def.index()]; + debug_assert!(range.contains_point(defdata.pos)); + let operand = defdata.operand; + let inst = defdata.pos.inst; + let slot = defdata.slot; + self.set_alloc(inst, slot, alloc); + if let OperandPolicy::Reuse(_) = operand.policy() { + reuse_input_insts.push(inst); + } + } + let mut use_iter = self.ranges[iter.index()].first_use; + while use_iter.is_valid() { + let usedata = &self.uses[use_iter.index()]; + debug_assert!(range.contains_point(usedata.pos)); + let inst = usedata.pos.inst; + let slot = usedata.slot; + self.set_alloc(inst, slot, alloc); + use_iter = self.uses[use_iter.index()].next_use; + } + + prev = iter; + iter = self.ranges[iter.index()].next_in_reg; + } + } + + // Sort the half-moves list. For each (from, to, + // from-vreg) tuple, find the from-alloc and all the + // to-allocs, and insert moves on the block edge. + half_moves.sort_by_key(|h| h.key); + log::debug!("halfmoves: {:?}", half_moves); + self.stats.halfmoves_count = half_moves.len(); + + let mut i = 0; + while i < half_moves.len() { + // Find a Source. + while i < half_moves.len() && half_moves[i].kind() != HalfMoveKind::Source { + i += 1; + } + if i >= half_moves.len() { + break; + } + let src = &half_moves[i]; + i += 1; + + // Find all Dests. + let dest_key = src.key | 1; + let first_dest = i; + while i < half_moves.len() && half_moves[i].key == dest_key { + i += 1; + } + let last_dest = i; + + log::debug!( + "halfmove match: src {:?} dests {:?}", + src, + &half_moves[first_dest..last_dest] + ); + + // Determine the ProgPoint where moves on this (from, to) + // edge should go: + // - If there is more than one in-edge to `to`, then + // `from` must have only one out-edge; moves go at tail of + // `from` just before last Branch/Ret. + // - Otherwise, there must be at most one in-edge to `to`, + // and moves go at start of `to`. + let from_last_insn = self.func.block_insns(src.from_block()).last(); + let to_first_insn = self.func.block_insns(src.to_block()).first(); + let from_is_ret = self.func.is_ret(from_last_insn); + let to_is_entry = self.func.entry_block() == src.to_block(); + let from_outs = + self.func.block_succs(src.from_block()).len() + if from_is_ret { 1 } else { 0 }; + let to_ins = + self.func.block_preds(src.to_block()).len() + if to_is_entry { 1 } else { 0 }; + + let (insertion_point, prio) = if to_ins > 1 && from_outs <= 1 { + ( + // N.B.: "after" the branch should be interpreted + // by the user as happening before the actual + // branching action, but after the branch reads + // all necessary inputs. It's necessary to do this + // rather than to place the moves before the + // branch because the branch may have other + // actions than just the control-flow transfer, + // and these other actions may require other + // inputs (which should be read before the "edge" + // moves). + // + // Edits will only appear after the last (branch) + // instruction if the block has only a single + // successor; we do not expect the user to somehow + // duplicate or predicate these. 
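+                //
+                // Illustrative example: an edge from a block with two
+                // successors into a block with two predecessors is a
+                // critical edge; neither placement works and the `panic!`
+                // below fires, so presumably the client is expected to
+                // split such edges before register allocation.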
+ ProgPoint::after(from_last_insn), + InsertMovePrio::OutEdgeMoves, + ) + } else if to_ins <= 1 { + ( + ProgPoint::before(to_first_insn), + InsertMovePrio::InEdgeMoves, + ) + } else { + panic!( + "Critical edge: can't insert moves between blocks {:?} and {:?}", + src.from_block(), + src.to_block() + ); + }; + + let mut last = None; + for dest in first_dest..last_dest { + let dest = &half_moves[dest]; + debug_assert!(last != Some(dest.alloc)); + self.insert_move(insertion_point, prio, src.alloc, dest.alloc); + last = Some(dest.alloc); + } + } + + // Handle multi-fixed-reg constraints by copying. + for (progpoint, from_preg, to_preg) in + std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) + { + log::debug!( + "multi-fixed-move constraint at {:?} from p{} to p{}", + progpoint, + from_preg.index(), + to_preg.index() + ); + self.insert_move( + progpoint, + InsertMovePrio::MultiFixedReg, + Allocation::reg(self.pregs[from_preg.index()].reg), + Allocation::reg(self.pregs[to_preg.index()].reg), + ); + } + + // Handle outputs that reuse inputs: copy beforehand, then set + // input's alloc to output's. + // + // Note that the output's allocation may not *actually* be + // valid until InstPosition::After, but the reused input may + // occur at InstPosition::Before. This may appear incorrect, + // but we make it work by ensuring that all *other* inputs are + // extended to InstPosition::After so that the def will not + // interfere. (The liveness computation code does this -- we + // do not require the user to do so.) + // + // One might ask: why not insist that input-reusing defs occur + // at InstPosition::Before? this would be correct, but would + // mean that the reused input and the reusing output + // interfere, *guaranteeing* that every such case would + // require a move. This is really bad on ISAs (like x86) where + // reused inputs are ubiquitous. + // + // Another approach might be to put the def at Before, and + // trim the reused input's liverange back to the previous + // instruction's After. This is kind of OK until (i) a block + // boundary occurs between the prior inst and this one, or + // (ii) any moves/spills/reloads occur between the two + // instructions. We really do need the input to be live at + // this inst's Before. + // + // In principle what we really need is a "BeforeBefore" + // program point, but we don't want to introduce that + // everywhere and pay the cost of twice as many ProgPoints + // throughout the allocator. + // + // Or we could introduce a separate move instruction -- this + // is the approach that regalloc.rs takes with "mod" operands + // -- but that is also costly. + // + // So we take this approach (invented by IonMonkey -- somewhat + // hard to discern, though see [0] for a comment that makes + // this slightly less unclear) to avoid interference between + // the actual reused input and reusing output, ensure + // interference (hence no incorrectness) between other inputs + // and the reusing output, and not require a separate explicit + // move instruction. 
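+        //
+        // A small illustrative sketch (hypothetical vregs/registers): a
+        // two-address add `v3 := v1 + v2` would carry operands
+        //   Operand::reg_use(v1), Operand::reg_use(v2),
+        //   Operand::reg_reuse_def(v3, 0),
+        // and if v1 ended up in p5i while v3 ended up in p7i, the loop
+        // below inserts a move p5i -> p7i just before the instruction and
+        // rewrites input 0's slot to p7i.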
+ // + // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110 + for inst in reuse_input_insts { + let mut input_reused: SmallVec<[usize; 4]> = smallvec![]; + for output_idx in 0..self.func.inst_operands(inst).len() { + let operand = self.func.inst_operands(inst)[output_idx]; + if let OperandPolicy::Reuse(input_idx) = operand.policy() { + debug_assert!(!input_reused.contains(&input_idx)); + debug_assert_eq!(operand.pos(), OperandPos::After); + input_reused.push(input_idx); + let input_alloc = self.get_alloc(inst, input_idx); + let output_alloc = self.get_alloc(inst, output_idx); + log::debug!( + "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}", + inst, + output_idx, + output_alloc, + input_idx, + input_alloc + ); + if input_alloc != output_alloc { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::before(inst), + format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc), + ); + } + self.insert_move( + ProgPoint::before(inst), + InsertMovePrio::ReusedInput, + input_alloc, + output_alloc, + ); + self.set_alloc(inst, input_idx, output_alloc); + } + } + } + } + } + + fn resolve_inserted_moves(&mut self) { + // For each program point, gather all moves together. Then + // resolve (see cases below). + let mut i = 0; + self.inserted_moves + .sort_by_key(|m| (m.pos.to_index(), m.prio)); + while i < self.inserted_moves.len() { + let start = i; + let pos = self.inserted_moves[i].pos; + let prio = self.inserted_moves[i].prio; + while i < self.inserted_moves.len() + && self.inserted_moves[i].pos == pos + && self.inserted_moves[i].prio == prio + { + i += 1; + } + let moves = &self.inserted_moves[start..i]; + + // Get the regclass from one of the moves. + let regclass = moves[0].from_alloc.class(); + + // All moves in `moves` semantically happen in + // parallel. Let's resolve these to a sequence of moves + // that can be done one at a time. + let mut parallel_moves = ParallelMoves::new(Allocation::reg( + self.env.scratch_by_class[regclass as u8 as usize], + )); + log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); + for m in moves { + if m.from_alloc != m.to_alloc { + log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); + parallel_moves.add(m.from_alloc, m.to_alloc); + } + } + + let resolved = parallel_moves.resolve(); + + for (src, dst) in resolved { + log::debug!(" resolved: {} -> {}", src, dst); + self.add_edit(pos, prio, Edit::Move { from: src, to: dst }); + } + } + + // Add edits to describe blockparam locations too. This is + // required by the checker. This comes after any edge-moves. 
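+        //
+        // For example (hypothetical values): if block3 has params [v5, v9]
+        // allocated to p1i and stack2 at its entry, we emit
+        //   Edit::BlockParams { vregs: [v5, v9], allocs: [p1i, stack2] }
+        // at block3's entry point.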
+ self.blockparam_allocs + .sort_by_key(|&(block, idx, _, _)| (block, idx)); + self.stats.blockparam_allocs_count = self.blockparam_allocs.len(); + let mut i = 0; + while i < self.blockparam_allocs.len() { + let start = i; + let block = self.blockparam_allocs[i].0; + while i < self.blockparam_allocs.len() && self.blockparam_allocs[i].0 == block { + i += 1; + } + let params = &self.blockparam_allocs[start..i]; + let vregs = params + .iter() + .map(|(_, _, vreg_idx, _)| self.vregs[vreg_idx.index()].reg) + .collect::>(); + let allocs = params + .iter() + .map(|(_, _, _, alloc)| *alloc) + .collect::>(); + assert_eq!(vregs.len(), self.func.block_params(block).len()); + assert_eq!(allocs.len(), self.func.block_params(block).len()); + self.add_edit( + self.cfginfo.block_entry[block.index()], + InsertMovePrio::BlockParam, + Edit::BlockParams { vregs, allocs }, + ); + } + + // Ensure edits are in sorted ProgPoint order. + self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio)); + self.stats.edits_count = self.edits.len(); + + // Add debug annotations. + if log::log_enabled!(log::Level::Debug) { + for i in 0..self.edits.len() { + let &(pos, _, ref edit) = &self.edits[i]; + match edit { + &Edit::Move { from, to } => { + self.annotate( + ProgPoint::from_index(pos), + format!("move {} -> {}", from, to), + ); + } + &Edit::BlockParams { + ref vregs, + ref allocs, + } => { + let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); + self.annotate(ProgPoint::from_index(pos), s); + } + } + } + } + } + + fn add_edit(&mut self, pos: ProgPoint, prio: InsertMovePrio, edit: Edit) { + match &edit { + &Edit::Move { from, to } if from == to => return, + _ => {} + } + + self.edits.push((pos.to_index(), prio, edit)); + } + + fn compute_stackmaps(&mut self) {} + + pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { + self.create_pregs_and_vregs(); + self.compute_liveness(); + self.compute_hot_code(); + self.merge_vreg_bundles(); + self.queue_bundles(); + if log::log_enabled!(log::Level::Debug) { + self.dump_state(); + } + Ok(()) + } + + pub(crate) fn run(&mut self) -> Result<(), RegAllocError> { + self.process_bundles(); + self.try_allocating_regs_for_spilled_bundles(); + self.allocate_spillslots(); + self.apply_allocations_and_insert_moves(); + self.resolve_inserted_moves(); + self.compute_stackmaps(); + Ok(()) + } + + fn annotate(&mut self, progpoint: ProgPoint, s: String) { + if log::log_enabled!(log::Level::Debug) { + self.debug_annotations + .entry(progpoint) + .or_insert_with(|| vec![]) + .push(s); + } + } + + fn dump_results(&self) { + log::debug!("=== REGALLOC RESULTS ==="); + for block in 0..self.func.blocks() { + let block = Block::new(block); + log::debug!( + "block{}: [succs {:?} preds {:?}]", + block.index(), + self.func + .block_succs(block) + .iter() + .map(|b| b.index()) + .collect::>(), + self.func + .block_preds(block) + .iter() + .map(|b| b.index()) + .collect::>() + ); + for inst in self.func.block_insns(block).iter() { + for annotation in self + .debug_annotations + .get(&ProgPoint::before(inst)) + .map(|v| &v[..]) + .unwrap_or(&[]) + { + log::debug!(" inst{}-pre: {}", inst.index(), annotation); + } + let ops = self + .func + .inst_operands(inst) + .iter() + .map(|op| format!("{}", op)) + .collect::>(); + let clobbers = self + .func + .inst_clobbers(inst) + .iter() + .map(|preg| format!("{}", preg)) + .collect::>(); + let allocs = (0..ops.len()) + .map(|i| format!("{}", self.get_alloc(inst, i))) + .collect::>(); + let opname = if self.func.is_branch(inst) { + "br" + } else if 
self.func.is_call(inst) { + "call" + } else if self.func.is_ret(inst) { + "ret" + } else { + "op" + }; + let args = ops + .iter() + .zip(allocs.iter()) + .map(|(op, alloc)| format!("{} [{}]", op, alloc)) + .collect::>(); + let clobbers = if clobbers.is_empty() { + "".to_string() + } else { + format!(" [clobber: {}]", clobbers.join(", ")) + }; + log::debug!( + " inst{}: {} {}{}", + inst.index(), + opname, + args.join(", "), + clobbers + ); + for annotation in self + .debug_annotations + .get(&ProgPoint::after(inst)) + .map(|v| &v[..]) + .unwrap_or(&[]) + { + log::debug!(" inst{}-post: {}", inst.index(), annotation); + } + } + } + } +} + +pub fn run(func: &F, mach_env: &MachineEnv) -> Result { + let cfginfo = CFGInfo::new(func); + validate_ssa(func, &cfginfo)?; + + let mut env = Env::new(func, mach_env, cfginfo); + env.init()?; + + env.run()?; + + if log::log_enabled!(log::Level::Debug) { + env.dump_results(); + } + + Ok(Output { + edits: env + .edits + .into_iter() + .map(|(pos, _, edit)| (ProgPoint::from_index(pos), edit)) + .collect(), + allocs: env.allocs, + inst_alloc_offsets: env.inst_alloc_offsets, + num_spillslots: env.num_spillslots as usize, + stats: env.stats, + }) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..0750a824 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,780 @@ +/* + * The fellowing license applies to this file, which derives many + * details (register and constraint definitions, for example) from the + * files `BacktrackingAllocator.h`, `BacktrackingAllocator.cpp`, + * `LIR.h`, and possibly definitions in other related files in + * `js/src/jit/`: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#![allow(dead_code)] + +pub mod bitvec; +pub mod cfg; +pub mod domtree; +pub mod ion; +pub mod moves; +pub mod postorder; +pub mod ssa; + +#[macro_use] +pub mod index; +pub use index::{Block, Inst, InstRange, InstRangeIter}; + +pub mod checker; +pub mod fuzzing; + +/// Register classes. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum RegClass { + Int = 0, + Float = 1, +} + +/// A physical register. Contains a physical register number and a class. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct PReg(u8, RegClass); + +impl PReg { + pub const MAX_BITS: usize = 5; + pub const MAX: usize = (1 << Self::MAX_BITS) - 1; + + /// Create a new PReg. The `hw_enc` range is 6 bits. + #[inline(always)] + pub fn new(hw_enc: usize, class: RegClass) -> Self { + assert!(hw_enc <= Self::MAX); + PReg(hw_enc as u8, class) + } + + /// The physical register number, as encoded by the ISA for the particular register class. + #[inline(always)] + pub fn hw_enc(self) -> usize { + self.0 as usize + } + + /// The register class. + #[inline(always)] + pub fn class(self) -> RegClass { + self.1 + } + + /// Get an index into the (not necessarily contiguous) index space of + /// all physical registers. Allows one to maintain an array of data for + /// all PRegs and index it efficiently. 
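+    ///
+    /// For example, with this packing an `Int` register with `hw_enc` 3
+    /// has index 3, while a `Float` register with `hw_enc` 3 has index
+    /// 64 + 3 = 67.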
+ #[inline(always)] + pub fn index(self) -> usize { + ((self.1 as u8 as usize) << 6) | (self.0 as usize) + } + + #[inline(always)] + pub fn from_index(index: usize) -> Self { + let class = (index >> 6) & 1; + let class = match class { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + }; + let index = index & Self::MAX; + PReg::new(index, class) + } + + #[inline(always)] + pub fn invalid() -> Self { + PReg::new(Self::MAX, RegClass::Int) + } +} + +impl std::fmt::Debug for PReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "PReg(hw = {}, class = {:?}, index = {})", + self.hw_enc(), + self.class(), + self.index() + ) + } +} + +impl std::fmt::Display for PReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let class = match self.class() { + RegClass::Int => "i", + RegClass::Float => "f", + }; + write!(f, "p{}{}", self.hw_enc(), class) + } +} + +/// A virtual register. Contains a virtual register number and a class. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct VReg(u32); + +impl VReg { + pub const MAX_BITS: usize = 20; + pub const MAX: usize = (1 << Self::MAX_BITS) - 1; + + #[inline(always)] + pub fn new(virt_reg: usize, class: RegClass) -> Self { + assert!(virt_reg <= Self::MAX); + VReg(((virt_reg as u32) << 1) | (class as u8 as u32)) + } + + #[inline(always)] + pub fn vreg(self) -> usize { + (self.0 >> 1) as usize + } + + #[inline(always)] + pub fn class(self) -> RegClass { + match self.0 & 1 { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn invalid() -> Self { + VReg::new(Self::MAX, RegClass::Int) + } +} + +impl std::fmt::Debug for VReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "VReg(vreg = {}, class = {:?})", + self.vreg(), + self.class() + ) + } +} + +impl std::fmt::Display for VReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "v{}", self.vreg()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SpillSlot(u32); + +impl SpillSlot { + #[inline(always)] + pub fn new(slot: usize, class: RegClass) -> Self { + assert!(slot < (1 << 24)); + SpillSlot((slot as u32) | (class as u8 as u32) << 24) + } + #[inline(always)] + pub fn index(self) -> usize { + (self.0 & 0x00ffffff) as usize + } + #[inline(always)] + pub fn class(self) -> RegClass { + match (self.0 >> 24) as u8 { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + } + } + #[inline(always)] + pub fn plus(self, offset: usize) -> Self { + SpillSlot::new(self.index() + offset, self.class()) + } +} + +impl std::fmt::Display for SpillSlot { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "stack{}", self.index()) + } +} + +/// An `Operand` encodes everything about a mention of a register in +/// an instruction: virtual register number, and any constraint/policy +/// that applies to the register at this program point. +/// +/// An Operand may be a use or def (this corresponds to `LUse` and +/// `LAllocation` in Ion). +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Operand { + /// Bit-pack into 31 bits. This allows a `Reg` to encode an + /// `Operand` or an `Allocation` in 32 bits. 
+ /// + /// op-or-alloc:1 pos:2 kind:1 policy:2 class:1 preg:5 vreg:20 + bits: u32, +} + +impl Operand { + #[inline(always)] + pub fn new(vreg: VReg, policy: OperandPolicy, kind: OperandKind, pos: OperandPos) -> Self { + let (preg_field, policy_field): (u32, u32) = match policy { + OperandPolicy::Any => (0, 0), + OperandPolicy::Reg => (0, 1), + OperandPolicy::FixedReg(preg) => { + assert_eq!(preg.class(), vreg.class()); + (preg.hw_enc() as u32, 2) + } + OperandPolicy::Reuse(which) => { + assert!(which <= PReg::MAX); + (which as u32, 3) + } + }; + let class_field = vreg.class() as u8 as u32; + let pos_field = pos as u8 as u32; + let kind_field = kind as u8 as u32; + Operand { + bits: vreg.vreg() as u32 + | (preg_field << 20) + | (class_field << 25) + | (policy_field << 26) + | (kind_field << 28) + | (pos_field << 29), + } + } + + #[inline(always)] + pub fn reg_use(vreg: VReg) -> Self { + Operand::new( + vreg, + OperandPolicy::Reg, + OperandKind::Use, + OperandPos::Before, + ) + } + #[inline(always)] + pub fn reg_use_at_end(vreg: VReg) -> Self { + Operand::new(vreg, OperandPolicy::Reg, OperandKind::Use, OperandPos::Both) + } + #[inline(always)] + pub fn reg_def(vreg: VReg) -> Self { + Operand::new( + vreg, + OperandPolicy::Reg, + OperandKind::Def, + OperandPos::After, + ) + } + #[inline(always)] + pub fn reg_def_at_start(vreg: VReg) -> Self { + Operand::new(vreg, OperandPolicy::Reg, OperandKind::Def, OperandPos::Both) + } + #[inline(always)] + pub fn reg_temp(vreg: VReg) -> Self { + Operand::new(vreg, OperandPolicy::Reg, OperandKind::Def, OperandPos::Both) + } + #[inline(always)] + pub fn reg_reuse_def(vreg: VReg, idx: usize) -> Self { + Operand::new( + vreg, + OperandPolicy::Reuse(idx), + OperandKind::Def, + OperandPos::Both, + ) + } + #[inline(always)] + pub fn reg_fixed_use(vreg: VReg, preg: PReg) -> Self { + Operand::new( + vreg, + OperandPolicy::FixedReg(preg), + OperandKind::Use, + OperandPos::Before, + ) + } + #[inline(always)] + pub fn reg_fixed_def(vreg: VReg, preg: PReg) -> Self { + Operand::new( + vreg, + OperandPolicy::FixedReg(preg), + OperandKind::Def, + OperandPos::After, + ) + } + + #[inline(always)] + pub fn vreg(self) -> VReg { + let vreg_idx = ((self.bits as usize) & VReg::MAX) as usize; + VReg::new(vreg_idx, self.class()) + } + + #[inline(always)] + pub fn class(self) -> RegClass { + let class_field = (self.bits >> 25) & 1; + match class_field { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn kind(self) -> OperandKind { + let kind_field = (self.bits >> 28) & 1; + match kind_field { + 0 => OperandKind::Def, + 1 => OperandKind::Use, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn pos(self) -> OperandPos { + let pos_field = (self.bits >> 29) & 3; + match pos_field { + 0 => OperandPos::Before, + 1 => OperandPos::After, + 2 => OperandPos::Both, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn policy(self) -> OperandPolicy { + let policy_field = (self.bits >> 26) & 3; + let preg_field = ((self.bits >> 20) as usize) & PReg::MAX; + match policy_field { + 0 => OperandPolicy::Any, + 1 => OperandPolicy::Reg, + 2 => OperandPolicy::FixedReg(PReg::new(preg_field, self.class())), + 3 => OperandPolicy::Reuse(preg_field), + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn bits(self) -> u32 { + self.bits + } + + #[inline(always)] + pub fn from_bits(bits: u32) -> Self { + Operand { bits } + } +} + +impl std::fmt::Debug for Operand { + fn fmt(&self, f: &mut std::fmt::Formatter) -> 
std::fmt::Result { + write!( + f, + "Operand(vreg = {:?}, class = {:?}, kind = {:?}, pos = {:?}, policy = {:?})", + self.vreg().vreg(), + self.class(), + self.kind(), + self.pos(), + self.policy() + ) + } +} + +impl std::fmt::Display for Operand { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{:?}@{:?}: {} {}", + self.kind(), + self.pos(), + self.vreg(), + self.policy() + ) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandPolicy { + /// Any location is fine (register or stack slot). + Any, + /// Operand must be in a register. Register is read-only for Uses. + Reg, + /// Operand must be in a fixed register. + FixedReg(PReg), + /// On defs only: reuse a use's register. Which use is given by `preg` field. + Reuse(usize), +} + +impl std::fmt::Display for OperandPolicy { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Any => write!(f, "any"), + Self::Reg => write!(f, "reg"), + Self::FixedReg(preg) => write!(f, "fixed({})", preg), + Self::Reuse(idx) => write!(f, "reuse({})", idx), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandKind { + Def = 0, + Use = 1, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandPos { + Before = 0, + After = 1, + Both = 2, +} + +/// An Allocation represents the end result of regalloc for an +/// Operand. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Allocation { + /// Bit-pack in 31 bits: + /// + /// op-or-alloc:1 kind:2 index:29 + bits: u32, +} + +impl std::fmt::Debug for Allocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "Allocation(kind = {:?}, index = {})", + self.kind(), + self.index() + ) + } +} + +impl std::fmt::Display for Allocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self.kind() { + AllocationKind::None => write!(f, "none"), + AllocationKind::Reg => write!(f, "{}", self.as_reg().unwrap()), + AllocationKind::Stack => write!(f, "{}", self.as_stack().unwrap()), + } + } +} + +impl Allocation { + #[inline(always)] + pub(crate) fn new(kind: AllocationKind, index: usize) -> Self { + Self { + bits: ((kind as u8 as u32) << 29) | (index as u32), + } + } + + #[inline(always)] + pub fn none() -> Allocation { + Allocation::new(AllocationKind::None, 0) + } + + #[inline(always)] + pub fn reg(preg: PReg) -> Allocation { + Allocation::new(AllocationKind::Reg, preg.index()) + } + + #[inline(always)] + pub fn stack(slot: SpillSlot) -> Allocation { + Allocation::new(AllocationKind::Stack, slot.0 as usize) + } + + #[inline(always)] + pub fn kind(self) -> AllocationKind { + match (self.bits >> 29) & 3 { + 0 => AllocationKind::None, + 1 => AllocationKind::Reg, + 2 => AllocationKind::Stack, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn index(self) -> usize { + (self.bits & ((1 << 29) - 1)) as usize + } + + #[inline(always)] + pub fn as_reg(self) -> Option { + if self.kind() == AllocationKind::Reg { + Some(PReg::from_index(self.index())) + } else { + None + } + } + + #[inline(always)] + pub fn as_stack(self) -> Option { + if self.kind() == AllocationKind::Stack { + Some(SpillSlot(self.index() as u32)) + } else { + None + } + } + + #[inline(always)] + pub fn bits(self) -> u32 { + self.bits + } + + #[inline(always)] + pub fn from_bits(bits: u32) -> Self { + Self { bits } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum AllocationKind { + None = 0, + Reg = 1, 
+ Stack = 2, +} + +impl Allocation { + #[inline(always)] + pub fn class(self) -> RegClass { + match self.kind() { + AllocationKind::None => panic!("Allocation::None has no class"), + AllocationKind::Reg => self.as_reg().unwrap().class(), + AllocationKind::Stack => self.as_stack().unwrap().class(), + } + } +} + +/// A trait defined by the regalloc client to provide access to its +/// machine-instruction / CFG representation. +pub trait Function { + // ------------- + // CFG traversal + // ------------- + + /// How many instructions are there? + fn insts(&self) -> usize; + + /// How many blocks are there? + fn blocks(&self) -> usize; + + /// Get the index of the entry block. + fn entry_block(&self) -> Block; + + /// Provide the range of instruction indices contained in each block. + fn block_insns(&self, block: Block) -> InstRange; + + /// Get CFG successors for a given block. + fn block_succs(&self, block: Block) -> &[Block]; + + /// Get the CFG predecessors for a given block. + fn block_preds(&self, block: Block) -> &[Block]; + + /// Get the block parameters for a given block. + fn block_params(&self, block: Block) -> &[VReg]; + + /// Determine whether an instruction is a call instruction. This is used + /// only for splitting heuristics. + fn is_call(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is a return instruction. + fn is_ret(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is the end-of-block + /// branch. If so, its operands *must* be the block parameters for + /// each of its block's `block_succs` successor blocks, in order. + fn is_branch(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is a safepoint and requires a stackmap. + fn is_safepoint(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is a move; if so, return the + /// vregs for (src, dst). + fn is_move(&self, insn: Inst) -> Option<(VReg, VReg)>; + + // -------------------------- + // Instruction register slots + // -------------------------- + + /// Get the Operands for an instruction. + fn inst_operands(&self, insn: Inst) -> &[Operand]; + + /// Get the clobbers for an instruction. + fn inst_clobbers(&self, insn: Inst) -> &[PReg]; + + /// Get the precise number of `VReg` in use in this function, to allow + /// preallocating data structures. This number *must* be a correct + /// lower-bound, otherwise invalid index failures may happen; it is of + /// course better if it is exact. + fn num_vregs(&self) -> usize; + + // -------------- + // Spills/reloads + // -------------- + + /// How many logical spill slots does the given regclass require? E.g., on + /// a 64-bit machine, spill slots may nominally be 64-bit words, but a + /// 128-bit vector value will require two slots. The regalloc will always + /// align on this size. + /// + /// This passes the associated virtual register to the client as well, + /// because the way in which we spill a real register may depend on the + /// value that we are using it for. E.g., if a machine has V128 registers + /// but we also use them for F32 and F64 values, we may use a different + /// store-slot size and smaller-operand store/load instructions for an F64 + /// than for a true V128. + fn spillslot_size(&self, regclass: RegClass, for_vreg: VReg) -> usize; + + /// When providing a spillslot number for a multi-slot spillslot, + /// do we provide the first or the last? This is usually related + /// to which direction the stack grows and different clients may + /// have different preferences. 
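+    ///
+    /// For example, a two-slot spillslot occupying logical slots 4 and 5
+    /// is named slot 4 if this returns `false` (the default below) and
+    /// slot 5 if it returns `true`.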
+ fn multi_spillslot_named_by_last_slot(&self) -> bool { + false + } +} + +/// A position before or after an instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum InstPosition { + Before = 0, + After = 1, +} + +/// A program point: a single point before or after a given instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ProgPoint { + pub inst: Inst, + pub pos: InstPosition, +} + +impl ProgPoint { + pub fn before(inst: Inst) -> Self { + Self { + inst, + pos: InstPosition::Before, + } + } + + pub fn after(inst: Inst) -> Self { + Self { + inst, + pos: InstPosition::After, + } + } + + pub fn next(self) -> ProgPoint { + match self.pos { + InstPosition::Before => ProgPoint { + inst: self.inst, + pos: InstPosition::After, + }, + InstPosition::After => ProgPoint { + inst: self.inst.next(), + pos: InstPosition::Before, + }, + } + } + + pub fn prev(self) -> ProgPoint { + match self.pos { + InstPosition::Before => ProgPoint { + inst: self.inst.prev(), + pos: InstPosition::After, + }, + InstPosition::After => ProgPoint { + inst: self.inst, + pos: InstPosition::Before, + }, + } + } + + pub fn to_index(self) -> u32 { + debug_assert!(self.inst.index() <= ((1 << 31) - 1)); + ((self.inst.index() as u32) << 1) | (self.pos as u8 as u32) + } + + pub fn from_index(index: u32) -> Self { + let inst = Inst::new((index >> 1) as usize); + let pos = match index & 1 { + 0 => InstPosition::Before, + 1 => InstPosition::After, + _ => unreachable!(), + }; + Self { inst, pos } + } +} + +/// An instruction to insert into the program to perform some data movement. +#[derive(Clone, Debug)] +pub enum Edit { + /// Move one allocation to another. Each allocation may be a + /// register or a stack slot (spillslot). + Move { from: Allocation, to: Allocation }, + /// Define blockparams' locations. Note that this is not typically + /// turned into machine code, but can be useful metadata (e.g. for + /// the checker). + BlockParams { + vregs: Vec, + allocs: Vec, + }, +} + +/// A machine envrionment tells the register allocator which registers +/// are available to allocate and what register may be used as a +/// scratch register for each class, and some other miscellaneous info +/// as well. +#[derive(Clone, Debug)] +pub struct MachineEnv { + regs: Vec, + regs_by_class: Vec>, + scratch_by_class: Vec, +} + +/// The output of the register allocator. +#[derive(Clone, Debug)] +pub struct Output { + /// How many spillslots are needed in the frame? + pub num_spillslots: usize, + /// Edits (insertions or removals). Guaranteed to be sorted by + /// program point. + pub edits: Vec<(ProgPoint, Edit)>, + /// Allocations for each operand. Mapping from instruction to + /// allocations provided by `inst_alloc_offsets` below. + pub allocs: Vec, + /// Allocation offset in `allocs` for each instruction. + pub inst_alloc_offsets: Vec, + + /// Internal stats from the allocator. + pub stats: ion::Stats, +} + +impl Output { + pub fn inst_allocs(&self, inst: Inst) -> &[Allocation] { + let start = self.inst_alloc_offsets[inst.index()] as usize; + let end = if inst.index() + 1 == self.inst_alloc_offsets.len() { + self.allocs.len() + } else { + self.inst_alloc_offsets[inst.index() + 1] as usize + }; + &self.allocs[start..end] + } +} + +/// An error that prevents allocation. +#[derive(Clone, Debug)] +pub enum RegAllocError { + /// Invalid SSA for given vreg at given inst: multiple defs or + /// illegal use. 
`inst` may be `Inst::invalid()` if this concerns + /// a block param. + SSA(VReg, Inst), + /// Invalid basic block: does not end in branch/ret, or contains a + /// branch/ret in the middle. + BB(Block), + /// Invalid branch: operand count does not match sum of block + /// params of successor blocks. + Branch(Inst), +} + +impl std::fmt::Display for RegAllocError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl std::error::Error for RegAllocError {} + +pub fn run(func: &F, env: &MachineEnv) -> Result { + ion::run(func, env) +} diff --git a/src/moves.rs b/src/moves.rs new file mode 100644 index 00000000..a5f70be2 --- /dev/null +++ b/src/moves.rs @@ -0,0 +1,199 @@ +use crate::Allocation; +use smallvec::{smallvec, SmallVec}; + +pub type MoveVec = SmallVec<[(Allocation, Allocation); 16]>; + +/// A `ParallelMoves` represents a list of alloc-to-alloc moves that +/// must happen in parallel -- i.e., all reads of sources semantically +/// happen before all writes of destinations, and destinations are +/// allowed to overwrite sources. It can compute a list of sequential +/// moves that will produce the equivalent data movement, possibly +/// using a scratch register if one is necessary. +pub struct ParallelMoves { + parallel_moves: MoveVec, + scratch: Allocation, +} + +impl ParallelMoves { + pub fn new(scratch: Allocation) -> Self { + Self { + parallel_moves: smallvec![], + scratch, + } + } + + pub fn add(&mut self, from: Allocation, to: Allocation) { + self.parallel_moves.push((from, to)); + } + + fn sources_overlap_dests(&self) -> bool { + // Assumes `parallel_moves` has already been sorted in `resolve()` below. + for &(_, dst) in &self.parallel_moves { + if self + .parallel_moves + .binary_search_by_key(&dst, |&(src, _)| src) + .is_ok() + { + return true; + } + } + false + } + + pub fn resolve(mut self) -> MoveVec { + // Easy case: zero or one move. Just return our vec. + if self.parallel_moves.len() <= 1 { + return self.parallel_moves; + } + + // Sort moves by source so that we can efficiently test for + // presence. + self.parallel_moves.sort(); + + // Do any dests overlap sources? If not, we can also just + // return the list. + if !self.sources_overlap_dests() { + return self.parallel_moves; + } + + // General case: some moves overwrite dests that other moves + // read as sources. We'll use a general algorithm. + // + // *Important property*: because we expect that each register + // has only one writer (otherwise the effect of the parallel + // move is undefined), each move can only block one other move + // (with its one source corresponding to the one writer of + // that source). Thus, we *can only have simple cycles*: there + // are no SCCs that are more complex than that. We leverage + // this fact below to avoid having to do a full Tarjan SCC DFS + // (with lowest-index computation, etc.): instead, as soon as + // we find a cycle, we know we have the full cycle and we can + // do a cyclic move sequence and continue. + + // Sort moves by destination and check that each destination + // has only one writer. + self.parallel_moves.sort_by_key(|&(_, dst)| dst); + if cfg!(debug) { + let mut last_dst = None; + for &(_, dst) in &self.parallel_moves { + if last_dst.is_some() { + assert!(last_dst.unwrap() != dst); + } + last_dst = Some(dst); + } + } + + // Construct a mapping from move indices to moves they must + // come before. 
Any given move must come before a move that + // overwrites its destination; we have moves sorted by dest + // above so we can efficiently find such a move, if any. + let mut must_come_before: SmallVec<[Option; 16]> = + smallvec![None; self.parallel_moves.len()]; + for (i, &(src, _)) in self.parallel_moves.iter().enumerate() { + if let Ok(move_to_dst_idx) = self + .parallel_moves + .binary_search_by_key(&src, |&(_, dst)| dst) + { + must_come_before[i] = Some(move_to_dst_idx); + } + } + + // Do a simple stack-based DFS and emit moves in postorder, + // then reverse at the end for RPO. Unlike Tarjan's SCC + // algorithm, we can emit a cycle as soon as we find one, as + // noted above. + let mut ret: MoveVec = smallvec![]; + let mut stack: SmallVec<[usize; 16]> = smallvec![]; + let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()]; + let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()]; + + stack.push(0); + onstack[0] = true; + loop { + if stack.is_empty() { + if let Some(next) = visited.iter().position(|&flag| !flag) { + stack.push(next); + onstack[next] = true; + } else { + break; + } + } + + let top = *stack.last().unwrap(); + visited[top] = true; + match must_come_before[top] { + None => { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + stack.pop(); + while let Some(top) = stack.pop() { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + } + } + Some(next) if visited[next] && !onstack[next] => { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + stack.pop(); + while let Some(top) = stack.pop() { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + } + } + Some(next) if !visited[next] && !onstack[next] => { + stack.push(next); + onstack[next] = true; + continue; + } + Some(next) => { + // Found a cycle -- emit a cyclic-move sequence + // for the cycle on the top of stack, then normal + // moves below it. Recall that these moves will be + // reversed in sequence, so from the original + // parallel move set + // + // { B := A, C := B, A := B } + // + // we will generate something like: + // + // A := scratch + // B := A + // C := B + // scratch := C + // + // which will become: + // + // scratch := C + // C := B + // B := A + // A := scratch + let mut last_dst = None; + let mut scratch_src = None; + while let Some(move_idx) = stack.pop() { + onstack[move_idx] = false; + let (mut src, dst) = self.parallel_moves[move_idx]; + if last_dst.is_none() { + scratch_src = Some(src); + src = self.scratch; + } else { + assert_eq!(last_dst.unwrap(), src); + } + ret.push((src, dst)); + + last_dst = Some(dst); + + if move_idx == next { + break; + } + } + if let Some(src) = scratch_src { + ret.push((src, self.scratch)); + } + } + } + } + + ret.reverse(); + ret + } +} diff --git a/src/postorder.rs b/src/postorder.rs new file mode 100644 index 00000000..b5faf90b --- /dev/null +++ b/src/postorder.rs @@ -0,0 +1,51 @@ +//! Fast postorder computation with no allocations (aside from result). + +use crate::Block; +use smallvec::{smallvec, SmallVec}; + +pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>( + num_blocks: usize, + entry: Block, + succ_blocks: SuccFn, +) -> Vec { + let mut ret = vec![]; + + // State: visited-block map, and explicit DFS stack. 
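+    // (Iterative DFS: a block is pushed when first reached and emitted
+    // once all of its successors have been processed, so `ret` is a
+    // postorder; blocks unreachable from `entry` are never visited and
+    // simply do not appear. A client with some `f: &impl Function` might
+    // call this as `calculate(f.blocks(), f.entry_block(), |b|
+    // f.block_succs(b))`.)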
+ let mut visited = vec![]; + visited.resize(num_blocks, false); + + struct State<'a> { + block: Block, + succs: &'a [Block], + next_succ: usize, + } + let mut stack: SmallVec<[State; 64]> = smallvec![]; + + visited[entry.index()] = true; + stack.push(State { + block: entry, + succs: succ_blocks(entry), + next_succ: 0, + }); + + while let Some(ref mut state) = stack.last_mut() { + // Perform one action: push to new succ, skip an already-visited succ, or pop. + if state.next_succ < state.succs.len() { + let succ = state.succs[state.next_succ]; + state.next_succ += 1; + if !visited[succ.index()] { + visited[succ.index()] = true; + stack.push(State { + block: succ, + succs: succ_blocks(succ), + next_succ: 0, + }); + } + } else { + ret.push(state.block); + stack.pop(); + } + } + + ret +} diff --git a/src/ssa.rs b/src/ssa.rs new file mode 100644 index 00000000..3b0ca143 --- /dev/null +++ b/src/ssa.rs @@ -0,0 +1,87 @@ +//! SSA-related utilities. + +use crate::cfg::CFGInfo; + +use crate::{Block, Function, Inst, OperandKind, RegAllocError}; + +pub fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> { + // Walk the blocks in arbitrary order. Check, for every use, that + // the def is either in the same block in an earlier inst, or is + // defined (by inst or blockparam) in some other block that + // dominates this one. Also check that for every block param and + // inst def, that this is the only def. + let mut defined = vec![false; f.num_vregs()]; + for block in 0..f.blocks() { + let block = Block::new(block); + for blockparam in f.block_params(block) { + if defined[blockparam.vreg()] { + return Err(RegAllocError::SSA(*blockparam, Inst::invalid())); + } + defined[blockparam.vreg()] = true; + } + for iix in f.block_insns(block).iter() { + let operands = f.inst_operands(iix); + for operand in operands { + match operand.kind() { + OperandKind::Use => { + let def_block = if cfginfo.vreg_def_inst[operand.vreg().vreg()].is_valid() { + cfginfo.insn_block[cfginfo.vreg_def_inst[operand.vreg().vreg()].index()] + } else { + cfginfo.vreg_def_blockparam[operand.vreg().vreg()].0 + }; + if def_block.is_invalid() { + return Err(RegAllocError::SSA(operand.vreg(), iix)); + } + if !cfginfo.dominates(def_block, block) { + return Err(RegAllocError::SSA(operand.vreg(), iix)); + } + } + OperandKind::Def => { + if defined[operand.vreg().vreg()] { + return Err(RegAllocError::SSA(operand.vreg(), iix)); + } + defined[operand.vreg().vreg()] = true; + } + } + } + } + } + + // Check that the length of branch args matches the sum of the + // number of blockparams in their succs, and that the end of every + // block ends in this branch or in a ret, and that there are no + // other branches or rets in the middle of the block. + for block in 0..f.blocks() { + let block = Block::new(block); + let insns = f.block_insns(block); + for insn in insns.iter() { + if insn == insns.last() { + if !(f.is_branch(insn) || f.is_ret(insn)) { + return Err(RegAllocError::BB(block)); + } + if f.is_branch(insn) { + let expected = f + .block_succs(block) + .iter() + .map(|&succ| f.block_params(succ).len()) + .sum(); + if f.inst_operands(insn).len() != expected { + return Err(RegAllocError::Branch(insn)); + } + } + } else { + if f.is_branch(insn) || f.is_ret(insn) { + return Err(RegAllocError::BB(block)); + } + } + } + } + + // Check that the entry block has no block args: otherwise it is + // undefined what their value would be. 
+ if f.block_params(f.entry_block()).len() > 0 { + return Err(RegAllocError::BB(f.entry_block())); + } + + Ok(()) +} From 33ac6cb41d9be6ba5714b7814f9432197e86a8c6 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 13 Apr 2021 23:26:56 -0700 Subject: [PATCH 002/155] Heuristic improvement: reg-scan offset by inst location. We currently use a heuristic that our scan for an available PReg starts at an index into the register list that rotates with the bundle index. This is a simple way to distribute contention across the whole register file more evenly and avoid repeating less-likely-to-succeed reg-map probes to lower-numbered registers for every bundle. After some experimentation with different options (queue that dynamically puts registers at end after allocating, various ways of mixing/hashing indices, etc.), adding the *instruction offset* (of the start of the first range in the bundle) as well gave the best results. This is very simple and gives us a likely better-than-random conflict avoidance because ranges tend to be local, so rotating through registers as we scan down the list of instructions seems like a very natural strategy. On the tests used by our `cargo bench` benchmark, this reduces regfile probes for the largest (459 instruction) benchmark from 1538 to 829, i.e., approximately by half, and results in an 11% allocation speedup. --- README.md | 20 +++++++------------- src/ion/mod.rs | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index e755c4c0..c187fe91 100644 --- a/README.md +++ b/README.md @@ -111,27 +111,21 @@ benches/0 time: [365.68 us 367.36 us 369.04 us] ``` I then measured three different fuzztest-SSA-generator test cases in -this allocator, `regalloc2`, measuring between 1.05M and 2.3M +this allocator, `regalloc2`, measuring between 1.1M and 2.3M instructions per second (closer to the former for larger functions): ```plain ==== 459 instructions -benches/0 time: [424.46 us 425.65 us 426.59 us] - thrpt: [1.0760 Melem/s 1.0784 Melem/s 1.0814 Melem/s] +benches/0 time: [377.91 us 378.09 us 378.27 us] + thrpt: [1.2134 Melem/s 1.2140 Melem/s 1.2146 Melem/s] ==== 225 instructions -benches/1 time: [213.05 us 213.28 us 213.54 us] - thrpt: [1.0537 Melem/s 1.0549 Melem/s 1.0561 Melem/s] +benches/1 time: [202.03 us 202.14 us 202.27 us] + thrpt: [1.1124 Melem/s 1.1131 Melem/s 1.1137 Melem/s] -Found 1 outliers among 100 measurements (1.00%) - 1 (1.00%) high mild ==== 21 instructions -benches/2 time: [9.0495 us 9.0571 us 9.0641 us] - thrpt: [2.3168 Melem/s 2.3186 Melem/s 2.3206 Melem/s] - -Found 4 outliers among 100 measurements (4.00%) - 2 (2.00%) high mild - 2 (2.00%) high severe +benches/2 time: [9.5605 us 9.5655 us 9.5702 us] + thrpt: [2.1943 Melem/s 2.1954 Melem/s 2.1965 Melem/s] ``` Though not apples-to-apples (SSA vs. non-SSA, completely different diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 78d42dca..303c31ae 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2570,6 +2570,17 @@ impl<'a, F: Function> Env<'a, F> { } else { n_regs }; + // Heuristic: start the scan for an available + // register at an offset influenced both by our + // location in the code and by the bundle we're + // considering. This has the effect of spreading + // demand more evenly across registers. 
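+            // Concretely: scan_offset = (index of the instruction at the
+            // start of the bundle's first range) + (bundle index).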
+ let scan_offset = self.ranges[self.bundles[bundle.index()].first_range.index()] + .range + .from + .inst + .index() + + bundle.index(); for i in 0..loop_count { // The order in which we try registers is somewhat complex: // - First, if there is a hint, we try that. @@ -2587,7 +2598,7 @@ impl<'a, F: Function> Env<'a, F> { (0, Some(hint_reg)) => hint_reg, (i, Some(hint_reg)) => { let reg = self.env.regs_by_class[class as u8 as usize] - [(i - 1 + bundle.index()) % n_regs]; + [(i - 1 + scan_offset) % n_regs]; if reg == hint_reg { continue; } @@ -2595,7 +2606,7 @@ impl<'a, F: Function> Env<'a, F> { } (i, None) => { self.env.regs_by_class[class as u8 as usize] - [(i + bundle.index()) % n_regs] + [(i + scan_offset) % n_regs] } }; From a08b0121a06c25881ad8b335d49f740687c6d819 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 17 Apr 2021 21:28:26 -0700 Subject: [PATCH 003/155] Add support for reftypes/stackmaps and Stack constraints, and misc API changes. The main enhancement in this commit is support for reference types and stackmaps. This requires tracking whether each VReg is a "reference" or "pointer". At certain instructions designated as "safepoints", the regalloc will (i) ensure that all references are in spillslots rather than in registers, and (ii) provide a list of exactly which spillslots have live references at that program point. This can be used by, e.g., a GC to trace and possibly modify pointers. The stackmap of spillslots is precise: it includes all live references, and *only* live references. This commit also brings in some API tweaks as part of the in-progress Cranelift glue. In particular, it makes Allocations and Operands mutually disjoint by using the same bitfield for the type-tag in both and choosing non-overlapping tags. This will allow instructions to carry an Operand for each register slot and then overwrite these in place with Allocations. The `OperandOrAllocation` type does the necessary magic to make this look like an enum, but staying in 32 bits. 
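As an illustration of the shared tag scheme, here is a simplified sketch
(not the real `Operand`/`Allocation` layout -- the payload here is a single
field -- but the tag values mirror the diff below, where operand policies
occupy 0..=4 and allocation kinds occupy 5..=7 of the same top-3-bit field):

    fn pack(tag: u32, payload: u32) -> u32 {
        // The top three bits carry the tag; the low 29 bits carry the payload.
        assert!(tag < 8 && payload < (1 << 29));
        (tag << 29) | payload
    }

    fn is_allocation(bits: u32) -> bool {
        // Tags 0..=4 are operand policies; tags 5..=7 are allocation kinds,
        // so a packed word can be classified without any extra discriminant.
        (bits >> 29) >= 5
    }

    fn main() {
        let operand_bits = pack(1, 42); // e.g. a Reg-policy operand (vreg 42)
        let alloc_bits = pack(6, 3);    // e.g. a register allocation (preg 3)
        assert!(!is_allocation(operand_bits));
        assert!(is_allocation(alloc_bits));
    }
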
--- fuzz/fuzz_targets/ion_checker.rs | 1 + src/bitvec.rs | 44 ++++- src/checker.rs | 142 +++++++++++-- src/fuzzing/func.rs | 64 ++++-- src/ion/mod.rs | 328 ++++++++++++++++++++++++++++--- src/lib.rs | 198 +++++++++++++++---- 6 files changed, 686 insertions(+), 91 deletions(-) diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs index e3ce1dc7..ea6b1bea 100644 --- a/fuzz/fuzz_targets/ion_checker.rs +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -21,6 +21,7 @@ impl Arbitrary for TestCase { reducible: false, block_params: true, always_local_uses: false, + reftypes: true, })?, }) } diff --git a/src/bitvec.rs b/src/bitvec.rs index ce3be7cf..af5792e6 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -48,6 +48,18 @@ impl BitVec { } } + pub fn assign(&mut self, other: &Self) { + if other.bits.len() > 0 { + self.ensure_idx(other.bits.len() - 1); + } + for i in 0..other.bits.len() { + self.bits[i] = other.bits[i]; + } + for i in other.bits.len()..self.bits.len() { + self.bits[i] = 0; + } + } + #[inline(always)] pub fn get(&mut self, idx: usize) -> bool { let word = idx / BITS_PER_WORD; @@ -59,16 +71,21 @@ impl BitVec { } } - pub fn or(&mut self, other: &Self) { + pub fn or(&mut self, other: &Self) -> bool { if other.bits.is_empty() { - return; + return false; } let last_idx = other.bits.len() - 1; self.ensure_idx(last_idx); + let mut changed = false; for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { + if *other_word & !*self_word != 0 { + changed = true; + } *self_word |= *other_word; } + changed } pub fn and(&mut self, other: &Self) { @@ -91,6 +108,29 @@ impl BitVec { } } +impl std::cmp::PartialEq for BitVec { + fn eq(&self, other: &Self) -> bool { + let limit = std::cmp::min(self.bits.len(), other.bits.len()); + for i in 0..limit { + if self.bits[i] != other.bits[i] { + return false; + } + } + for i in limit..self.bits.len() { + if self.bits[i] != 0 { + return false; + } + } + for i in limit..other.bits.len() { + if other.bits[i] != 0 { + return false; + } + } + true + } +} +impl std::cmp::Eq for BitVec {} + pub struct SetBitsIter<'a> { words: &'a [u64], word_idx: usize, diff --git a/src/checker.rs b/src/checker.rs index 5cdcb602..4e06c60d 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -67,10 +67,10 @@ use crate::{ Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, Operand, OperandKind, - OperandPolicy, OperandPos, Output, ProgPoint, VReg, + OperandPolicy, OperandPos, Output, PReg, ProgPoint, SpillSlot, VReg, }; -use std::collections::{HashMap, VecDeque}; +use std::collections::{HashMap, HashSet, VecDeque}; use std::default::Default; use std::hash::Hash; use std::result::Result; @@ -127,6 +127,20 @@ pub enum CheckerError { alloc: Allocation, expected_alloc: Allocation, }, + AllocationIsNotStack { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + ConflictedValueInStackmap { + inst: Inst, + slot: SpillSlot, + }, + NonRefValueInStackmap { + inst: Inst, + slot: SpillSlot, + vreg: VReg, + }, } /// Abstract state for an allocation. 
@@ -162,8 +176,10 @@ impl CheckerValue { (_, &CheckerValue::Unknown) => *self, (&CheckerValue::Conflicted, _) => *self, (_, &CheckerValue::Conflicted) => *other, - (&CheckerValue::Reg(r1, ref1), &CheckerValue::Reg(r2, ref2)) if r1 == r2 => { - CheckerValue::Reg(r1, ref1 || ref2) + (&CheckerValue::Reg(r1, ref1), &CheckerValue::Reg(r2, ref2)) + if r1 == r2 && ref1 == ref2 => + { + CheckerValue::Reg(r1, ref1) } _ => { log::debug!("{:?} and {:?} meet to Conflicted", self, other); @@ -192,7 +208,8 @@ impl std::fmt::Display for CheckerValue { match self { CheckerValue::Unknown => write!(f, "?"), CheckerValue::Conflicted => write!(f, "!"), - CheckerValue::Reg(r, _) => write!(f, "{}", r), + CheckerValue::Reg(r, false) => write!(f, "{}", r), + CheckerValue::Reg(r, true) => write!(f, "{}/ref", r), } } } @@ -305,13 +322,38 @@ impl CheckerState { self.check_val(inst, *op, *alloc, val, allocs)?; } } + &CheckerInst::Safepoint { inst, ref slots } => { + for &slot in slots { + let alloc = Allocation::stack(slot); + let val = self + .allocations + .get(&alloc) + .cloned() + .unwrap_or(Default::default()); + debug!( + "checker: checkinst {:?}: safepoint slot {}, checker value {:?}", + checkinst, slot, val + ); + + match val { + CheckerValue::Unknown => {} + CheckerValue::Conflicted => { + return Err(CheckerError::ConflictedValueInStackmap { inst, slot }); + } + CheckerValue::Reg(vreg, false) => { + return Err(CheckerError::NonRefValueInStackmap { inst, slot, vreg }); + } + CheckerValue::Reg(_, true) => {} + } + } + } _ => {} } Ok(()) } /// Update according to instruction. - fn update(&mut self, checkinst: &CheckerInst) { + fn update<'a, F: Function>(&mut self, checkinst: &CheckerInst, checker: &Checker<'a, F>) { match checkinst { &CheckerInst::Move { into, from } => { let val = self @@ -328,14 +370,19 @@ impl CheckerState { &CheckerInst::Op { ref operands, ref allocs, + ref clobbers, .. } => { for (op, alloc) in operands.iter().zip(allocs.iter()) { if op.kind() != OperandKind::Def { continue; } + let reftyped = checker.reftyped_vregs.contains(&op.vreg()); self.allocations - .insert(*alloc, CheckerValue::Reg(op.vreg(), false)); + .insert(*alloc, CheckerValue::Reg(op.vreg(), reftyped)); + } + for clobber in clobbers { + self.allocations.remove(&Allocation::reg(*clobber)); } } &CheckerInst::BlockParams { @@ -344,8 +391,20 @@ impl CheckerState { .. } => { for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { + let reftyped = checker.reftyped_vregs.contains(vreg); self.allocations - .insert(*alloc, CheckerValue::Reg(*vreg, false)); + .insert(*alloc, CheckerValue::Reg(*vreg, reftyped)); + } + } + &CheckerInst::Safepoint { ref slots, .. } => { + for (alloc, value) in &mut self.allocations { + if let CheckerValue::Reg(_, true) = *value { + if alloc.is_reg() { + *value = CheckerValue::Conflicted; + } else if alloc.is_stack() && !slots.contains(&alloc.as_stack().unwrap()) { + *value = CheckerValue::Conflicted; + } + } } } } @@ -365,6 +424,11 @@ impl CheckerState { return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); } } + OperandPolicy::Stack => { + if alloc.kind() != AllocationKind::Stack { + return Err(CheckerError::AllocationIsNotStack { inst, op, alloc }); + } + } OperandPolicy::FixedReg(preg) => { if alloc != Allocation::reg(preg) { return Err(CheckerError::AllocationIsNotFixedReg { inst, op, alloc }); @@ -402,6 +466,7 @@ pub(crate) enum CheckerInst { inst: Inst, operands: Vec, allocs: Vec, + clobbers: Vec, }, /// The top of a block with blockparams. 
We define the given vregs @@ -411,6 +476,10 @@ pub(crate) enum CheckerInst { vregs: Vec, allocs: Vec, }, + + /// A safepoint, with the given SpillSlots specified as containing + /// reftyped values. All other reftyped values become invalid. + Safepoint { inst: Inst, slots: Vec }, } #[derive(Debug)] @@ -418,6 +487,7 @@ pub struct Checker<'a, F: Function> { f: &'a F, bb_in: HashMap, bb_insts: HashMap>, + reftyped_vregs: HashSet, } impl<'a, F: Function> Checker<'a, F> { @@ -428,6 +498,7 @@ impl<'a, F: Function> Checker<'a, F> { pub fn new(f: &'a F) -> Checker<'a, F> { let mut bb_in = HashMap::new(); let mut bb_insts = HashMap::new(); + let mut reftyped_vregs = HashSet::new(); for block in 0..f.blocks() { let block = Block::new(block); @@ -435,13 +506,31 @@ impl<'a, F: Function> Checker<'a, F> { bb_insts.insert(block, vec![]); } - Checker { f, bb_in, bb_insts } + for &vreg in f.reftype_vregs() { + reftyped_vregs.insert(vreg); + } + + Checker { + f, + bb_in, + bb_insts, + reftyped_vregs, + } } /// Build the list of checker instructions based on the given func /// and allocation results. pub fn prepare(&mut self, out: &Output) { debug!("checker: out = {:?}", out); + // Preprocess safepoint stack-maps into per-inst vecs. + let mut safepoint_slots: HashMap> = HashMap::new(); + for &(progpoint, slot) in &out.safepoint_slots { + safepoint_slots + .entry(progpoint.inst) + .or_insert_with(|| vec![]) + .push(slot); + } + // For each original instruction, create an `Op`. let mut last_inst = None; let mut insert_idx = 0; @@ -454,13 +543,23 @@ impl<'a, F: Function> Checker<'a, F> { // Any inserted edits before instruction. self.handle_edits(block, out, &mut insert_idx, ProgPoint::before(inst)); + // If this is a safepoint, then check the spillslots at this point. + if self.f.is_safepoint(inst) { + let slots = safepoint_slots.remove(&inst).unwrap_or_else(|| vec![]); + + let checkinst = CheckerInst::Safepoint { inst, slots }; + self.bb_insts.get_mut(&block).unwrap().push(checkinst); + } + // Instruction itself. 
let operands: Vec<_> = self.f.inst_operands(inst).iter().cloned().collect(); let allocs: Vec<_> = out.inst_allocs(inst).iter().cloned().collect(); + let clobbers: Vec<_> = self.f.inst_clobbers(inst).iter().cloned().collect(); let checkinst = CheckerInst::Op { inst, operands, allocs, + clobbers, }; debug!("checker: adding inst {:?}", checkinst); self.bb_insts.get_mut(&block).unwrap().push(checkinst); @@ -511,7 +610,7 @@ impl<'a, F: Function> Checker<'a, F> { let mut state = self.bb_in.get(&block).cloned().unwrap(); debug!("analyze: block {} has state {:?}", block.index(), state); for inst in self.bb_insts.get(&block).unwrap() { - state.update(inst); + state.update(inst, self); debug!("analyze: inst {:?} -> state {:?}", inst, state); } @@ -546,7 +645,7 @@ impl<'a, F: Function> Checker<'a, F> { debug!("Checker error: {:?}", e); errors.push(e); } - state.update(inst); + state.update(inst, self); if let Err(e) = state.check(InstPosition::After, inst) { debug!("Checker error: {:?}", e); errors.push(e); @@ -575,6 +674,9 @@ impl<'a, F: Function> Checker<'a, F> { } debug!(" {{ {} }}", s.join(", ")) } + for vreg in self.f.reftype_vregs() { + debug!(" REF: {}", vreg); + } for bb in 0..self.f.blocks() { let bb = Block::new(bb); debug!("block{}:", bb.index()); @@ -587,8 +689,15 @@ impl<'a, F: Function> Checker<'a, F> { inst, ref operands, ref allocs, + ref clobbers, } => { - debug!(" inst{}: {:?} ({:?})", inst.index(), operands, allocs); + debug!( + " inst{}: {:?} ({:?}) clobbers:{:?}", + inst.index(), + operands, + allocs, + clobbers + ); } &CheckerInst::Move { from, into } => { debug!(" {} -> {}", from, into); @@ -604,8 +713,15 @@ impl<'a, F: Function> Checker<'a, F> { } debug!(" blockparams: {}", args.join(", ")); } + &CheckerInst::Safepoint { ref slots, .. } => { + let mut slotargs = vec![]; + for &slot in slots { + slotargs.push(format!("{}", slot)); + } + debug!(" safepoint: {}", slotargs.join(", ")); + } } - state.update(inst); + state.update(inst, &self); print_state(&state); } } diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index ba38e985..67f0dbdb 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -20,6 +20,7 @@ pub struct InstData { op: InstOpcode, operands: Vec, clobbers: Vec, + is_safepoint: bool, } impl InstData { @@ -32,6 +33,7 @@ impl InstData { op: InstOpcode::Op, operands, clobbers: vec![], + is_safepoint: false, } } pub fn branch(uses: &[usize]) -> InstData { @@ -43,6 +45,7 @@ impl InstData { op: InstOpcode::Branch, operands, clobbers: vec![], + is_safepoint: false, } } pub fn ret() -> InstData { @@ -50,6 +53,7 @@ impl InstData { op: InstOpcode::Ret, operands: vec![], clobbers: vec![], + is_safepoint: false, } } } @@ -62,6 +66,7 @@ pub struct Func { block_succs: Vec>, block_params: Vec>, num_vregs: usize, + reftype_vregs: Vec, } impl Function for Func { @@ -106,8 +111,12 @@ impl Function for Func { self.insts[insn.index()].op == InstOpcode::Branch } - fn is_safepoint(&self, _: Inst) -> bool { - false + fn is_safepoint(&self, insn: Inst) -> bool { + self.insts[insn.index()].is_safepoint + } + + fn reftype_vregs(&self) -> &[VReg] { + &self.reftype_vregs[..] 
} fn is_move(&self, _: Inst) -> Option<(VReg, VReg)> { @@ -153,6 +162,7 @@ impl FuncBuilder { insts: vec![], blocks: vec![], num_vregs: 0, + reftype_vregs: vec![], }, insts_per_block: vec![], } @@ -250,6 +260,7 @@ pub struct Options { pub reducible: bool, pub block_params: bool, pub always_local_uses: bool, + pub reftypes: bool, } impl std::default::Default for Options { @@ -262,6 +273,7 @@ impl std::default::Default for Options { reducible: false, block_params: true, always_local_uses: false, + reftypes: false, } } } @@ -355,6 +367,9 @@ impl Func { let vreg = VReg::new(builder.f.num_vregs, RegClass::Int); builder.f.num_vregs += 1; vregs.push(vreg); + if opts.reftypes && bool::arbitrary(u)? { + builder.f.reftype_vregs.push(vreg); + } } vregs_by_block.push(vregs.clone()); vregs_by_block_to_be_defined.push(vec![]); @@ -428,17 +443,28 @@ impl Func { op.kind(), OperandPos::After, ); + // Make sure reused input is a Reg. + let op = operands[reused]; + operands[reused] = + Operand::new(op.vreg(), OperandPolicy::Reg, op.kind(), OperandPos::Before); } else if opts.fixed_regs && bool::arbitrary(u)? { - // Pick an operand and make it a fixed reg. - let fixed_reg = PReg::new(u.int_in_range(0..=30)?, RegClass::Int); - let i = u.int_in_range(0..=(operands.len() - 1))?; - let op = operands[i]; - operands[i] = Operand::new( - op.vreg(), - OperandPolicy::FixedReg(fixed_reg), - op.kind(), - op.pos(), - ); + let mut fixed = vec![]; + for _ in 0..u.int_in_range(0..=operands.len() - 1)? { + // Pick an operand and make it a fixed reg. + let fixed_reg = PReg::new(u.int_in_range(0..=30)?, RegClass::Int); + if fixed.contains(&fixed_reg) { + break; + } + fixed.push(fixed_reg); + let i = u.int_in_range(0..=(operands.len() - 1))?; + let op = operands[i]; + operands[i] = Operand::new( + op.vreg(), + OperandPolicy::FixedReg(fixed_reg), + op.kind(), + op.pos(), + ); + } } else if opts.clobbers && bool::arbitrary(u)? { for _ in 0..u.int_in_range(0..=5)? { let reg = u.int_in_range(0..=30)?; @@ -448,6 +474,13 @@ impl Func { clobbers.push(PReg::new(reg, RegClass::Int)); } } + + let is_safepoint = opts.reftypes + && operands + .iter() + .all(|op| !builder.f.reftype_vregs.contains(&op.vreg())) + && bool::arbitrary(u)?; + let op = *u.choose(&[InstOpcode::Op, InstOpcode::Call])?; builder.add_inst( Block::new(block), @@ -455,6 +488,7 @@ impl Func { op, operands, clobbers, + is_safepoint, }, ); avail.push(vreg); @@ -493,6 +527,9 @@ impl Func { impl std::fmt::Debug for Func { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{{\n")?; + for vreg in self.reftype_vregs() { + write!(f, " REF: {}\n", vreg)?; + } for (i, blockrange) in self.blocks.iter().enumerate() { let succs = self.block_succs[i] .iter() @@ -513,6 +550,9 @@ impl std::fmt::Debug for Func { i, params, succs, preds )?; for inst in blockrange.iter() { + if self.is_safepoint(inst) { + write!(f, " -- SAFEPOINT --\n")?; + } write!( f, " inst{}: {:?} ops:{:?} clobber:{:?}\n", diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 303c31ae..bcd96c93 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -26,18 +26,13 @@ * * - reused-input reg: don't allocate register for input that is reused. * - * - more fuzzing: - * - test with *multiple* fixed-reg constraints on one vreg (same - * inst, different insts) - * * - modify CL to generate SSA VCode * - lower blockparams to blockparams directly * - use temps properly (`alloc_tmp()` vs `alloc_reg()`) * - * - produce stackmaps - * - stack constraint (also: unify this with stack-args? 
spillslot vs user stackslot?) - * - vreg reffyness - * - if reffy vreg, add to stackmap lists during reification scan + * - "Fixed-stack location": negative spillslot numbers? + * + * - Rematerialization */ #![allow(dead_code, unused_imports)] @@ -54,7 +49,7 @@ use crate::{ use log::debug; use smallvec::{smallvec, SmallVec}; use std::cmp::Ordering; -use std::collections::{BTreeMap, BinaryHeap}; +use std::collections::{BTreeMap, BinaryHeap, HashMap, HashSet, VecDeque}; use std::fmt::Debug; #[cfg(not(debug))] @@ -185,6 +180,8 @@ struct Use { next_use: UseIndex, } +const SLOT_NONE: usize = usize::MAX; + #[derive(Clone, Debug)] struct Def { operand: Operand, @@ -241,6 +238,7 @@ struct VRegData { def: DefIndex, blockparam: Block, first_range: LiveRangeIndex, + is_ref: bool, } #[derive(Clone, Debug)] @@ -307,7 +305,8 @@ struct Env<'a, F: Function> { pregs: Vec, allocation_queue: PrioQueue, hot_code: LiveRangeSet, - clobbers: Vec, // Sorted list of insts with clobbers. + clobbers: Vec, // Sorted list of insts with clobbers. + safepoints: Vec, // Sorted list of safepoint insts. spilled_bundles: Vec, spillslots: Vec, @@ -322,8 +321,8 @@ struct Env<'a, F: Function> { // will insert a copy from wherever the VReg's primary allocation // was to the approprate PReg. // - // (progpoint, copy-from-preg, copy-to-preg) - multi_fixed_reg_fixups: Vec<(ProgPoint, PRegIndex, PRegIndex)>, + // (progpoint, copy-from-preg, copy-to-preg, to-slot) + multi_fixed_reg_fixups: Vec<(ProgPoint, PRegIndex, PRegIndex, usize)>, inserted_moves: Vec, @@ -332,6 +331,7 @@ struct Env<'a, F: Function> { allocs: Vec, inst_alloc_offsets: Vec, num_spillslots: u32, + safepoint_slots: Vec<(ProgPoint, SpillSlot)>, stats: Stats, @@ -462,13 +462,16 @@ fn spill_weight_from_policy(policy: OperandPolicy) -> u32 { enum Requirement { Fixed(PReg), Register(RegClass), + Stack(RegClass), Any(RegClass), } impl Requirement { fn class(self) -> RegClass { match self { Requirement::Fixed(preg) => preg.class(), - Requirement::Register(class) | Requirement::Any(class) => class, + Requirement::Register(class) | Requirement::Any(class) | Requirement::Stack(class) => { + class + } } } @@ -478,6 +481,7 @@ impl Requirement { } match (self, other) { (other, Requirement::Any(_)) | (Requirement::Any(_), other) => Some(other), + (Requirement::Stack(_), Requirement::Stack(_)) => Some(self), (Requirement::Register(_), Requirement::Fixed(preg)) | (Requirement::Fixed(preg), Requirement::Register(_)) => { Some(Requirement::Fixed(preg)) @@ -491,6 +495,7 @@ impl Requirement { match op.policy() { OperandPolicy::FixedReg(preg) => Requirement::Fixed(preg), OperandPolicy::Reg | OperandPolicy::Reuse(_) => Requirement::Register(op.class()), + OperandPolicy::Stack => Requirement::Stack(op.class()), _ => Requirement::Any(op.class()), } } @@ -575,6 +580,7 @@ impl<'a, F: Function> Env<'a, F> { pregs: vec![], allocation_queue: PrioQueue::new(), clobbers: vec![], + safepoints: vec![], hot_code: LiveRangeSet::new(), spilled_bundles: vec![], spillslots: vec![], @@ -586,6 +592,7 @@ impl<'a, F: Function> Env<'a, F> { allocs: vec![], inst_alloc_offsets: vec![], num_spillslots: 0, + safepoint_slots: vec![], stats: Stats::default(), @@ -610,8 +617,12 @@ impl<'a, F: Function> Env<'a, F> { def: DefIndex::invalid(), first_range: LiveRangeIndex::invalid(), blockparam: Block::invalid(), + is_ref: false, }); } + for v in self.func.reftype_vregs() { + self.vregs[v.vreg()].is_ref = true; + } // Create allocations too. 
for inst in 0..self.func.insts() { let start = self.allocs.len() as u32; @@ -994,12 +1005,20 @@ impl<'a, F: Function> Env<'a, F> { if self.func.inst_clobbers(inst).len() > 0 { self.clobbers.push(inst); } + if self.func.is_safepoint(inst) { + self.safepoints.push(inst); + } // Mark clobbers with CodeRanges on PRegs. for i in 0..self.func.inst_clobbers(inst).len() { // don't borrow `self` let clobber = self.func.inst_clobbers(inst)[i]; + // Clobber range is at After point only: an + // instruction can still take an input in a reg + // that it later clobbers. (In other words, the + // clobber is like a normal def that never gets + // used.) let range = CodeRange { - from: ProgPoint::before(inst), + from: ProgPoint::after(inst), to: ProgPoint::before(inst.next()), }; self.add_liverange_to_preg(range, clobber); @@ -1089,7 +1108,7 @@ impl<'a, F: Function> Env<'a, F> { // If this is a branch, extend `pos` to // the end of the block. (Branch uses are // blockparams and need to be live at the - // end of the block. + // end of the block.) if self.func.is_branch(inst) { pos = self.cfginfo.block_exit[block.index()]; } @@ -1242,7 +1261,73 @@ impl<'a, F: Function> Env<'a, F> { self.liveins[block.index()] = live; } - // Do a cleanup pass: if there are any LiveRanges with + self.safepoints.sort(); + + // Insert safepoint virtual stack uses, if needed. + for vreg in self.func.reftype_vregs() { + let vreg = VRegIndex::new(vreg.vreg()); + let mut iter = self.vregs[vreg.index()].first_range; + let mut safepoint_idx = 0; + while iter.is_valid() { + let rangedata = &self.ranges[iter.index()]; + let range = rangedata.range; + while safepoint_idx < self.safepoints.len() + && ProgPoint::before(self.safepoints[safepoint_idx]) < range.from + { + safepoint_idx += 1; + } + while safepoint_idx < self.safepoints.len() + && range.contains_point(ProgPoint::before(self.safepoints[safepoint_idx])) + { + // Create a virtual use. + let pos = ProgPoint::before(self.safepoints[safepoint_idx]); + let operand = Operand::new( + self.vregs[vreg.index()].reg, + OperandPolicy::Stack, + OperandKind::Use, + OperandPos::Before, + ); + + // Create the actual use object. + let u = UseIndex(self.uses.len() as u32); + self.uses.push(Use { + operand, + pos, + slot: SLOT_NONE, + next_use: UseIndex::invalid(), + }); + + // Create/extend the LiveRange and add the use to the range. 
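+                    // The range covers just this safepoint, and the Stack
+                    // policy on the virtual use is what forces the reftyped
+                    // value into a spillslot here.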
+ let range = CodeRange { + from: pos, + to: pos.next(), + }; + let lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + range, + &mut num_ranges, + ); + vreg_ranges[operand.vreg().vreg()] = lr; + + log::debug!( + "Safepoint-induced stack use of {:?} at {:?} -> {:?} -> {:?}", + operand, + pos, + u, + lr + ); + + self.insert_use_into_liverange_and_update_stats(lr, u); + safepoint_idx += 1; + } + if safepoint_idx >= self.safepoints.len() { + break; + } + iter = self.ranges[iter.index()].next_in_reg; + } + } + + // Do a fixed-reg cleanup pass: if there are any LiveRanges with // multiple uses (or defs) at the same ProgPoint and there is // more than one FixedReg constraint at that ProgPoint, we // need to record all but one of them in a special fixup list @@ -1264,11 +1349,13 @@ impl<'a, F: Function> Env<'a, F> { let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; let mut fixup_multi_fixed_vregs = |pos: ProgPoint, + slot: usize, op: &mut Operand, fixups: &mut Vec<( ProgPoint, PRegIndex, PRegIndex, + usize, )>| { if last_point.is_some() && Some(pos) != last_point { seen_fixed_for_vreg.clear(); @@ -1289,7 +1376,7 @@ impl<'a, F: Function> Env<'a, F> { { let orig_preg = first_preg[idx]; log::debug!(" -> duplicate; switching to policy Reg"); - fixups.push((pos, orig_preg, preg_idx)); + fixups.push((pos, orig_preg, preg_idx, slot)); *op = Operand::new(op.vreg(), OperandPolicy::Reg, op.kind(), op.pos()); extra_clobbers.push((preg, pos.inst)); } else { @@ -1302,8 +1389,10 @@ impl<'a, F: Function> Env<'a, F> { if self.ranges[iter.index()].def.is_valid() { let def_idx = self.vregs[vreg].def; let pos = self.defs[def_idx.index()].pos; + let slot = self.defs[def_idx.index()].slot; fixup_multi_fixed_vregs( pos, + slot, &mut self.defs[def_idx.index()].operand, &mut self.multi_fixed_reg_fixups, ); @@ -1312,8 +1401,10 @@ impl<'a, F: Function> Env<'a, F> { let mut use_iter = self.ranges[iter.index()].first_use; while use_iter.is_valid() { let pos = self.uses[use_iter.index()].pos; + let slot = self.uses[use_iter.index()].slot; fixup_multi_fixed_vregs( pos, + slot, &mut self.uses[use_iter.index()].operand, &mut self.multi_fixed_reg_fixups, ); @@ -1916,13 +2007,17 @@ impl<'a, F: Function> Env<'a, F> { let bundledata = &self.bundles[bundle.index()]; let first_range = &self.ranges[bundledata.first_range.index()]; + log::debug!("recompute bundle properties: bundle {:?}", bundle); + if first_range.vreg.is_invalid() { + log::debug!(" -> no vreg; minimal and fixed"); minimal = true; fixed = true; } else { if first_range.def.is_valid() { let def_data = &self.defs[first_range.def.index()]; if let OperandPolicy::FixedReg(_) = def_data.operand.policy() { + log::debug!(" -> fixed def {:?}", first_range.def); fixed = true; } } @@ -1930,6 +2025,7 @@ impl<'a, F: Function> Env<'a, F> { while use_iter.is_valid() { let use_data = &self.uses[use_iter.index()]; if let OperandPolicy::FixedReg(_) = use_data.operand.policy() { + log::debug!(" -> fixed use {:?}", use_iter); fixed = true; break; } @@ -1939,16 +2035,22 @@ impl<'a, F: Function> Env<'a, F> { // the range covers only one instruction. Note that it // could cover just one ProgPoint, i.e. X.Before..X.After, // or two ProgPoints, i.e. X.Before..X+1.Before. 
+ log::debug!(" -> first range has range {:?}", first_range.range); + log::debug!( + " -> first range has next in bundle {:?}", + first_range.next_in_bundle + ); minimal = first_range.next_in_bundle.is_invalid() && first_range.range.from.inst == first_range.range.to.prev().inst; + log::debug!(" -> minimal: {}", minimal); } let spill_weight = if minimal { if fixed { - log::debug!(" -> fixed and minimal: 2000000"); + log::debug!(" -> fixed and minimal: spill weight 2000000"); 2_000_000 } else { - log::debug!(" -> non-fixed and minimal: 1000000"); + log::debug!(" -> non-fixed and minimal: spill weight 1000000"); 1_000_000 } } else { @@ -1957,15 +2059,20 @@ impl<'a, F: Function> Env<'a, F> { while range.is_valid() { let range_data = &self.ranges[range.index()]; if range_data.def.is_valid() { - log::debug!(" -> has def (2000)"); + log::debug!(" -> has def (spill weight +2000)"); total += 2000; } - log::debug!(" -> uses spill weight: {}", range_data.uses_spill_weight); + log::debug!(" -> uses spill weight: +{}", range_data.uses_spill_weight); total += range_data.uses_spill_weight; range = range_data.next_in_bundle; } if self.bundles[bundle.index()].prio > 0 { + log::debug!( + " -> dividing by prio {}; final weight {}", + self.bundles[bundle.index()].prio, + total / self.bundles[bundle.index()].prio + ); total / self.bundles[bundle.index()].prio } else { total @@ -2646,6 +2753,15 @@ impl<'a, F: Function> Env<'a, F> { lowest_cost_conflict_set.unwrap_or(smallvec![]) } + Requirement::Stack(_) => { + // If we must be on the stack, put ourselves on + // the spillset's list immediately. + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .bundles + .push(bundle); + return; + } + Requirement::Any(_) => { // If a register is not *required*, spill now (we'll retry // allocation on spilled bundles later). @@ -2657,8 +2773,9 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> conflict set {:?}", conflicting_bundles); - // If we have already tried evictions once before and are still unsuccessful, give up - // and move on to splitting as long as this is not a minimal bundle. + // If we have already tried evictions once before and are + // still unsuccessful, give up and move on to splitting as + // long as this is not a minimal bundle. if attempts >= 2 && !self.minimal_bundle(bundle) { break; } @@ -3324,7 +3441,11 @@ impl<'a, F: Function> Env<'a, F> { debug_assert!(range.contains_point(usedata.pos)); let inst = usedata.pos.inst; let slot = usedata.slot; - self.set_alloc(inst, slot, alloc); + // Safepoints add virtual uses with no slots; + // avoid these. + if slot != SLOT_NONE { + self.set_alloc(inst, slot, alloc); + } use_iter = self.uses[use_iter.index()].next_use; } @@ -3425,7 +3546,7 @@ impl<'a, F: Function> Env<'a, F> { } // Handle multi-fixed-reg constraints by copying. 
- for (progpoint, from_preg, to_preg) in + for (progpoint, from_preg, to_preg, slot) in std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) { log::debug!( @@ -3440,6 +3561,11 @@ impl<'a, F: Function> Env<'a, F> { Allocation::reg(self.pregs[from_preg.index()].reg), Allocation::reg(self.pregs[to_preg.index()].reg), ); + self.set_alloc( + progpoint.inst, + slot, + Allocation::reg(self.pregs[to_preg.index()].reg), + ); } // Handle outputs that reuse inputs: copy beforehand, then set @@ -3633,7 +3759,155 @@ impl<'a, F: Function> Env<'a, F> { self.edits.push((pos.to_index(), prio, edit)); } - fn compute_stackmaps(&mut self) {} + fn compute_stackmaps(&mut self) { + // For each ref-typed vreg, iterate through ranges and find + // safepoints in-range. Add the SpillSlot to the stackmap. + // + // Note that unlike in the rest of the allocator, we cannot + // overapproximate here: we cannot list a vreg's alloc at a + // certain program point in the metadata if it is not yet + // live. Because arbitrary block order and irreducible control + // flow could result in us encountering an (overapproximated, + // not actually live) vreg range for a reftyped value when + // scanning in block order, we need to do a fixpoint liveness + // analysis here for reftyped vregs only. We only perform this + // analysis if there are reftyped vregs present, so it will + // not add to allocation runtime otherwise. + + if self.func.reftype_vregs().is_empty() { + return; + } + + let mut reftype_vreg_map = BitVec::new(); + for vreg in self.func.reftype_vregs() { + reftype_vreg_map.set(vreg.vreg(), true); + } + + let mut live_reftypes_block_start: Vec = vec![]; + let mut live_reftypes_block_end: Vec = vec![]; + for _ in 0..self.func.blocks() { + live_reftypes_block_start.push(BitVec::new()); + live_reftypes_block_end.push(BitVec::new()); + } + + let mut safepoints_per_vreg: HashMap> = HashMap::new(); + for &vreg in self.func.reftype_vregs() { + safepoints_per_vreg.insert(vreg.vreg(), HashSet::new()); + } + + let mut workqueue = VecDeque::new(); + let mut workqueue_set = HashSet::new(); + let mut visited = HashSet::new(); + + // Backward analysis: start at return blocks. + for block in 0..self.func.blocks() { + let block = Block::new(block); + if self.func.is_ret(self.func.block_insns(block).last()) { + workqueue.push_back(block); + workqueue_set.insert(block); + } + } + + // While workqueue is not empty, scan a block backward. 
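+        // A predecessor is re-enqueued whenever its live-out set grows (or it
+        // has not been visited yet), so the per-block live sets converge to a
+        // fixpoint even with irreducible control flow.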
+ while !workqueue.is_empty() { + let block = workqueue.pop_back().unwrap(); + workqueue_set.remove(&block); + visited.insert(block); + + let live = &mut live_reftypes_block_start[block.index()]; + live.assign(&live_reftypes_block_end[block.index()]); + + for inst in self.func.block_insns(block).rev().iter() { + for pos in &[OperandPos::After, OperandPos::Before] { + for op in self.func.inst_operands(inst) { + if !reftype_vreg_map.get(op.vreg().vreg()) { + continue; + } + if op.pos() != OperandPos::Both && op.pos() != *pos { + continue; + } + match op.kind() { + OperandKind::Def => { + live.set(op.vreg().vreg(), false); + } + OperandKind::Use => { + live.set(op.vreg().vreg(), true); + } + } + } + } + + if self.func.is_safepoint(inst) { + for vreg in live.iter() { + let safepoints = safepoints_per_vreg.get_mut(&vreg).unwrap(); + safepoints.insert(inst); + } + } + } + for blockparam in self.func.block_params(block) { + if !reftype_vreg_map.get(blockparam.vreg()) { + continue; + } + live.set(blockparam.vreg(), false); + } + + for &pred in self.func.block_preds(block) { + if live_reftypes_block_end[pred.index()].or(live) || !visited.contains(&pred) { + if !workqueue_set.contains(&pred) { + workqueue.push_back(pred); + workqueue_set.insert(pred); + } + } + } + } + + // Now we have `safepoints_per_vreg`. All we have to do is, + // for each vreg in this map, step through the LiveRanges + // along with a sorted list of safepoints; and for each + // safepoint in the current range, emit the allocation into + // the `safepoint_slots` list. + + log::debug!("safepoints_per_vreg = {:?}", safepoints_per_vreg); + + for vreg in self.func.reftype_vregs() { + log::debug!("generating safepoint info for vreg {}", vreg); + let vreg = VRegIndex::new(vreg.vreg()); + let mut safepoints: Vec = safepoints_per_vreg + .get(&vreg.index()) + .unwrap() + .iter() + .map(|&inst| ProgPoint::before(inst)) + .collect(); + safepoints.sort(); + log::debug!(" -> live over safepoints: {:?}", safepoints); + + let mut safepoint_idx = 0; + let mut iter = self.vregs[vreg.index()].first_range; + while iter.is_valid() { + let rangedata = &self.ranges[iter.index()]; + let range = rangedata.range; + let alloc = self.get_alloc_for_range(iter); + log::debug!(" -> range {:?}: alloc {}", range, alloc); + while safepoint_idx < safepoints.len() && safepoints[safepoint_idx] < range.to { + if safepoints[safepoint_idx] < range.from { + safepoint_idx += 1; + continue; + } + log::debug!(" -> covers safepoint {:?}", safepoints[safepoint_idx]); + + let slot = alloc + .as_stack() + .expect("Reference-typed value not in spillslot at safepoint"); + self.safepoint_slots.push((safepoints[safepoint_idx], slot)); + safepoint_idx += 1; + } + iter = rangedata.next_in_reg; + } + } + + self.safepoint_slots.sort(); + log::debug!("final safepoint slots info: {:?}", self.safepoint_slots); + } pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { self.create_pregs_and_vregs(); @@ -3769,6 +4043,8 @@ pub fn run(func: &F, mach_env: &MachineEnv) -> Result (0, 0), OperandPolicy::Reg => (0, 1), + OperandPolicy::Stack => (0, 2), OperandPolicy::FixedReg(preg) => { assert_eq!(preg.class(), vreg.class()); - (preg.hw_enc() as u32, 2) + (preg.hw_enc() as u32, 3) } OperandPolicy::Reuse(which) => { assert!(which <= PReg::MAX); - (which as u32, 3) + (which as u32, 4) } }; let class_field = vreg.class() as u8 as u32; @@ -230,9 +233,9 @@ impl Operand { bits: vreg.vreg() as u32 | (preg_field << 20) | (class_field << 25) - | (policy_field << 26) + | (pos_field << 26) | 
(kind_field << 28) - | (pos_field << 29), + | (policy_field << 29), } } @@ -322,7 +325,7 @@ impl Operand { #[inline(always)] pub fn pos(self) -> OperandPos { - let pos_field = (self.bits >> 29) & 3; + let pos_field = (self.bits >> 26) & 3; match pos_field { 0 => OperandPos::Before, 1 => OperandPos::After, @@ -333,13 +336,14 @@ impl Operand { #[inline(always)] pub fn policy(self) -> OperandPolicy { - let policy_field = (self.bits >> 26) & 3; + let policy_field = (self.bits >> 29) & 7; let preg_field = ((self.bits >> 20) as usize) & PReg::MAX; match policy_field { 0 => OperandPolicy::Any, 1 => OperandPolicy::Reg, - 2 => OperandPolicy::FixedReg(PReg::new(preg_field, self.class())), - 3 => OperandPolicy::Reuse(preg_field), + 2 => OperandPolicy::Stack, + 3 => OperandPolicy::FixedReg(PReg::new(preg_field, self.class())), + 4 => OperandPolicy::Reuse(preg_field), _ => unreachable!(), } } @@ -357,15 +361,7 @@ impl Operand { impl std::fmt::Debug for Operand { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "Operand(vreg = {:?}, class = {:?}, kind = {:?}, pos = {:?}, policy = {:?})", - self.vreg().vreg(), - self.class(), - self.kind(), - self.pos(), - self.policy() - ) + std::fmt::Display::fmt(self, f) } } @@ -373,10 +369,14 @@ impl std::fmt::Display for Operand { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, - "{:?}@{:?}: {} {}", + "{:?}@{:?}: {}{} {}", self.kind(), self.pos(), self.vreg(), + match self.class() { + RegClass::Int => "i", + RegClass::Float => "f", + }, self.policy() ) } @@ -388,6 +388,8 @@ pub enum OperandPolicy { Any, /// Operand must be in a register. Register is read-only for Uses. Reg, + /// Operand must be on the stack. + Stack, /// Operand must be in a fixed register. FixedReg(PReg), /// On defs only: reuse a use's register. Which use is given by `preg` field. @@ -399,6 +401,7 @@ impl std::fmt::Display for OperandPolicy { match self { Self::Any => write!(f, "any"), Self::Reg => write!(f, "reg"), + Self::Stack => write!(f, "stack"), Self::FixedReg(preg) => write!(f, "fixed({})", preg), Self::Reuse(idx) => write!(f, "reuse({})", idx), } @@ -422,20 +425,21 @@ pub enum OperandPos { /// Operand. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Allocation { - /// Bit-pack in 31 bits: + /// Bit-pack in 32 bits. Note that `kind` overlaps with the + /// `policy` field in `Operand`, and we are careful to use + /// disjoint ranges of values in this field for each type. We also + /// leave the def-or-use bit (`kind` for `Operand`) unused here so + /// that the client may use it to mark `Allocation`s on + /// instructions as read or write when it edits instructions + /// (which is sometimes useful for post-allocation analyses). 
/// - /// op-or-alloc:1 kind:2 index:29 + /// kind:3 unused:1 index:28 bits: u32, } impl std::fmt::Debug for Allocation { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "Allocation(kind = {:?}, index = {})", - self.kind(), - self.index() - ) + std::fmt::Display::fmt(self, f) } } @@ -452,6 +456,7 @@ impl std::fmt::Display for Allocation { impl Allocation { #[inline(always)] pub(crate) fn new(kind: AllocationKind, index: usize) -> Self { + assert!(index < (1 << 28)); Self { bits: ((kind as u8 as u32) << 29) | (index as u32), } @@ -474,17 +479,32 @@ impl Allocation { #[inline(always)] pub fn kind(self) -> AllocationKind { - match (self.bits >> 29) & 3 { - 0 => AllocationKind::None, - 1 => AllocationKind::Reg, - 2 => AllocationKind::Stack, + match (self.bits >> 29) & 7 { + 5 => AllocationKind::None, + 6 => AllocationKind::Reg, + 7 => AllocationKind::Stack, _ => unreachable!(), } } + #[inline(always)] + pub fn is_none(self) -> bool { + self.kind() == AllocationKind::None + } + + #[inline(always)] + pub fn is_reg(self) -> bool { + self.kind() == AllocationKind::Reg + } + + #[inline(always)] + pub fn is_stack(self) -> bool { + self.kind() == AllocationKind::Stack + } + #[inline(always)] pub fn index(self) -> usize { - (self.bits & ((1 << 29) - 1)) as usize + (self.bits & ((1 << 28) - 1)) as usize } #[inline(always)] @@ -516,12 +536,14 @@ impl Allocation { } } +// N.B.: These values must be *disjoint* with the values used to +// encode `OperandPolicy`, because they share a 3-bit field. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u8)] pub enum AllocationKind { - None = 0, - Reg = 1, - Stack = 2, + None = 5, + Reg = 6, + Stack = 7, } impl Allocation { @@ -535,6 +557,59 @@ impl Allocation { } } +/// A helper that wraps either an `Operand` or an `Allocation` and is +/// able to tell which it is based on the tag bits. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct OperandOrAllocation { + bits: u32, +} + +impl OperandOrAllocation { + pub fn from_operand(operand: Operand) -> Self { + Self { + bits: operand.bits(), + } + } + pub fn from_alloc(alloc: Allocation) -> Self { + Self { bits: alloc.bits() } + } + pub fn is_operand(&self) -> bool { + (self.bits >> 29) <= 4 + } + pub fn is_allocation(&self) -> bool { + (self.bits >> 29) >= 5 + } + pub fn as_operand(&self) -> Option { + if self.is_operand() { + Some(Operand::from_bits(self.bits)) + } else { + None + } + } + pub fn as_allocation(&self) -> Option { + if self.is_allocation() { + Some(Allocation::from_bits(self.bits & !(1 << 28))) + } else { + None + } + } + + pub fn kind(&self) -> OperandKind { + let kind_field = (self.bits >> 28) & 1; + match kind_field { + 0 => OperandKind::Def, + 1 => OperandKind::Use, + _ => unreachable!(), + } + } + + /// Replaces the Operand with an Allocation, keeping the def/use bit. + pub fn replace_with_alloc(&mut self, alloc: Allocation) { + self.bits &= 1 << 28; + self.bits |= alloc.bits; + } +} + /// A trait defined by the regalloc client to provide access to its /// machine-instruction / CFG representation. pub trait Function { @@ -576,7 +651,9 @@ pub trait Function { fn is_branch(&self, insn: Inst) -> bool; /// Determine whether an instruction is a safepoint and requires a stackmap. - fn is_safepoint(&self, insn: Inst) -> bool; + fn is_safepoint(&self, _: Inst) -> bool { + false + } /// Determine whether an instruction is a move; if so, return the /// vregs for (src, dst). 
@@ -598,6 +675,40 @@ pub trait Function { /// course better if it is exact. fn num_vregs(&self) -> usize; + /// Get the VRegs that are pointer/reference types. This has the + /// following effects for each such vreg: + /// + /// - At all safepoint instructions, the vreg will be in a + /// SpillSlot, not in a register. + /// - The vreg *may not* be used as a register operand on + /// safepoint instructions: this is because a vreg can only live + /// in one place at a time. The client should copy the value to an + /// integer-typed vreg and use this to pass a pointer as an input + /// to a safepoint instruction (such as a function call). + /// - At all safepoint instructions, all live vregs' locations + /// will be included in a list in the `Output` below, so that + /// pointer-inspecting/updating functionality (such as a moving + /// garbage collector) may observe and edit their values. + fn reftype_vregs(&self) -> &[VReg] { + &[] + } + + /// Get the VRegs for which we should generate value-location + /// metadata for debugging purposes. This can be used to generate + /// e.g. DWARF with valid prgram-point ranges for each value + /// expression in a way that is more efficient than a post-hoc + /// analysis of the allocator's output. + /// + /// Each tuple is (vreg, inclusive_start, exclusive_end, + /// label). In the `Output` there will be (label, inclusive_start, + /// exclusive_end, alloc)` tuples. The ranges may not exactly + /// match -- specifically, the returned metadata may cover only a + /// subset of the requested ranges -- if the value is not live for + /// the entire requested ranges. + fn debug_value_labels(&self) -> &[(Inst, Inst, VReg, u32)] { + &[] + } + // -------------- // Spills/reloads // -------------- @@ -736,6 +847,17 @@ pub struct Output { /// Allocation offset in `allocs` for each instruction. pub inst_alloc_offsets: Vec, + /// Safepoint records: at a given program point, a reference-typed value lives in the given SpillSlot. + pub safepoint_slots: Vec<(ProgPoint, SpillSlot)>, + + /// Debug info: a labeled value (as applied to vregs by + /// `Function::debug_value_labels()` on the input side) is located + /// in the given allocation from the first program point + /// (inclusive) to the second (exclusive). Guaranteed to be sorted + /// by label and program point, and the ranges are guaranteed to + /// be disjoint. + pub debug_locations: Vec<(u32, ProgPoint, ProgPoint, Allocation)>, + /// Internal stats from the allocator. pub stats: ion::Stats, } From 940c1b719d77531362f5a8c37b648331a0a2213a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 18 Apr 2021 13:02:05 -0700 Subject: [PATCH 004/155] Changes from review comments. 
--- fuzz/.gitignore | 1 - fuzz/Cargo.toml | 1 - fuzz/fuzz_targets/domtree.rs | 5 +++++ fuzz/fuzz_targets/ion.rs | 5 +++++ fuzz/fuzz_targets/ion_checker.rs | 5 +++++ fuzz/fuzz_targets/moves.rs | 5 +++++ fuzz/fuzz_targets/ssagen.rs | 7 +++++++ src/bin/test.rs | 5 +++++ src/bitvec.rs | 5 +++++ src/cfg.rs | 5 +++++ src/fuzzing/func.rs | 5 +++++ src/fuzzing/mod.rs | 5 +++++ src/lib.rs | 25 ++++++++++++++++++------- src/moves.rs | 5 +++++ src/postorder.rs | 5 +++++ src/ssa.rs | 5 +++++ 16 files changed, 85 insertions(+), 9 deletions(-) diff --git a/fuzz/.gitignore b/fuzz/.gitignore index 572e03bd..a0925114 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,4 +1,3 @@ - target corpus artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index e0eec8da..7a94f2dc 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -1,4 +1,3 @@ - [package] name = "regalloc2-fuzz" version = "0.0.0" diff --git a/fuzz/fuzz_targets/domtree.rs b/fuzz/fuzz_targets/domtree.rs index 5923befb..06885782 100644 --- a/fuzz/fuzz_targets/domtree.rs +++ b/fuzz/fuzz_targets/domtree.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + #![no_main] use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs index dc4a3423..13349388 100644 --- a/fuzz/fuzz_targets/ion.rs +++ b/fuzz/fuzz_targets/ion.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + #![no_main] use libfuzzer_sys::fuzz_target; diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs index ea6b1bea..da43dbc0 100644 --- a/fuzz/fuzz_targets/ion_checker.rs +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + #![no_main] use libfuzzer_sys::fuzz_target; use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured, Result}; diff --git a/fuzz/fuzz_targets/moves.rs b/fuzz/fuzz_targets/moves.rs index a719f7c4..9f685b3c 100644 --- a/fuzz/fuzz_targets/moves.rs +++ b/fuzz/fuzz_targets/moves.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + #![no_main] use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs index e69e71a6..c0ff306e 100644 --- a/fuzz/fuzz_targets/ssagen.rs +++ b/fuzz/fuzz_targets/ssagen.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + #![no_main] use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; @@ -23,6 +28,8 @@ impl Arbitrary for TestCase { control_flow: true, reducible: false, always_local_uses: false, + block_params: true, + reftypes: true, }, )?, }) diff --git a/src/bin/test.rs b/src/bin/test.rs index 6d7c7de6..c6fd7792 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. 
+ */ + use arbitrary::{Arbitrary, Unstructured}; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; diff --git a/src/bitvec.rs b/src/bitvec.rs index af5792e6..4dc727b7 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + //! Bit vectors. use smallvec::{smallvec, SmallVec}; diff --git a/src/cfg.rs b/src/cfg.rs index 4c838e78..31853f0b 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + //! Lightweight CFG analyses. use crate::{domtree, postorder, Block, Function, Inst, OperandKind, ProgPoint}; diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 67f0dbdb..96efeda6 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + use crate::{ domtree, postorder, Allocation, Block, Function, Inst, InstRange, MachineEnv, Operand, OperandKind, OperandPolicy, OperandPos, PReg, RegClass, VReg, diff --git a/src/fuzzing/mod.rs b/src/fuzzing/mod.rs index 8aecdabd..ae548d9f 100644 --- a/src/fuzzing/mod.rs +++ b/src/fuzzing/mod.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + //! Utilities for fuzzing. pub mod func; diff --git a/src/lib.rs b/src/lib.rs index 29202c82..8d1cca08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -355,6 +355,7 @@ impl Operand { #[inline(always)] pub fn from_bits(bits: u32) -> Self { + debug_assert!(bits >> 29 <= 4); Operand { bits } } } @@ -429,9 +430,9 @@ pub struct Allocation { /// `policy` field in `Operand`, and we are careful to use /// disjoint ranges of values in this field for each type. We also /// leave the def-or-use bit (`kind` for `Operand`) unused here so - /// that the client may use it to mark `Allocation`s on - /// instructions as read or write when it edits instructions - /// (which is sometimes useful for post-allocation analyses). + /// that we can use it below in `OperandOrAllocation` to record + /// whether `Allocation`s are defs or uses (which is often useful + /// to know). /// /// kind:3 unused:1 index:28 bits: u32, @@ -532,6 +533,7 @@ impl Allocation { #[inline(always)] pub fn from_bits(bits: u32) -> Self { + debug_assert!(bits >> 29 >= 5); Self { bits } } } @@ -566,11 +568,13 @@ pub struct OperandOrAllocation { impl OperandOrAllocation { pub fn from_operand(operand: Operand) -> Self { + debug_assert!(operand.bits() >> 29 <= 4); Self { bits: operand.bits(), } } pub fn from_alloc(alloc: Allocation) -> Self { + debug_assert!(alloc.bits() >> 29 >= 5); Self { bits: alloc.bits() } } pub fn is_operand(&self) -> bool { @@ -588,6 +592,10 @@ impl OperandOrAllocation { } pub fn as_allocation(&self) -> Option { if self.is_allocation() { + // Remove the def/use bit -- the canonical `Allocation` + // does not have this, and we want allocs to continue to + // be comparable whether they are used for reads or + // writes. Some(Allocation::from_bits(self.bits & !(1 << 28))) } else { None @@ -612,6 +620,9 @@ impl OperandOrAllocation { /// A trait defined by the regalloc client to provide access to its /// machine-instruction / CFG representation. +/// +/// (This trait's design is inspired by, and derives heavily from, the +/// trait of the same name in regalloc.rs.) 
pub trait Function { // ------------- // CFG traversal @@ -669,10 +680,7 @@ pub trait Function { /// Get the clobbers for an instruction. fn inst_clobbers(&self, insn: Inst) -> &[PReg]; - /// Get the precise number of `VReg` in use in this function, to allow - /// preallocating data structures. This number *must* be a correct - /// lower-bound, otherwise invalid index failures may happen; it is of - /// course better if it is exact. + /// Get the number of `VReg` in use in this function. fn num_vregs(&self) -> usize; /// Get the VRegs that are pointer/reference types. This has the @@ -724,6 +732,9 @@ pub trait Function { /// but we also use them for F32 and F64 values, we may use a different /// store-slot size and smaller-operand store/load instructions for an F64 /// than for a true V128. + /// + /// (This trait method's design and doc text derives from + /// regalloc.rs' trait of the same name.) fn spillslot_size(&self, regclass: RegClass, for_vreg: VReg) -> usize; /// When providing a spillslot number for a multi-slot spillslot, diff --git a/src/moves.rs b/src/moves.rs index a5f70be2..8cdd59ab 100644 --- a/src/moves.rs +++ b/src/moves.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + use crate::Allocation; use smallvec::{smallvec, SmallVec}; diff --git a/src/postorder.rs b/src/postorder.rs index b5faf90b..9e6eea86 100644 --- a/src/postorder.rs +++ b/src/postorder.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + //! Fast postorder computation with no allocations (aside from result). use crate::Block; diff --git a/src/ssa.rs b/src/ssa.rs index 3b0ca143..2d6e6250 100644 --- a/src/ssa.rs +++ b/src/ssa.rs @@ -1,3 +1,8 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + //! SSA-related utilities. use crate::cfg::CFGInfo; From 34ab744f4f8ee05a5dc8be07914a29d2f47b227f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 18 Apr 2021 13:18:18 -0700 Subject: [PATCH 005/155] Add GitHub CI config. --- .github/workflows/rust.yml | 64 +++++++++++++++++++++++++++++++++ fuzz/smoketest/ion_checker.bin | Bin 0 -> 2779 bytes 2 files changed, 64 insertions(+) create mode 100644 .github/workflows/rust.yml create mode 100644 fuzz/smoketest/ion_checker.bin diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 00000000..8081fdc5 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,64 @@ +# Derived from regalloc.rs' GitHub CI config file. + +name: Rust + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + # Lint code with rustfmt, report an error if it needs to be run. + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install rustfmt + run: rustup component add rustfmt + - name: Run rustfmt and check there's no difference + run: cargo fmt --all -- --check + + # Make sure the code compiles and that all the tests pass. + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build + run: cargo build + - name: Run tests + run: cargo test --all --verbose + + # Lint dependency graph for security advisories, duplicate versions, and + # incompatible licences. 
+ cargo_deny: + name: Cargo deny + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - run: | + set -e + curl -L https://github.com/EmbarkStudios/cargo-deny/releases/download/0.8.5/cargo-deny-0.8.5-x86_64-unknown-linux-musl.tar.gz | tar xzf - + mv cargo-deny-*-x86_64-unknown-linux-musl/cargo-deny cargo-deny + echo `pwd` >> $GITHUB_PATH + - run: cargo deny check + + # Builds the fuzz targets. + fuzz: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install nightly + run: rustup toolchain install nightly + - name: Install cargo-fuzz + run: cargo +nightly install cargo-fuzz + - name: Build ssagen fuzzing target + run: cargo +nightly fuzz build ssagen + - name: Build moves fuzzing target + run: cargo +nightly fuzz build moves + - name: Build ion fuzzing target + run: cargo +nightly fuzz build ion + - name: Build and smoke-test ion_checker fuzzing target + run: cargo +nightly fuzz run ion_checker ./fuzz/smoketest/ion_checker.bin diff --git a/fuzz/smoketest/ion_checker.bin b/fuzz/smoketest/ion_checker.bin new file mode 100644 index 0000000000000000000000000000000000000000..5156f22792d0392e0b66617f2e8702bcd4a5d798 GIT binary patch literal 2779 zcmd^BJ!q3b7=A8jTQihEOGt*$7JphTmUeKdgMyPw5f{NK$>*R$7iY;5x;XeV1ph!! zM8PT2DSHstf>Y-&LqV_zQb|n<=li~Q_vK5QPirbD_(1Nx_wL@GyWID@ha3!Jlzs!4 z10y}m&AJJIb&q-9A(>}rV3`-!7#q^=ZIZ@iWg`s3mbZB)9Vje(To6oPBt3s;`L$6O z-r^($(yLnEG^Xrhddo;+2pA`yVHgc{cMz`0bl7Os$cyaT)M^(6mj&0n#U3XcZU$l4 z;1D)*KkCiAOd6MYw(oD_l(oe)od*V2%jI$#N?W|t`nq0nN+hPDp4Jsj|C+-hpuW)= zKvfANYdhxEee-?@emQWPVracT`|6b)bC1n|(j;c<^*V={kXT9%8kkG7??D;ExD~)- z9(=^jQLUe`)aedHk2RPjJEzYm1(V9>*?lwX{51(ku`(<;^5F{)b+8f`c=<-<$H54tmk{o0g!B#S6nTJH7;?C0;_ebACk zueASM*8kq^>TLQyzFl8?-mVp*_t+Io`Vh3f5d^nl3vG9cs3b>x>#k&bI|%r!Hy1Z8 zmaMqsR1v(On|7*1J473IotHyQ9%t%AlKpY{vHrN+ad8r7CyoD{%XwB<{+HG4x4vpV z!wxA6vl55%*ulu%6ZXTu312B@)6tpq)8W(T&3;d4ri-*UY^h<3IQGHWJ~3-1W#c*f z#yQ{=PY$336znsja=KV7jundvDl=0WJ*z%vG8y--p=S-3WqC|_r_;Uy#*>nDN%b%O zfg*LD9L0(WX^D^s6&-(R(Ze8z!(_qrJ92z*zHm9Tx8F!m;fmO^(se%-5Cq12#}q+K Z#(-=oV1iNZV$eH70U7IX()ryjegJnaJ^=s# literal 0 HcmV?d00001 From 414f3f828da0913b43b0a1c59b80ac11ec58b03a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 18 Apr 2021 13:38:05 -0700 Subject: [PATCH 006/155] Factored out test program and fuzzing features; core crate now only depends on smallvec and log. 
--- Cargo.toml | 20 ++++++++----------- fuzz/Cargo.toml | 4 +--- src/lib.rs | 2 ++ test/Cargo.toml | 28 +++++++++++++++++++++++++++ {benches => test/benches}/regalloc.rs | 0 src/bin/test.rs => test/src/main.rs | 0 6 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 test/Cargo.toml rename {benches => test/benches}/regalloc.rs (100%) rename src/bin/test.rs => test/src/main.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 7e32c7c1..c54201c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,24 +4,20 @@ version = "0.0.1" authors = ["Chris Fallin ", "Mozilla SpiderMonkey Developers"] edition = "2018" license = "Apache-2.0 WITH LLVM-exception AND MPL-2.0" -description = "Backtracking register allocator ported from IonMonkey" -repository = "https://github.com/cfallin/regalloc2" +description = "Backtracking register allocator inspired from IonMonkey" +repository = "https://github.com/bytecodealliance/regalloc2" [dependencies] log = { version = "0.4.8", default-features = false } smallvec = "1.6.1" -# keep this in sync with libfuzzer_sys's crate version: -arbitrary = "^0.4.6" -rand = "0.8" -rand_chacha = "0.3" -env_logger = "*" -[dev-dependencies] -criterion = "0.3" +# The below are only needed for fuzzing. +# Keep this in sync with libfuzzer_sys's crate version: +arbitrary = { version = "^0.4.6", optional = true } [profile.release] debug = true -[[bench]] -name = "regalloc" -harness = false +[features] +default = [] +fuzzing = ["arbitrary"] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 7a94f2dc..199eb9d1 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -10,14 +10,12 @@ edition = "2018" cargo-fuzz = true [dependencies] +regalloc2 = { path = "../", features = ["fuzzing"] } libfuzzer-sys = "0.3" arbitrary = { version = "^0.4.6", features = ["derive"] } log = { version = "0.4.8", default-features = false } env_logger = "0.8.3" -[dependencies.regalloc2] -path = ".." - # Prevent this from interfering with workspaces [workspace] members = ["."] diff --git a/src/lib.rs b/src/lib.rs index 8d1cca08..34873f9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,8 @@ pub mod index; pub use index::{Block, Inst, InstRange, InstRangeIter}; pub mod checker; + +#[cfg(feature = "fuzzing")] pub mod fuzzing; /// Register classes. 
diff --git a/test/Cargo.toml b/test/Cargo.toml new file mode 100644 index 00000000..bfbb291d --- /dev/null +++ b/test/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "regalloc2-test" +version = "0.0.1" +authors = ["Chris Fallin ", "Mozilla SpiderMonkey Developers"] +edition = "2018" +license = "Apache-2.0 WITH LLVM-exception AND MPL-2.0" +description = "small test driver for benchmarking regalloc2" +repository = "https://github.com/bytecodealliance/regalloc2" + +[dependencies] +regalloc2 = { version = "*", path = "../", features = ["fuzzing"] } + +# Keep this in sync with libfuzzer_sys's crate version: +arbitrary = { version = "^0.4.6" } +rand = { version = "0.8" } +rand_chacha = { version = "0.3" } +env_logger = { version = "*" } + +[dev-dependencies] +criterion = "0.3" + +[profile.release] +debug = true + +[[bench]] +name = "regalloc" +harness = false + diff --git a/benches/regalloc.rs b/test/benches/regalloc.rs similarity index 100% rename from benches/regalloc.rs rename to test/benches/regalloc.rs diff --git a/src/bin/test.rs b/test/src/main.rs similarity index 100% rename from src/bin/test.rs rename to test/src/main.rs From 49c54b6144dbe398055e6e3ebfd736a87b711c19 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 30 Apr 2021 21:14:09 -0700 Subject: [PATCH 007/155] Misc usability and functionality enhancements: - Support preferred and non-preferred subsets of a register class. This allows allocating, e.g., caller-saved registers before callee-saved registers. - Allow branch blockparam args to start an a certain offset in branch operands; this allows branches to have other operands too (e.g., conditional-branch inputs). - Allow `OperandOrAllocation` to be constructed from an `Allocation` and `OperandKind` as well (i.e., an allocation with an use/def bit). --- src/fuzzing/func.rs | 14 +++++- src/ion/mod.rs | 106 ++++++++++++++++++++++++++++---------------- src/lib.rs | 28 ++++++++++-- 3 files changed, 104 insertions(+), 44 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 96efeda6..ae8dccef 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -116,6 +116,12 @@ impl Function for Func { self.insts[insn.index()].op == InstOpcode::Branch } + fn branch_blockparam_arg_offset(&self, _: Block, _: Inst) -> usize { + // Branch blockparam args always start at zero for this + // Function implementation. + 0 + } + fn is_safepoint(&self, insn: Inst) -> bool { self.insts[insn.index()].is_safepoint } @@ -576,12 +582,16 @@ impl std::fmt::Debug for Func { pub fn machine_env() -> MachineEnv { // Reg 31 is the scratch reg. let regs: Vec = (0..31).map(|i| PReg::new(i, RegClass::Int)).collect(); - let regs_by_class: Vec> = vec![regs.clone(), vec![]]; + let preferred_regs_by_class: Vec> = + vec![regs.iter().cloned().take(24).collect(), vec![]]; + let non_preferred_regs_by_class: Vec> = + vec![regs.iter().cloned().skip(24).collect(), vec![]]; let scratch_by_class: Vec = vec![PReg::new(31, RegClass::Int), PReg::new(0, RegClass::Float)]; MachineEnv { regs, - regs_by_class, + preferred_regs_by_class, + non_preferred_regs_by_class, scratch_by_class, } } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index bcd96c93..3ecd84c7 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -560,6 +560,69 @@ pub struct Stats { edits_count: usize, } +/// This iterator represents a traversal through all allocatable +/// registers of a given class, in a certain order designed to +/// minimize allocation contention. 
+/// +/// The order in which we try registers is somewhat complex: +/// - First, if there is a hint, we try that. +/// - Then, we try registers in a traversal order that is based on an +/// "offset" (usually the bundle index) spreading pressure evenly +/// among registers to reduce commitment-map contention. +/// - Within that scan, we try registers in two groups: first, +/// prferred registers; then, non-preferred registers. (In normal +/// usage, these consist of caller-save and callee-save registers +/// respectively, to minimize clobber-saves; but they need not.) +struct RegTraversalIter<'a> { + env: &'a MachineEnv, + class: usize, + hint_reg: Option, + pref_idx: usize, + non_pref_idx: usize, + offset: usize, +} + +impl<'a> RegTraversalIter<'a> { + pub fn new( + env: &'a MachineEnv, + class: RegClass, + hint_reg: Option, + offset: usize, + ) -> Self { + Self { + env, + class: class as u8 as usize, + hint_reg, + pref_idx: 0, + non_pref_idx: 0, + offset, + } + } +} + +impl<'a> std::iter::Iterator for RegTraversalIter<'a> { + type Item = PReg; + + fn next(&mut self) -> Option { + if let Some(preg) = self.hint_reg.take() { + return Some(preg); + } + if self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { + let arr = &self.env.preferred_regs_by_class[self.class][..]; + let r = arr[(self.pref_idx + self.offset) % arr.len()]; + self.pref_idx += 1; + return Some(r); + } + if self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { + let arr = &self.env.non_preferred_regs_by_class[self.class][..]; + let r = arr[(self.non_pref_idx + self.offset) % arr.len()]; + self.non_pref_idx += 1; + return Some(r); + } + None + } +} + impl<'a, F: Function> Env<'a, F> { pub(crate) fn new(func: &'a F, env: &'a MachineEnv, cfginfo: CFGInfo) -> Self { Self { @@ -987,7 +1050,7 @@ impl<'a, F: Function> Env<'a, F> { // return), create blockparam_out entries. if self.func.is_branch(insns.last()) { let operands = self.func.inst_operands(insns.last()); - let mut i = 0; + let mut i = self.func.branch_blockparam_arg_offset(block, insns.last()); for &succ in self.func.block_succs(block) { for &blockparam in self.func.block_params(succ) { let from_vreg = VRegIndex::new(operands[i].vreg().vreg()); @@ -2671,12 +2734,7 @@ impl<'a, F: Function> Env<'a, F> { Requirement::Register(class) => { // Scan all pregs and attempt to allocate. let mut lowest_cost_conflict_set: Option = None; - let n_regs = self.env.regs_by_class[class as u8 as usize].len(); - let loop_count = if hint_reg.is_some() { - n_regs + 1 - } else { - n_regs - }; + // Heuristic: start the scan for an available // register at an offset influenced both by our // location in the code and by the bundle we're @@ -2688,35 +2746,8 @@ impl<'a, F: Function> Env<'a, F> { .inst .index() + bundle.index(); - for i in 0..loop_count { - // The order in which we try registers is somewhat complex: - // - First, if there is a hint, we try that. - // - Then, we try registers in a traversal - // order that is based on the bundle index, - // spreading pressure evenly among registers - // to reduce commitment-map - // contention. (TODO: account for - // caller-save vs. callee-saves here too.) - // Note that we avoid retrying the hint_reg; - // this is why the loop count is n_regs + 1 - // if there is a hint reg, because we always - // skip one iteration. 
- let preg = match (i, hint_reg) { - (0, Some(hint_reg)) => hint_reg, - (i, Some(hint_reg)) => { - let reg = self.env.regs_by_class[class as u8 as usize] - [(i - 1 + scan_offset) % n_regs]; - if reg == hint_reg { - continue; - } - reg - } - (i, None) => { - self.env.regs_by_class[class as u8 as usize] - [(i + scan_offset) % n_regs] - } - }; + for preg in RegTraversalIter::new(self.env, class, hint_reg, scan_offset) { self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { @@ -2828,10 +2859,7 @@ impl<'a, F: Function> Env<'a, F> { let class = any_vreg.class(); let mut success = false; self.stats.spill_bundle_reg_probes += 1; - let nregs = self.env.regs_by_class[class as u8 as usize].len(); - for i in 0..nregs { - let i = (i + bundle.index()) % nregs; - let preg = self.env.regs_by_class[class as u8 as usize][i]; // don't borrow self + for preg in RegTraversalIter::new(self.env, class, None, bundle.index()) { let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = self.try_to_allocate_bundle_to_reg(bundle, preg_idx) diff --git a/src/lib.rs b/src/lib.rs index 34873f9e..925317b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -579,6 +579,15 @@ impl OperandOrAllocation { debug_assert!(alloc.bits() >> 29 >= 5); Self { bits: alloc.bits() } } + pub fn from_alloc_and_kind(alloc: Allocation, kind: OperandKind) -> Self { + debug_assert!(alloc.bits() >> 29 >= 5); + let bits = alloc.bits() + | match kind { + OperandKind::Def => 0, + OperandKind::Use => 1 << 28, + }; + Self { bits } + } pub fn is_operand(&self) -> bool { (self.bits >> 29) <= 4 } @@ -659,10 +668,22 @@ pub trait Function { fn is_ret(&self, insn: Inst) -> bool; /// Determine whether an instruction is the end-of-block - /// branch. If so, its operands *must* be the block parameters for - /// each of its block's `block_succs` successor blocks, in order. + /// branch. If so, its operands at the indices given by + /// `branch_blockparam_arg_offset()` below *must* be the block + /// parameters for each of its block's `block_succs` successor + /// blocks, in order. fn is_branch(&self, insn: Inst) -> bool; + /// If `insn` is a branch at the end of `block`, returns the + /// operand index at which outgoing blockparam arguments are + /// found. Starting at this index, blockparam arguments for each + /// successor block's blockparams, in order, must be found. + /// + /// It is an error if `self.inst_operands(insn).len() - + /// self.branch_blockparam_arg_offset(insn)` is not exactly equal + /// to the sum of blockparam counts for all successor blocks. + fn branch_blockparam_arg_offset(&self, block: Block, insn: Inst) -> usize; + /// Determine whether an instruction is a safepoint and requires a stackmap. fn is_safepoint(&self, _: Inst) -> bool { false @@ -842,7 +863,8 @@ pub enum Edit { #[derive(Clone, Debug)] pub struct MachineEnv { regs: Vec, - regs_by_class: Vec>, + preferred_regs_by_class: Vec>, + non_preferred_regs_by_class: Vec>, scratch_by_class: Vec, } From 9e7021cfd02d050ce92f6ee425272805dc056b4a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 3 May 2021 19:18:19 -0700 Subject: [PATCH 008/155] Derive Ord/hash on OperandOrAllocation. 
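A note on the `Function::branch_blockparam_arg_offset` hook added in the patch above: it lets a branch carry ordinary operands (for example a conditional input) ahead of its outgoing blockparam arguments. A hedged sketch of a client-side implementation (the single-condition layout and the `is_conditional_branch` helper are hypothetical; the in-tree fuzzing `Func` simply returns 0 because its branches carry nothing but blockparam args):

    fn branch_blockparam_arg_offset(&self, _block: Block, insn: Inst) -> usize {
        // Assume conditional branches put their condition in operand slot 0;
        // everything from the returned offset onward must line up, in order,
        // with the successor blocks' blockparams.
        if self.is_conditional_branch(insn) {
            1
        } else {
            0
        }
    }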
--- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 925317b9..455cb4a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -563,7 +563,7 @@ impl Allocation { /// A helper that wraps either an `Operand` or an `Allocation` and is /// able to tell which it is based on the tag bits. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct OperandOrAllocation { bits: u32, } From 15ed2d65224d0248e50a29124475a8d886fa69fa Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 5 May 2021 22:36:41 -0700 Subject: [PATCH 009/155] Allow multiple defs per vreg (i.e., accept non-SSA code). This generalizes the allocator to accept multiple defs by making defs just another type of "use" (uses are now perhaps more properly called "mentions", but for now we abuse the terminology slightly). It turns out that this actually was not terribly hard, because we don't rely on the properties that a strict SSA requirement otherwise might allow us to: e.g., defs always at exactly the start of a vreg's ranges. Because we already accepted arbitrary block order and irreducible CFGs, and approximated live-ranges with the single-pass algorithm, we are robust in our "stitching" (move insertion) and so all we really care about is computing some superset of the actual live-ranges and then a non-interfering coloring of (split pieces of) those ranges. Multiple defs don't change that, as long as we compute the ranges properly. We still have blockparams in this design, so the client *can* provide SSA directly, and everything will work as before. But a client that produces non-SSA need not use them at all; it can just happily reassign to vregs and everything will Just Work. This is part of the effort to port Cranelift over to regalloc2; I have decided that it may be easier to build a compatibility shim that matches regalloc.rs's interface than to continue boiling the ocean and converting all of the lowering sequences to SSA. It then becomes a separable piece of work (and simply further performance improvements and simplifications) to remove the need for this shim. --- src/ion/mod.rs | 352 ++++++++++++++----------------------------------- src/lib.rs | 5 +- 2 files changed, 102 insertions(+), 255 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 3ecd84c7..1d987393 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -52,16 +52,6 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, BinaryHeap, HashMap, HashSet, VecDeque}; use std::fmt::Debug; -#[cfg(not(debug))] -fn validate_ssa(_: &F, _: &CFGInfo) -> Result<(), RegAllocError> { - Ok(()) -} - -#[cfg(debug)] -fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> { - crate::validate_ssa(f, cfginfo) -} - /// A range from `from` (inclusive) to `to` (exclusive). 
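To make the relaxation described in this commit concrete, here is an illustrative operand sequence that the allocator now accepts (not taken from the repository; it assumes a `VReg::new(index, class)` constructor alongside the `Operand::new` constructor used elsewhere in this series). The same vreg is defined by two different instructions, and each def is simply recorded as a use with `is_def: true` on that vreg's live ranges instead of being rejected by an SSA validator:

    // Hypothetical non-SSA input: v0 is defined twice.
    let v0 = VReg::new(0, RegClass::Int);
    let v1 = VReg::new(1, RegClass::Int);
    // inst0: v0 := <constant>
    let inst0_ops = vec![Operand::new(v0, OperandPolicy::Reg, OperandKind::Def, OperandPos::After)];
    // inst1: v0 := v0 + v1  -- a second def of the same vreg, now allowed
    let inst1_ops = vec![
        Operand::new(v0, OperandPolicy::Reg, OperandKind::Use, OperandPos::Before),
        Operand::new(v1, OperandPolicy::Reg, OperandKind::Use, OperandPos::Before),
        Operand::new(v0, OperandPolicy::Reg, OperandKind::Def, OperandPos::After),
    ];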
#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct CodeRange { @@ -125,7 +115,6 @@ struct LiveRange { first_use: UseIndex, last_use: UseIndex, - def: DefIndex, next_in_bundle: LiveRangeIndex, next_in_reg: LiveRangeIndex, @@ -178,17 +167,11 @@ struct Use { pos: ProgPoint, slot: usize, next_use: UseIndex, + is_def: bool, } const SLOT_NONE: usize = usize::MAX; -#[derive(Clone, Debug)] -struct Def { - operand: Operand, - pos: ProgPoint, - slot: usize, -} - #[derive(Clone, Debug)] struct LiveBundle { first_range: LiveRangeIndex, @@ -300,7 +283,6 @@ struct Env<'a, F: Function> { bundles: Vec, spillsets: Vec, uses: Vec, - defs: Vec, vregs: Vec, pregs: Vec, allocation_queue: PrioQueue, @@ -638,7 +620,6 @@ impl<'a, F: Function> Env<'a, F> { ranges: vec![], spillsets: vec![], uses: vec![], - defs: vec![], vregs: vec![], pregs: vec![], allocation_queue: PrioQueue::new(), @@ -664,12 +645,16 @@ impl<'a, F: Function> Env<'a, F> { } fn create_pregs_and_vregs(&mut self) { - // Create RRegs from the RealRegUniverse. - for &preg in &self.env.regs { - self.pregs.push(PRegData { - reg: preg, + // Create PRegs from the env. + self.pregs.resize( + PReg::MAX_INDEX, + PRegData { + reg: PReg::invalid(), allocations: LiveRangeSet::new(), - }); + }, + ); + for &preg in &self.env.regs { + self.pregs[preg.index()].reg = preg; } // Create VRegs from the vreg count. for idx in 0..self.func.num_vregs() { @@ -712,7 +697,6 @@ impl<'a, F: Function> Env<'a, F> { num_fixed_uses_and_flags: 0, first_use: UseIndex::invalid(), last_use: UseIndex::invalid(), - def: DefIndex::invalid(), next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), }); @@ -790,10 +774,7 @@ impl<'a, F: Function> Env<'a, F> { if self.ranges[iter.index()].range.to > self.ranges[merged.index()].range.to { self.ranges[merged.index()].range.to = self.ranges[iter.index()].range.to; } - if self.ranges[iter.index()].def.is_valid() { - self.ranges[merged.index()].def = self.ranges[iter.index()].def; - } - self.distribute_liverange_uses(vreg, iter, merged); + self.distribute_liverange_uses(iter, merged); log::debug!( " -> after: merged {:?}: {:?}", merged, @@ -831,18 +812,12 @@ impl<'a, F: Function> Env<'a, F> { } } - fn distribute_liverange_uses( - &mut self, - vreg: VRegIndex, - from: LiveRangeIndex, - into: LiveRangeIndex, - ) { + fn distribute_liverange_uses(&mut self, from: LiveRangeIndex, into: LiveRangeIndex) { log::debug!("distribute from {:?} to {:?}", from, into); assert_eq!( self.ranges[from.index()].vreg, self.ranges[into.index()].vreg ); - let from_range = self.ranges[from.index()].range; let into_range = self.ranges[into.index()].range; // For every use in `from`... let mut prev = UseIndex::invalid(); @@ -877,14 +852,6 @@ impl<'a, F: Function> Env<'a, F> { iter = usedata.next_use; } } - - // Distribute def too if `from` has a def and the def is in range of `into_range`. 
- if self.ranges[from.index()].def.is_valid() { - let def_idx = self.vregs[vreg.index()].def; - if from_range.contains_point(self.defs[def_idx.index()].pos) { - self.ranges[into.index()].def = def_idx; - } - } } fn update_liverange_stats_on_remove_use(&mut self, from: LiveRangeIndex, u: UseIndex) { @@ -903,6 +870,9 @@ impl<'a, F: Function> Env<'a, F> { ); lrdata.uses_spill_weight -= spill_weight_from_policy(usedata.operand.policy()); + if usedata.is_def { + lrdata.uses_spill_weight -= 2000; + } } fn insert_use_into_liverange_and_update_stats(&mut self, into: LiveRangeIndex, u: UseIndex) { @@ -952,6 +922,9 @@ impl<'a, F: Function> Env<'a, F> { spill_weight_from_policy(policy) ); self.ranges[into.index()].uses_spill_weight += spill_weight_from_policy(policy); + if self.uses[u.index()].is_def { + self.ranges[into.index()].uses_spill_weight += 2000; + } log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight); } @@ -1109,19 +1082,19 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::Before | OperandPos::Both => ProgPoint::before(inst), OperandPos::After => ProgPoint::after(inst), }; - let def = DefIndex(self.defs.len() as u32); - self.defs.push(Def { + let u = UseIndex(self.uses.len() as u32); + self.uses.push(Use { operand, pos, slot: i, + next_use: UseIndex::invalid(), + is_def: true, }); log::debug!("Def of {} at {:?}", operand.vreg(), pos); // Fill in vreg's actual data. - debug_assert!(self.vregs[operand.vreg().vreg()].def.is_invalid()); self.vregs[operand.vreg().vreg()].reg = operand.vreg(); - self.vregs[operand.vreg().vreg()].def = def; // Trim the range for this vreg to start // at `pos` if it previously ended at the @@ -1148,9 +1121,9 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> started at block start; trimming to {:?}", pos); self.ranges[lr.index()].range.from = pos; } - // Note that the liverange contains a def. - self.ranges[lr.index()].def = def; + self.insert_use_into_liverange_and_update_stats(lr, u); // Remove from live-set. + // TODO-cranelift: here is where we keep it live if it's a mod, not def. live.set(operand.vreg().vreg(), false); vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); } @@ -1183,6 +1156,7 @@ impl<'a, F: Function> Env<'a, F> { pos, slot: i, next_use: UseIndex::invalid(), + is_def: false, }); // Create/extend the LiveRange and add the use to the range. @@ -1358,6 +1332,7 @@ impl<'a, F: Function> Env<'a, F> { pos, slot: SLOT_NONE, next_use: UseIndex::invalid(), + is_def: false, }); // Create/extend the LiveRange and add the use to the range. 
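A small sketch of the range bookkeeping performed by this backward scan (illustrative only, written against the `CodeRange`/`ProgPoint` types above; the real pass routes these updates through `add_liverange_to_vreg` and the per-vreg range tables): a use opens a range stretching back to the block entry, and the def encountered later in the scan (earlier in program order) trims that range to start at the def point, since the value is not live above its definition.

    // Backward scan sees a use of some vreg at inst3: open a range from block entry.
    let block_entry = ProgPoint::before(Inst::new(0));
    let use_pos = ProgPoint::before(Inst::new(3));
    let mut range = CodeRange { from: block_entry, to: use_pos.next() };
    // The scan then reaches that vreg's def at inst1 (After position):
    // trim the range so it starts at the def rather than at block entry.
    let def_pos = ProgPoint::after(Inst::new(1));
    range.from = def_pos;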
@@ -1449,18 +1424,6 @@ impl<'a, F: Function> Env<'a, F> { } }; - if self.ranges[iter.index()].def.is_valid() { - let def_idx = self.vregs[vreg].def; - let pos = self.defs[def_idx.index()].pos; - let slot = self.defs[def_idx.index()].slot; - fixup_multi_fixed_vregs( - pos, - slot, - &mut self.defs[def_idx.index()].operand, - &mut self.multi_fixed_reg_fixups, - ); - } - let mut use_iter = self.ranges[iter.index()].first_use; while use_iter.is_valid() { let pos = self.uses[use_iter.index()].pos; @@ -1538,45 +1501,6 @@ impl<'a, F: Function> Env<'a, F> { LiveBundleIndex::new(bundle) } - fn try_merge_reused_register(&mut self, from: VRegIndex, to: VRegIndex) { - log::debug!("try_merge_reused_register: from {:?} to {:?}", from, to); - let def_idx = self.vregs[to.index()].def; - log::debug!(" -> def_idx = {:?}", def_idx); - debug_assert!(def_idx.is_valid()); - let def = &mut self.defs[def_idx.index()]; - let def_point = def.pos; - log::debug!(" -> def_point = {:?}", def_point); - - // Can't merge if def happens at use-point. - if def_point.pos == InstPosition::Before { - return; - } - - // Find the corresponding liverange for the use at the def-point. - let use_lr_at_def = self.find_vreg_liverange_for_pos(from, def_point); - log::debug!(" -> use_lr_at_def = {:?}", use_lr_at_def); - - // If the use is not live at the def (i.e. this inst is its last use), we can merge. - if use_lr_at_def.is_none() { - // Find the bundles and merge. Note that bundles have not been split - // yet so every liverange in the vreg will have the same bundle (so - // no need to look up the proper liverange here). - let from_bundle = self.ranges[self.vregs[from.index()].first_range.index()].bundle; - let to_bundle = self.ranges[self.vregs[to.index()].first_range.index()].bundle; - log::debug!(" -> merging from {:?} to {:?}", from_bundle, to_bundle); - self.merge_bundles(from_bundle, to_bundle); - return; - } - - log::debug!(" -> no merge"); - - // Note: there may be other cases where it would benefit us to split the - // LiveRange and bundle for the input at the def-point, allowing us to - // avoid a copy. However, the cases where this helps in IonMonkey (only - // memory uses after the definition, seemingly) appear to be marginal at - // best. - } - fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool { if from == to { // Merge bundle into self -- trivial merge. @@ -1594,7 +1518,8 @@ impl<'a, F: Function> Env<'a, F> { // have to have the same regclass (because bundles start with one vreg // and all merging happens here) so we can just sample the first vreg of // each bundle. - if self.vregs[vreg_from.index()].reg.class() != self.vregs[vreg_to.index()].reg.class() { + let rc = self.vregs[vreg_from.index()].reg.class(); + if rc != self.vregs[vreg_to.index()].reg.class() { return false; } @@ -1684,6 +1609,11 @@ impl<'a, F: Function> Env<'a, F> { } fn insert_liverange_into_bundle(&mut self, bundle: LiveBundleIndex, lr: LiveRangeIndex) { + log::debug!( + "insert_liverange_into_bundle: lr {:?} bundle {:?}", + lr, + bundle + ); self.ranges[lr.index()].next_in_bundle = LiveRangeIndex::invalid(); self.ranges[lr.index()].bundle = bundle; if self.bundles[bundle.index()].first_range.is_invalid() { @@ -1745,26 +1675,9 @@ impl<'a, F: Function> Env<'a, F> { for inst in 0..self.func.insts() { let inst = Inst::new(inst); - // Attempt to merge Reuse-policy operand outputs with the corresponding - // inputs. 
- for operand_idx in 0..self.func.inst_operands(inst).len() { - let operand = self.func.inst_operands(inst)[operand_idx]; - if let OperandPolicy::Reuse(input_idx) = operand.policy() { - log::debug!( - "trying to merge use and def at reused-op {} on inst{}", - operand_idx, - inst.index() - ); - assert_eq!(operand.kind(), OperandKind::Def); - assert_eq!(operand.pos(), OperandPos::After); - let input_vreg = - VRegIndex::new(self.func.inst_operands(inst)[input_idx].vreg().vreg()); - let output_vreg = VRegIndex::new(operand.vreg().vreg()); - self.try_merge_reused_register(input_vreg, output_vreg); - } - } - - // Attempt to merge move srcs and dests. + // Attempt to merge move srcs and dests, and attempt to + // merge Reuse-policy operand outputs with the + // corresponding inputs. if let Some((src_vreg, dst_vreg)) = self.func.is_move(inst) { log::debug!("trying to merge move src {} to dst {}", src_vreg, dst_vreg); let src_bundle = @@ -1775,6 +1688,24 @@ impl<'a, F: Function> Env<'a, F> { assert!(dest_bundle.is_valid()); self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); } + for op in self.func.inst_operands(inst) { + if let OperandPolicy::Reuse(reuse_idx) = op.policy() { + let src_vreg = op.vreg(); + let dst_vreg = self.func.inst_operands(inst)[reuse_idx].vreg(); + log::debug!( + "trying to merge reused-input def: src {} to dst {}", + src_vreg, + dst_vreg + ); + let src_bundle = + self.ranges[self.vregs[src_vreg.vreg()].first_range.index()].bundle; + assert!(src_bundle.is_valid()); + let dest_bundle = + self.ranges[self.vregs[dst_vreg.vreg()].first_range.index()].bundle; + assert!(dest_bundle.is_valid()); + self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); + } + } } // Attempt to merge blockparams with their inputs. @@ -1878,7 +1809,7 @@ impl<'a, F: Function> Env<'a, F> { } log::debug!("VRegs:"); for (i, v) in self.vregs.iter().enumerate() { - log::debug!("vreg{}: def={:?} first_range={:?}", i, v.def, v.first_range,); + log::debug!("vreg{}: first_range={:?}", i, v.first_range,); } log::debug!("Ranges:"); for (i, r) in self.ranges.iter().enumerate() { @@ -1886,7 +1817,7 @@ impl<'a, F: Function> Env<'a, F> { concat!( "range{}: range={:?} vreg={:?} bundle={:?} ", "weight={} fixed={} first_use={:?} last_use={:?} ", - "def={:?} next_in_bundle={:?} next_in_reg={:?}" + "next_in_bundle={:?} next_in_reg={:?}" ), i, r.range, @@ -1896,7 +1827,6 @@ impl<'a, F: Function> Env<'a, F> { r.num_fixed_uses(), r.first_use, r.last_use, - r.def, r.next_in_bundle, r.next_in_reg ); @@ -1904,18 +1834,15 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("Uses:"); for (i, u) in self.uses.iter().enumerate() { log::debug!( - "use{}: op={:?} pos={:?} slot={} next_use={:?}", + "use{}: op={:?} pos={:?} slot={} next_use={:?} is_def={:?}", i, u.operand, u.pos, u.slot, - u.next_use + u.next_use, + u.is_def, ); } - log::debug!("Defs:"); - for (i, d) in self.defs.iter().enumerate() { - log::debug!("def{}: op={:?} pos={:?}", i, d.operand, d.pos,); - } } fn compute_requirement(&self, bundle: LiveBundleIndex) -> Option { @@ -1932,18 +1859,6 @@ impl<'a, F: Function> Env<'a, F> { while iter.is_valid() { let range = &self.ranges[iter.index()]; log::debug!(" -> range {:?}", range.range); - if range.def.is_valid() { - let def_op = self.defs[range.def.index()].operand; - let def_req = Requirement::from_operand(def_op); - log::debug!( - " -> def {:?} op {:?} req {:?}", - range.def.index(), - def_op, - def_req - ); - needed = needed.merge(def_req)?; - log::debug!(" -> needed {:?}", needed); - } let mut use_iter = 
range.first_use; while use_iter.is_valid() { let usedata = &self.uses[use_iter.index()]; @@ -2077,13 +1992,6 @@ impl<'a, F: Function> Env<'a, F> { minimal = true; fixed = true; } else { - if first_range.def.is_valid() { - let def_data = &self.defs[first_range.def.index()]; - if let OperandPolicy::FixedReg(_) = def_data.operand.policy() { - log::debug!(" -> fixed def {:?}", first_range.def); - fixed = true; - } - } let mut use_iter = first_range.first_use; while use_iter.is_valid() { let use_data = &self.uses[use_iter.index()]; @@ -2121,10 +2029,6 @@ impl<'a, F: Function> Env<'a, F> { let mut range = self.bundles[bundle.index()].first_range; while range.is_valid() { let range_data = &self.ranges[range.index()]; - if range_data.def.is_valid() { - log::debug!(" -> has def (spill weight +2000)"); - total += 2000; - } log::debug!(" -> uses spill weight: +{}", range_data.uses_spill_weight); total += range_data.uses_spill_weight; range = range_data.next_in_bundle; @@ -2300,11 +2204,6 @@ impl<'a, F: Function> Env<'a, F> { } }; - if self.ranges[our_iter.index()].def.is_valid() { - let def_data = &self.defs[self.ranges[our_iter.index()].def.index()]; - log::debug!(" -> range has def at {:?}", def_data.pos); - update_with_pos(def_data.pos); - } let mut use_idx = self.ranges[our_iter.index()].first_use; while use_idx.is_valid() { let use_data = &self.uses[use_idx.index()]; @@ -2361,18 +2260,10 @@ impl<'a, F: Function> Env<'a, F> { let mut splits = smallvec![]; let mut iter = self.bundles[bundle.index()].first_range; log::debug!("finding all use/def splits for {:?}", bundle); - let (bundle_start, bundle_end) = if iter.is_valid() { - ( - self.ranges[iter.index()].range.from, - self.ranges[self.bundles[bundle.index()].last_range.index()] - .range - .to, - ) + let bundle_start = if iter.is_valid() { + self.ranges[iter.index()].range.from } else { - ( - ProgPoint::before(Inst::new(0)), - ProgPoint::after(Inst::new(self.func.insts() - 1)), - ) + ProgPoint::before(Inst::new(0)) }; // N.B.: a minimal bundle must include only ProgPoints in a // single instruction, but can include both (can include two @@ -2382,27 +2273,21 @@ impl<'a, F: Function> Env<'a, F> { while iter.is_valid() { let rangedata = &self.ranges[iter.index()]; log::debug!(" -> range {:?}: {:?}", iter, rangedata.range); - if rangedata.def.is_valid() { - // Split both before and after def (make it a minimal bundle). - let def_pos = self.defs[rangedata.def.index()].pos; - let def_end = ProgPoint::before(def_pos.inst.next()); - log::debug!( - " -> splitting before and after def: {:?} and {:?}", - def_pos, - def_end, - ); - if def_pos > bundle_start { - splits.push(def_pos); - } - if def_end < bundle_end { - splits.push(def_end); - } - } let mut use_idx = rangedata.first_use; while use_idx.is_valid() { let use_data = &self.uses[use_idx.index()]; - let before_use_inst = ProgPoint::before(use_data.pos.inst); - let after_use_inst = before_use_inst.next().next(); + log::debug!(" -> use: {:?}", use_data); + let before_use_inst = if use_data.is_def { + // For a def, split *at* the def -- this may be an + // After point, but the value cannot be live into + // the def so we don't need to insert a move. + use_data.pos + } else { + // For an use, split before the instruction -- + // this allows us to insert a move if necessary. 
+ ProgPoint::before(use_data.pos.inst) + }; + let after_use_inst = ProgPoint::before(use_data.pos.inst.next()); log::debug!( " -> splitting before and after use: {:?} and {:?}", before_use_inst, @@ -2507,8 +2392,11 @@ impl<'a, F: Function> Env<'a, F> { // bundle, then advance to the first split within the // range. if split_idx < split_points.len() && split_points[split_idx] <= range.from { - log::debug!(" -> split before a range; creating new bundle"); cur_bundle = self.create_bundle(); + log::debug!( + " -> split before a range; creating new bundle {:?}", + cur_bundle + ); self.bundles[cur_bundle.index()].spillset = self.bundles[bundle.index()].spillset; new_bundles.push(cur_bundle); split_idx += 1; @@ -2637,18 +2525,6 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[iter.index()].uses_spill_weight = uses_spill_weight; } - // Move over def, if appropriate. - if self.ranges[iter.index()].def.is_valid() { - let def_idx = self.ranges[iter.index()].def; - let def_pos = self.defs[def_idx.index()].pos; - log::debug!(" -> range {:?} has def at {:?}", iter, def_pos); - if def_pos >= split_point { - log::debug!(" -> transferring def bit to {:?}", rest_lr); - self.ranges[iter.index()].def = DefIndex::invalid(); - self.ranges[rest_lr.index()].def = def_idx; - } - } - log::debug!( " -> range {:?} next-in-bundle is {:?}", iter, @@ -3157,14 +3033,6 @@ impl<'a, F: Function> Env<'a, F> { let mut blockparam_out_idx = 0; for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); - let defidx = self.vregs[vreg.index()].def; - let defining_block = if defidx.is_valid() { - self.cfginfo.insn_block[self.defs[defidx.index()].pos.inst.index()] - } else if self.vregs[vreg.index()].blockparam.is_valid() { - self.vregs[vreg.index()].blockparam - } else { - Block::invalid() - }; // For each range in each vreg, insert moves or // half-moves. We also scan over `blockparam_ins` and @@ -3212,12 +3080,12 @@ impl<'a, F: Function> Env<'a, F> { // inter-block transfers). // // Note that we do *not* do this if there is also a - // def exactly at `range.from`: it's possible that an - // old liverange covers the Before pos of an inst, a - // new liverange covers the After pos, and the def - // also happens at After. In this case we don't want - // to an insert a move after the instruction copying - // the old liverange. + // def as the first use in the new range: it's + // possible that an old liverange covers the Before + // pos of an inst, a new liverange covers the After + // pos, and the def also happens at After. In this + // case we don't want to an insert a move after the + // instruction copying the old liverange. 
// // Note also that we assert that the new range has to // start at the Before-point of an instruction; we @@ -3227,16 +3095,16 @@ impl<'a, F: Function> Env<'a, F> { if prev.is_valid() { let prev_alloc = self.get_alloc_for_range(prev); let prev_range = self.ranges[prev.index()].range; - let def_idx = self.ranges[iter.index()].def; - let def_pos = if def_idx.is_valid() { - Some(self.defs[def_idx.index()].pos) + let first_use = self.ranges[iter.index()].first_use; + let first_is_def = if first_use.is_valid() { + self.uses[first_use.index()].is_def } else { - None + false }; debug_assert!(prev_alloc != Allocation::none()); if prev_range.to == range.from && !self.is_start_of_block(range.from) - && def_pos != Some(range.from) + && !first_is_def { log::debug!( "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", @@ -3335,21 +3203,8 @@ impl<'a, F: Function> Env<'a, F> { // Scan over blocks whose beginnings are covered by // this range and for which the vreg is live at the - // start of the block, and for which the def of the - // vreg is not in this block. For each, for each - // predecessor, add a Dest half-move. - // - // N.B.: why "def of this vreg is not in this block"? - // Because live-range computation can over-approximate - // (due to the way that we handle loops in a single - // pass), especially if the program has irreducible - // control flow and/or if blocks are not in RPO, it - // may be the case that (i) the vreg is not *actually* - // live into this block, but is *defined* in this - // block. If the value is defined in this block, - // because this is SSA, the value cannot be used - // before the def and so we are not concerned about - // any incoming allocation for it. + // start of the block. For each, for each predecessor, + // add a Dest half-move. let mut block = self.cfginfo.insn_block[range.from.inst.index()]; if self.cfginfo.block_entry[block.index()] < range.from { block = block.next(); @@ -3405,10 +3260,7 @@ impl<'a, F: Function> Env<'a, F> { blockparam_in_idx += 1; } - // The below (range incoming into block) must be - // skipped if the def is in this block, as noted - // above. - if block == defining_block || !self.liveins[block.index()].get(vreg.index()) { + if !self.liveins[block.index()].get(vreg.index()) { block = block.next(); continue; } @@ -3452,28 +3304,21 @@ impl<'a, F: Function> Env<'a, F> { } // Scan over def/uses and apply allocations. - if self.ranges[iter.index()].def.is_valid() { - let defdata = &self.defs[self.ranges[iter.index()].def.index()]; - debug_assert!(range.contains_point(defdata.pos)); - let operand = defdata.operand; - let inst = defdata.pos.inst; - let slot = defdata.slot; - self.set_alloc(inst, slot, alloc); - if let OperandPolicy::Reuse(_) = operand.policy() { - reuse_input_insts.push(inst); - } - } let mut use_iter = self.ranges[iter.index()].first_use; while use_iter.is_valid() { let usedata = &self.uses[use_iter.index()]; debug_assert!(range.contains_point(usedata.pos)); let inst = usedata.pos.inst; let slot = usedata.slot; + let operand = usedata.operand; // Safepoints add virtual uses with no slots; // avoid these. 
if slot != SLOT_NONE { self.set_alloc(inst, slot, alloc); } + if let OperandPolicy::Reuse(_) = operand.policy() { + reuse_input_insts.push(inst); + } use_iter = self.uses[use_iter.index()].next_use; } @@ -3567,7 +3412,9 @@ impl<'a, F: Function> Env<'a, F> { let mut last = None; for dest in first_dest..last_dest { let dest = &half_moves[dest]; - debug_assert!(last != Some(dest.alloc)); + if last == Some(dest.alloc) { + continue; + } self.insert_move(insertion_point, prio, src.alloc, dest.alloc); last = Some(dest.alloc); } @@ -4051,7 +3898,6 @@ impl<'a, F: Function> Env<'a, F> { pub fn run(func: &F, mach_env: &MachineEnv) -> Result { let cfginfo = CFGInfo::new(func); - validate_ssa(func, &cfginfo)?; let mut env = Env::new(func, mach_env, cfginfo); env.init()?; diff --git a/src/lib.rs b/src/lib.rs index 455cb4a1..74d462bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,6 +43,7 @@ pub struct PReg(u8, RegClass); impl PReg { pub const MAX_BITS: usize = 5; pub const MAX: usize = (1 << Self::MAX_BITS) - 1; + pub const MAX_INDEX: usize = 2 * Self::MAX; // including RegClass bit /// Create a new PReg. The `hw_enc` range is 6 bits. #[inline(always)] @@ -68,12 +69,12 @@ impl PReg { /// all PRegs and index it efficiently. #[inline(always)] pub fn index(self) -> usize { - ((self.1 as u8 as usize) << 6) | (self.0 as usize) + ((self.1 as u8 as usize) << 5) | (self.0 as usize) } #[inline(always)] pub fn from_index(index: usize) -> Self { - let class = (index >> 6) & 1; + let class = (index >> 5) & 1; let class = match class { 0 => RegClass::Int, 1 => RegClass::Float, From 48fbc235eac54aa23db068fd5cb7b30c26ea499d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 5 May 2021 23:08:19 -0700 Subject: [PATCH 010/155] BitVec::get() takes immutable self --- src/bitvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index 4dc727b7..9260e1b2 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -66,7 +66,7 @@ impl BitVec { } #[inline(always)] - pub fn get(&mut self, idx: usize) -> bool { + pub fn get(&self, idx: usize) -> bool { let word = idx / BITS_PER_WORD; let bit = idx % BITS_PER_WORD; if word >= self.bits.len() { From ab828b6c86f7c00c6499bb10352d55810da1dd03 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 5 May 2021 23:14:04 -0700 Subject: [PATCH 011/155] MachineEnv fields are public --- src/lib.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 74d462bf..2031f333 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -863,10 +863,23 @@ pub enum Edit { /// as well. #[derive(Clone, Debug)] pub struct MachineEnv { - regs: Vec, - preferred_regs_by_class: Vec>, - non_preferred_regs_by_class: Vec>, - scratch_by_class: Vec, + /// Physical registers. Every register that might be mentioned in + /// any constraint must be listed here, even if it is not + /// allocatable under normal conditions. + pub regs: Vec, + /// Preferred physical registers for each class. These are the + /// registers that will be allocated first, if free. + pub preferred_regs_by_class: Vec>, + /// Non-preferred physical registers for each class. These are the + /// registers that will be allocated if a preferred register is + /// not available; using one of these is considered suboptimal, + /// but still better than spilling. + pub non_preferred_regs_by_class: Vec>, + /// One scratch register per class. This is needed to perform + /// moves between registers when cyclic move patterns occur. 
The + /// register should not be placed in either the preferred or + /// non-preferred list (i.e., it is not otherwise allocatable). + pub scratch_by_class: Vec, } /// The output of the register allocator. From 80cdd0c5ac6ba3bd0988b22d9e4ac8ae027d2aab Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 01:01:27 -0700 Subject: [PATCH 012/155] Properly handle multiple same-fixed-reg constraints to the same vreg in one inst. --- src/ion/mod.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 1d987393..138c06dd 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -944,6 +944,7 @@ impl<'a, F: Function> Env<'a, F> { } fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { + log::debug!("adding liverange to preg: {:?} to {}", range, reg); let preg_idx = PRegIndex::new(reg.index()); let lr = self.create_liverange(range); self.pregs[preg_idx.index()] @@ -1413,10 +1414,22 @@ impl<'a, F: Function> Env<'a, F> { if let Some(idx) = seen_fixed_for_vreg.iter().position(|r| *r == op.vreg()) { let orig_preg = first_preg[idx]; - log::debug!(" -> duplicate; switching to policy Reg"); - fixups.push((pos, orig_preg, preg_idx, slot)); - *op = Operand::new(op.vreg(), OperandPolicy::Reg, op.kind(), op.pos()); - extra_clobbers.push((preg, pos.inst)); + if orig_preg != preg_idx { + log::debug!(" -> duplicate; switching to policy Reg"); + fixups.push((pos, orig_preg, preg_idx, slot)); + *op = Operand::new( + op.vreg(), + OperandPolicy::Reg, + op.kind(), + op.pos(), + ); + log::debug!( + " -> extra clobber {} at inst{}", + preg, + pos.inst.index() + ); + extra_clobbers.push((preg, pos.inst)); + } } else { seen_fixed_for_vreg.push(op.vreg()); first_preg.push(preg_idx); @@ -2716,7 +2729,7 @@ impl<'a, F: Function> Env<'a, F> { if self.minimal_bundle(bundle) { self.dump_state(); } - debug_assert!(!self.minimal_bundle(bundle)); + assert!(!self.minimal_bundle(bundle)); self.split_and_requeue_bundle( bundle, From 1a7a0c5e3dd843a71cf8cbfb1a7d6a8bd1537933 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 16:09:39 -0700 Subject: [PATCH 013/155] Some performance tweaks -- try to reduce register probe count with soem more hints. Also fix spill-weight caching. --- src/ion/mod.rs | 150 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 130 insertions(+), 20 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 138c06dd..3a2820a8 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -35,6 +35,26 @@ * - Rematerialization */ +/* + Performance ideas: + + - moves: don't consider as normal inst with uses/defs + - explicit list of src/dst pairs? elide completely if + remain in same bundle; otherwise only should appear + when generating halfmoves + - sorted list of (inst, src-vreg, dst-vreg) and + (vreg, inst) + - ignore inst during all passes over code + - when same bundle at both ends, ignore during resolution + - otherwise, fill in move-list alloc slots during scans + + - conflict hints? (note on one bundle that it definitely conflicts + with another, so avoid probing the other's alloc) + + - partial allocation -- place one LR, split rest off into separate + bundle, in one pass? 
+ */ + #![allow(dead_code, unused_imports)] use crate::bitvec::BitVec; @@ -118,6 +138,9 @@ struct LiveRange { next_in_bundle: LiveRangeIndex, next_in_reg: LiveRangeIndex, + + reg_hint: Option, // if a bundle partly fits, this is used to + // record LRs that do fit } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -202,7 +225,7 @@ impl LiveBundle { #[inline(always)] fn cached_spill_weight(&self) -> u32 { - self.spill_weight_and_props & !((1 << 30) - 1) + self.spill_weight_and_props & ((1 << 30) - 1) } } @@ -521,6 +544,8 @@ pub struct Stats { process_bundle_count: usize, process_bundle_reg_probes_fixed: usize, process_bundle_reg_success_fixed: usize, + process_bundle_bounding_range_probes_any: usize, + process_bundle_bounding_range_success_any: usize, process_bundle_reg_probes_any: usize, process_bundle_reg_success_any: usize, evict_bundle_event: usize, @@ -558,7 +583,8 @@ pub struct Stats { struct RegTraversalIter<'a> { env: &'a MachineEnv, class: usize, - hint_reg: Option, + hints: [Option; 2], + hint_idx: usize, pref_idx: usize, non_pref_idx: usize, offset: usize, @@ -568,13 +594,20 @@ impl<'a> RegTraversalIter<'a> { pub fn new( env: &'a MachineEnv, class: RegClass, - hint_reg: Option, + mut hint_reg: Option, + mut hint2_reg: Option, offset: usize, ) -> Self { + if hint_reg.is_none() { + hint_reg = hint2_reg; + hint2_reg = None; + } + let hints = [hint_reg, hint2_reg]; Self { env, class: class as u8 as usize, - hint_reg, + hints, + hint_idx: 0, pref_idx: 0, non_pref_idx: 0, offset, @@ -586,19 +619,27 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { type Item = PReg; fn next(&mut self) -> Option { - if let Some(preg) = self.hint_reg.take() { - return Some(preg); + if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { + let h = self.hints[self.hint_idx]; + self.hint_idx += 1; + return h; } - if self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { + while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { let arr = &self.env.preferred_regs_by_class[self.class][..]; let r = arr[(self.pref_idx + self.offset) % arr.len()]; self.pref_idx += 1; + if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + continue; + } return Some(r); } - if self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { + while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { let arr = &self.env.non_preferred_regs_by_class[self.class][..]; let r = arr[(self.non_pref_idx + self.offset) % arr.len()]; self.non_pref_idx += 1; + if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + continue; + } return Some(r); } None @@ -699,6 +740,7 @@ impl<'a, F: Function> Env<'a, F> { last_use: UseIndex::invalid(), next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), + reg_hint: None, }); LiveRangeIndex::new(idx) } @@ -1889,6 +1931,26 @@ impl<'a, F: Function> Env<'a, F> { Some(needed) } + fn bundle_bounding_range_if_multiple(&self, bundle: LiveBundleIndex) -> Option { + let first_range = self.bundles[bundle.index()].first_range; + let last_range = self.bundles[bundle.index()].last_range; + if first_range.is_invalid() || first_range == last_range { + return None; + } + Some(CodeRange { + from: self.ranges[first_range.index()].range.from, + to: self.ranges[last_range.index()].range.to, + }) + } + + fn range_definitely_fits_in_reg(&self, range: CodeRange, reg: PRegIndex) -> bool { + self.pregs[reg.index()] + .allocations + .btree + .get(&LiveRangeKey::from_range(&range)) + .is_none() + } + fn 
try_to_allocate_bundle_to_reg( &mut self, bundle: LiveBundleIndex, @@ -1899,6 +1961,7 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; while iter.is_valid() { let range = &self.ranges[iter.index()]; + let next = range.next_in_bundle; log::debug!(" -> range {:?}", range); // Note that the comparator function here tests for *overlap*, so we // are checking whether the BTree contains any preg range that @@ -1923,8 +1986,10 @@ impl<'a, F: Function> Env<'a, F> { // range from a direct use of the PReg (due to clobber). return AllocRegResult::ConflictWithFixed; } + } else { + self.ranges[iter.index()].reg_hint = Some(self.pregs[reg.index()].reg); } - iter = range.next_in_bundle; + iter = next; } if conflicts.len() > 0 { @@ -1985,11 +2050,18 @@ impl<'a, F: Function> Env<'a, F> { } fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { - bundles + log::debug!("maximum_spill_weight_in_bundle_set: {:?}", bundles); + let m = bundles .iter() - .map(|&b| self.bundles[b.index()].cached_spill_weight()) + .map(|&b| { + let w = self.bundles[b.index()].cached_spill_weight(); + log::debug!("bundle{}: {}", b.index(), w); + w + }) .max() - .unwrap_or(0) + .unwrap_or(0); + log::debug!(" -> max: {}", m); + m } fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { @@ -2055,7 +2127,7 @@ impl<'a, F: Function> Env<'a, F> { ); total / self.bundles[bundle.index()].prio } else { - total + 0 } }; @@ -2577,13 +2649,19 @@ impl<'a, F: Function> Env<'a, F> { // Find any requirements: for every LR, for every def/use, gather // requirements (fixed-reg, any-reg, any) and merge them. let req = self.compute_requirement(bundle); - // Grab a hint from our spillset, if any. + // Grab a hint from our spillset, if any, and from the first LR, if any. let hint_reg = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; + let hint2_reg = if self.bundles[bundle.index()].first_range.is_valid() { + self.ranges[self.bundles[bundle.index()].first_range.index()].reg_hint + } else { + None + }; log::debug!( - "process_bundle: bundle {:?} requirement {:?} hint {:?}", + "process_bundle: bundle {:?} requirement {:?} hint {:?} hint2 {:?}", bundle, req, hint_reg, + hint2_reg ); // Try to allocate! @@ -2636,7 +2714,35 @@ impl<'a, F: Function> Env<'a, F> { .index() + bundle.index(); - for preg in RegTraversalIter::new(self.env, class, hint_reg, scan_offset) { + // If the bundle is more than one range, see if we + // can find a reg that the bounding range fits + // completely in first. Use that if so. Otherwise, + // do a detailed (liverange-by-liverange) probe of + // each reg in preference order. 
+ let bounding_range = self.bundle_bounding_range_if_multiple(bundle); + if let Some(bounding_range) = bounding_range { + for preg in + RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset) + { + let preg_idx = PRegIndex::new(preg.index()); + self.stats.process_bundle_bounding_range_probes_any += 1; + if self.range_definitely_fits_in_reg(bounding_range, preg_idx) { + let result = self.try_to_allocate_bundle_to_reg(bundle, preg_idx); + self.stats.process_bundle_bounding_range_success_any += 1; + let alloc = match result { + AllocRegResult::Allocated(alloc) => alloc, + _ => panic!("Impossible result: {:?}", result), + }; + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = Some(alloc.as_reg().unwrap()); + return; + } + } + } + + for preg in + RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset) + { self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { @@ -2709,9 +2815,13 @@ impl<'a, F: Function> Env<'a, F> { // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill // weight, then don't evict. - if self.maximum_spill_weight_in_bundle_set(&conflicting_bundles) - >= self.bundle_spill_weight(bundle) - { + let max_spill_weight = self.maximum_spill_weight_in_bundle_set(&conflicting_bundles); + log::debug!( + " -> max_spill_weight = {}; our spill weight {}", + max_spill_weight, + self.bundle_spill_weight(bundle) + ); + if max_spill_weight >= self.bundle_spill_weight(bundle) { log::debug!(" -> we're already the cheapest bundle to spill -- going to split"); break; } @@ -2748,7 +2858,7 @@ impl<'a, F: Function> Env<'a, F> { let class = any_vreg.class(); let mut success = false; self.stats.spill_bundle_reg_probes += 1; - for preg in RegTraversalIter::new(self.env, class, None, bundle.index()) { + for preg in RegTraversalIter::new(self.env, class, None, None, bundle.index()) { let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = self.try_to_allocate_bundle_to_reg(bundle, preg_idx) From 747c56c2c3cc56574210bf013ac6d5863b08266c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 16:19:38 -0700 Subject: [PATCH 014/155] Some micro-optimizations in BitVec. --- src/bitvec.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index 9260e1b2..5a1c9490 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -83,14 +83,16 @@ impl BitVec { let last_idx = other.bits.len() - 1; self.ensure_idx(last_idx); - let mut changed = false; + let mut changed = 0; for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { - if *other_word & !*self_word != 0 { - changed = true; + if *other_word == 0 { + // Avoid cache misses in `self` if `other` is zeroes. 
+ continue; } + changed |= *other_word & !*self_word; *self_word |= *other_word; } - changed + changed != 0 } pub fn and(&mut self, other: &Self) { From e2beb471c496b0f4b5fc7d693d01b5777bc65fd4 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 18:49:23 -0700 Subject: [PATCH 015/155] Handle moves specially with move-insertion logic rather than ordinary operand/inst handling --- src/fuzzing/func.rs | 2 +- src/ion/mod.rs | 297 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 260 insertions(+), 39 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index ae8dccef..5e8bff86 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -130,7 +130,7 @@ impl Function for Func { &self.reftype_vregs[..] } - fn is_move(&self, _: Inst) -> Option<(VReg, VReg)> { + fn is_move(&self, insn: Inst) -> Option<(VReg, VReg)> { None } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 3a2820a8..0b9fc12a 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -19,17 +19,10 @@ * - safepoints? * - split just before uses with fixed regs and/or just after defs * with fixed regs? - * - try-any-reg allocate loop should randomly probe in caller-save - * ("preferred") regs first -- have a notion of "preferred regs" in - * MachineEnv? * - measure average liverange length / number of splits / ... * * - reused-input reg: don't allocate register for input that is reused. * - * - modify CL to generate SSA VCode - * - lower blockparams to blockparams directly - * - use temps properly (`alloc_tmp()` vs `alloc_reg()`) - * * - "Fixed-stack location": negative spillslot numbers? * * - Rematerialization @@ -38,16 +31,6 @@ /* Performance ideas: - - moves: don't consider as normal inst with uses/defs - - explicit list of src/dst pairs? elide completely if - remain in same bundle; otherwise only should appear - when generating halfmoves - - sorted list of (inst, src-vreg, dst-vreg) and - (vreg, inst) - - ignore inst during all passes over code - - when same bundle at both ends, ignore during resolution - - otherwise, fill in move-list alloc slots during scans - - conflict hints? (note on one bundle that it definitely conflicts with another, so avoid probing the other's alloc) @@ -139,8 +122,9 @@ struct LiveRange { next_in_bundle: LiveRangeIndex, next_in_reg: LiveRangeIndex, - reg_hint: Option, // if a bundle partly fits, this is used to - // record LRs that do fit + // if a bundle partly fits, this is used to record LRs that do fit + reg_hint: Option, + merged_into: LiveRangeIndex, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -317,6 +301,21 @@ struct Env<'a, F: Function> { spillslots: Vec, slots_by_size: Vec, + // Program moves: these are moves in the provided program that we + // handle with our internal machinery, in order to avoid the + // overhead of ordinary operand processing. We expect the client + // to not generate any code for instructions that return + // `Some(..)` for `.is_move()`, and instead use the edits that we + // provide to implement those moves (or some simplified version of + // them) post-regalloc. + // + // (from-vreg, inst, from-alloc), sorted by (from-vreg, inst) + prog_move_srcs: Vec<((VRegIndex, Inst), Allocation)>, + // (to-vreg, inst, to-alloc), sorted by (to-vreg, inst) + prog_move_dsts: Vec<((VRegIndex, Inst), Allocation)>, + // (from-vreg, to-vreg) for bundle-merging. 
+ prog_move_merges: Vec<(LiveRangeIndex, LiveRangeIndex)>, + // When multiple fixed-register constraints are present on a // single VReg at a single program point (this can happen for, // e.g., call args that use the same value multiple times), we @@ -535,6 +534,7 @@ enum InsertMovePrio { MultiFixedReg, ReusedInput, OutEdgeMoves, + ProgramMove, } #[derive(Clone, Copy, Debug, Default)] @@ -671,6 +671,10 @@ impl<'a, F: Function> Env<'a, F> { spillslots: vec![], slots_by_size: vec![], + prog_move_srcs: vec![], + prog_move_dsts: vec![], + prog_move_merges: vec![], + multi_fixed_reg_fixups: vec![], inserted_moves: vec![], edits: vec![], @@ -741,6 +745,7 @@ impl<'a, F: Function> Env<'a, F> { next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), reg_hint: None, + merged_into: LiveRangeIndex::invalid(), }); LiveRangeIndex::new(idx) } @@ -894,6 +899,7 @@ impl<'a, F: Function> Env<'a, F> { iter = usedata.next_use; } } + self.ranges[from.index()].merged_into = into; } fn update_liverange_stats_on_remove_use(&mut self, from: LiveRangeIndex, u: UseIndex) { @@ -1114,6 +1120,73 @@ impl<'a, F: Function> Env<'a, F> { } } + // If this is a move, handle specially. + if let Some((src, dst)) = self.func.is_move(inst) { + log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); + assert_eq!(src.class(), dst.class()); + + // Handle the def w.r.t. liveranges: trim the + // start of the range and mark it dead at this + // point in our backward scan. + let pos = ProgPoint::after(inst); + let mut dst_lr = vreg_ranges[dst.vreg()]; + // If there was no liverange (dead def), create a trivial one. + if dst_lr.is_invalid() { + dst_lr = self.add_liverange_to_vreg( + VRegIndex::new(dst.vreg()), + CodeRange { + from: pos, + to: pos.next(), + }, + &mut num_ranges, + ); + log::debug!(" -> invalid; created {:?}", dst_lr); + } else { + log::debug!(" -> has existing LR {:?}", dst_lr); + } + if self.ranges[dst_lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!(" -> started at block start; trimming to {:?}", pos); + self.ranges[dst_lr.index()].range.from = pos; + } + live.set(dst.vreg(), false); + vreg_ranges[dst.vreg()] = LiveRangeIndex::invalid(); + self.vregs[dst.vreg()].reg = dst; + + // Handle the use w.r.t. liveranges: make it live + // and create an initial LR back to the start of + // the block. + let pos = ProgPoint::before(inst); + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + let src_lr = self.add_liverange_to_vreg( + VRegIndex::new(src.vreg()), + range, + &mut num_ranges, + ); + let src_is_dead_after_move = !vreg_ranges[src.vreg()].is_valid(); + vreg_ranges[src.vreg()] = src_lr; + + log::debug!(" -> src LR {:?}", src_lr); + + // Add to live-set. + live.set(src.vreg(), true); + + // Add to program-moves lists. + self.prog_move_srcs + .push(((VRegIndex::new(src.vreg()), inst), Allocation::none())); + self.prog_move_dsts + .push(((VRegIndex::new(dst.vreg()), inst), Allocation::none())); + if src_is_dead_after_move { + self.prog_move_merges.push((src_lr, dst_lr)); + } + + continue; + } + // Process defs and uses. for i in 0..self.func.inst_operands(inst).len() { // don't borrow `self` @@ -1166,7 +1239,6 @@ impl<'a, F: Function> Env<'a, F> { } self.insert_use_into_liverange_and_update_stats(lr, u); // Remove from live-set. - // TODO-cranelift: here is where we keep it live if it's a mod, not def. 
live.set(operand.vreg().vreg(), false); vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); } @@ -1507,6 +1579,11 @@ impl<'a, F: Function> Env<'a, F> { self.clobbers.sort(); self.blockparam_ins.sort(); self.blockparam_outs.sort(); + self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos); + self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos); + + log::debug!("prog_move_srcs = {:?}", self.prog_move_srcs); + log::debug!("prog_move_dsts = {:?}", self.prog_move_dsts); self.stats.initial_liverange_count = self.ranges.len(); self.stats.blockparam_ins_count = self.blockparam_ins.len(); @@ -1578,6 +1655,20 @@ impl<'a, F: Function> Env<'a, F> { return false; } + // Sanity check: both bundles should contain only ranges with appropriate VReg classes. + let mut iter = self.bundles[from.index()].first_range; + while iter.is_valid() { + let vreg = self.ranges[iter.index()].vreg; + assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + iter = self.ranges[iter.index()].next_in_bundle; + } + let mut iter = self.bundles[to.index()].first_range; + while iter.is_valid() { + let vreg = self.ranges[iter.index()].vreg; + assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + iter = self.ranges[iter.index()].next_in_bundle; + } + // Check for overlap in LiveRanges. let mut iter0 = self.bundles[from.index()].first_range; let mut iter1 = self.bundles[to.index()].first_range; @@ -1730,19 +1821,8 @@ impl<'a, F: Function> Env<'a, F> { for inst in 0..self.func.insts() { let inst = Inst::new(inst); - // Attempt to merge move srcs and dests, and attempt to - // merge Reuse-policy operand outputs with the + // Attempt to merge Reuse-policy operand outputs with the // corresponding inputs. - if let Some((src_vreg, dst_vreg)) = self.func.is_move(inst) { - log::debug!("trying to merge move src {} to dst {}", src_vreg, dst_vreg); - let src_bundle = - self.ranges[self.vregs[src_vreg.vreg()].first_range.index()].bundle; - assert!(src_bundle.is_valid()); - let dest_bundle = - self.ranges[self.vregs[dst_vreg.vreg()].first_range.index()].bundle; - assert!(dest_bundle.is_valid()); - self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); - } for op in self.func.inst_operands(inst) { if let OperandPolicy::Reuse(reuse_idx) = op.policy() { let src_vreg = op.vreg(); @@ -1783,9 +1863,36 @@ impl<'a, F: Function> Env<'a, F> { self.merge_bundles(from_bundle, to_bundle); } + // Attempt to merge move srcs/dsts. + for i in 0..self.prog_move_merges.len() { + let (src, dst) = self.prog_move_merges[i]; + log::debug!("trying to merge move src LR {:?} to dst LR {:?}", src, dst); + let src = self.resolve_merged_lr(src); + let dst = self.resolve_merged_lr(dst); + log::debug!( + "resolved LR-construction merging chains: move-merge is now src LR {:?} to dst LR {:?}", + src, + dst + ); + let src_bundle = self.ranges[src.index()].bundle; + assert!(src_bundle.is_valid()); + let dest_bundle = self.ranges[dst.index()].bundle; + assert!(dest_bundle.is_valid()); + self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); + } + log::debug!("done merging bundles"); } + fn resolve_merged_lr(&self, mut lr: LiveRangeIndex) -> LiveRangeIndex { + let mut iter = 0; + while iter < 100 && self.ranges[lr.index()].merged_into.is_valid() { + lr = self.ranges[lr.index()].merged_into; + iter += 1; + } + lr + } + fn compute_bundle_prio(&self, bundle: LiveBundleIndex) -> u32 { // The priority is simply the total "length" -- the number of // instructions covered by all LiveRanges. 
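
For illustration only — a condensed sketch of the two properties computed here, with invented helpers `bundle_prio` and `spill_weight` and ranges reduced to bare (from, to) instruction indices:

/// Sketch: priority = total number of instructions covered by the
/// bundle's ranges; spill weight = total use weight divided by that
/// length, so short, heavily-used bundles are expensive to spill.
fn bundle_prio(ranges: &[(u32, u32)]) -> u32 {
    ranges.iter().map(|&(from, to)| to - from).sum()
}

fn spill_weight(total_use_weight: u32, prio: u32, minimal: bool) -> u32 {
    if minimal {
        // Minimal bundles must never be evicted; give them an
        // effectively infinite weight.
        u32::MAX
    } else if prio == 0 {
        0
    } else {
        total_use_weight / prio
    }
}

fn main() {
    let ranges = [(10, 12), (20, 24)];
    let prio = bundle_prio(&ranges);
    assert_eq!(prio, 6);
    assert_eq!(spill_weight(3000, prio, false), 500);
}
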
@@ -1901,14 +2008,19 @@ impl<'a, F: Function> Env<'a, F> { } fn compute_requirement(&self, bundle: LiveBundleIndex) -> Option { - let class = self.vregs[self.ranges[self.bundles[bundle.index()].first_range.index()] + let init_vreg = self.vregs[self.ranges[self.bundles[bundle.index()].first_range.index()] .vreg .index()] - .reg - .class(); + .reg; + let class = init_vreg.class(); let mut needed = Requirement::Any(class); - log::debug!("compute_requirement: bundle {:?} class {:?}", bundle, class); + log::debug!( + "compute_requirement: bundle {:?} class {:?} (from vreg {:?})", + bundle, + class, + init_vreg + ); let mut iter = self.bundles[bundle.index()].first_range; while iter.is_valid() { @@ -2669,6 +2781,7 @@ impl<'a, F: Function> Env<'a, F> { let mut first_conflicting_bundle; loop { attempts += 1; + log::debug!("attempt {}, req {:?}", attempts, req); debug_assert!(attempts < 100 * self.func.insts()); first_conflicting_bundle = None; let req = match req { @@ -2682,6 +2795,7 @@ impl<'a, F: Function> Env<'a, F> { Requirement::Fixed(preg) => { let preg_idx = PRegIndex::new(preg.index()); self.stats.process_bundle_reg_probes_fixed += 1; + log::debug!("trying fixed reg {:?}", preg_idx); match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { AllocRegResult::Allocated(alloc) => { self.stats.process_bundle_reg_success_fixed += 1; @@ -2690,8 +2804,12 @@ impl<'a, F: Function> Env<'a, F> { .reg_hint = Some(alloc.as_reg().unwrap()); return; } - AllocRegResult::Conflict(bundles) => bundles, + AllocRegResult::Conflict(bundles) => { + log::debug!(" -> conflict with bundles {:?}", bundles); + bundles + } AllocRegResult::ConflictWithFixed => { + log::debug!(" -> conflict with fixed alloc"); // Empty conflicts set: there's nothing we can // evict, because fixed conflicts cannot be moved. smallvec![] @@ -2721,10 +2839,12 @@ impl<'a, F: Function> Env<'a, F> { // each reg in preference order. let bounding_range = self.bundle_bounding_range_if_multiple(bundle); if let Some(bounding_range) = bounding_range { + log::debug!("initial scan with bounding range {:?}", bounding_range); for preg in RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset) { let preg_idx = PRegIndex::new(preg.index()); + log::debug!("trying preg {:?}", preg_idx); self.stats.process_bundle_bounding_range_probes_any += 1; if self.range_definitely_fits_in_reg(bounding_range, preg_idx) { let result = self.try_to_allocate_bundle_to_reg(bundle, preg_idx); @@ -2735,6 +2855,7 @@ impl<'a, F: Function> Env<'a, F> { }; self.spillsets[self.bundles[bundle.index()].spillset.index()] .reg_hint = Some(alloc.as_reg().unwrap()); + log::debug!(" -> definitely fits; assigning"); return; } } @@ -2745,6 +2866,7 @@ impl<'a, F: Function> Env<'a, F> { { self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); + log::debug!("trying preg {:?}", preg_idx); match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { AllocRegResult::Allocated(alloc) => { self.stats.process_bundle_reg_success_any += 1; @@ -2754,6 +2876,7 @@ impl<'a, F: Function> Env<'a, F> { return; } AllocRegResult::Conflict(bundles) => { + log::debug!(" -> conflict with bundles {:?}", bundles); if lowest_cost_conflict_set.is_none() { lowest_cost_conflict_set = Some(bundles); } else if self.maximum_spill_weight_in_bundle_set(&bundles) @@ -2765,6 +2888,7 @@ impl<'a, F: Function> Env<'a, F> { } } AllocRegResult::ConflictWithFixed => { + log::debug!(" -> conflict with fixed alloc"); // Simply don't consider as an option. 
} } @@ -3154,12 +3278,15 @@ impl<'a, F: Function> Env<'a, F> { let mut blockparam_in_idx = 0; let mut blockparam_out_idx = 0; + let mut prog_move_src_idx = 0; + let mut prog_move_dst_idx = 0; for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); // For each range in each vreg, insert moves or // half-moves. We also scan over `blockparam_ins` and - // `blockparam_outs`, which are sorted by (block, vreg). + // `blockparam_outs`, which are sorted by (block, vreg), + // and over program-move srcs/dsts to fill in allocations. let mut iter = self.vregs[vreg.index()].first_range; let mut prev = LiveRangeIndex::invalid(); while iter.is_valid() { @@ -3445,6 +3572,71 @@ impl<'a, F: Function> Env<'a, F> { use_iter = self.uses[use_iter.index()].next_use; } + // Scan over program move srcs/dsts to fill in allocations. + let move_src_start = if range.from.pos == InstPosition::Before { + (vreg, range.from.inst) + } else { + (vreg, range.from.inst.next()) + }; + let move_src_end = if range.to.pos == InstPosition::Before { + (vreg, range.to.inst) + } else { + (vreg, range.to.inst.next()) + }; + log::debug!( + "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", + vreg, + range, + move_src_start, + move_src_end + ); + while prog_move_src_idx < self.prog_move_srcs.len() + && self.prog_move_srcs[prog_move_src_idx].0 < move_src_start + { + log::debug!(" -> skipping idx {}", prog_move_src_idx); + prog_move_src_idx += 1; + } + while prog_move_src_idx < self.prog_move_srcs.len() + && self.prog_move_srcs[prog_move_src_idx].0 < move_src_end + { + log::debug!( + " -> setting idx {} ({:?}) to alloc {:?}", + prog_move_src_idx, + self.prog_move_srcs[prog_move_src_idx].0, + alloc + ); + self.prog_move_srcs[prog_move_src_idx].1 = alloc; + prog_move_src_idx += 1; + } + + let move_dst_start = (vreg, range.from.inst); + let move_dst_end = (vreg, range.to.inst); + log::debug!( + "vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}", + vreg, + range, + move_dst_start, + move_dst_end + ); + while prog_move_dst_idx < self.prog_move_dsts.len() + && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_start + { + log::debug!(" -> skipping idx {}", prog_move_dst_idx); + prog_move_dst_idx += 1; + } + while prog_move_dst_idx < self.prog_move_dsts.len() + && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_end + { + log::debug!( + " -> setting idx {} ({:?}) to alloc {:?}", + prog_move_dst_idx, + self.prog_move_dsts[prog_move_dst_idx].0, + alloc + ); + self.prog_move_dsts[prog_move_dst_idx].1 = alloc; + prog_move_dst_idx += 1; + } + prev = iter; iter = self.ranges[iter.index()].next_in_reg; } @@ -3646,6 +3838,35 @@ impl<'a, F: Function> Env<'a, F> { } } } + + // Sort the prog-moves lists and insert moves to reify the + // input program's move operations. 
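
A self-contained sketch of that reification step, with `Alloc`, `reify_program_moves`, and the `insert_move` callback invented for illustration (instructions are bare u32 indices here):

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Alloc {
    None,
    Reg(u8),
    Stack(u32),
}

/// Sketch: after allocation, both lists are re-sorted by instruction.
/// Because every program move contributes exactly one entry to each
/// list, zipping them pairs each move's source allocation with its
/// destination allocation, and one edit is emitted per move.
fn reify_program_moves(
    mut srcs: Vec<(u32, Alloc)>, // (inst, from-alloc)
    mut dsts: Vec<(u32, Alloc)>, // (inst, to-alloc)
    mut insert_move: impl FnMut(u32, Alloc, Alloc),
) {
    srcs.sort_unstable_by_key(|&(inst, _)| inst);
    dsts.sort_unstable_by_key(|&(inst, _)| inst);
    assert_eq!(srcs.len(), dsts.len());
    for (&(src_inst, from), &(dst_inst, to)) in srcs.iter().zip(dsts.iter()) {
        assert_eq!(src_inst, dst_inst);
        assert!(from != Alloc::None && to != Alloc::None);
        insert_move(src_inst, from, to);
    }
}

fn main() {
    let srcs = vec![(7, Alloc::Reg(3)), (4, Alloc::Stack(0))];
    let dsts = vec![(4, Alloc::Reg(1)), (7, Alloc::Reg(5))];
    reify_program_moves(srcs, dsts, |inst, from, to| {
        println!("move at inst{}: {:?} -> {:?}", inst, from, to);
    });
}
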
+ self.prog_move_srcs + .sort_unstable_by_key(|((_, inst), _)| *inst); + self.prog_move_dsts + .sort_unstable_by_key(|((_, inst), _)| *inst); + let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]); + let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]); + assert_eq!(prog_move_srcs.len(), prog_move_dsts.len()); + for (&((_, from_inst), from_alloc), &((_, to_inst), to_alloc)) in + prog_move_srcs.iter().zip(prog_move_dsts.iter()) + { + assert!(!from_alloc.is_none()); + assert!(!to_alloc.is_none()); + assert_eq!(from_inst, to_inst); + log::debug!( + "program move at inst {:?}: alloc {:?} -> {:?}", + from_inst, + from_alloc, + to_alloc + ); + self.insert_move( + ProgPoint::before(from_inst), + InsertMovePrio::ProgramMove, + from_alloc, + to_alloc, + ); + } } fn resolve_inserted_moves(&mut self) { From 07a5a88972d9a9dd6d263d9fa610968598aa07b5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 20:03:44 -0700 Subject: [PATCH 016/155] BitVec perf: use adaptive hybrid chunked small-array + FxHashMap. --- Cargo.toml | 1 + src/bitvec.rs | 260 ++++++++++++++++++++++++++++++------------------- src/ion/mod.rs | 31 +++--- 3 files changed, 176 insertions(+), 116 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c54201c8..802881be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ repository = "https://github.com/bytecodealliance/regalloc2" [dependencies] log = { version = "0.4.8", default-features = false } smallvec = "1.6.1" +fxhash = "0.2.1" # The below are only needed for fuzzing. # Keep this in sync with libfuzzer_sys's crate version: diff --git a/src/bitvec.rs b/src/bitvec.rs index 5a1c9490..7c0dbe2e 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -5,158 +5,216 @@ //! Bit vectors. -use smallvec::{smallvec, SmallVec}; +use fxhash::FxHashMap; + +/// A hybrid large/small-mode sparse mapping from integer indices to elements. +#[derive(Clone, Debug)] +enum AdaptiveMap { + Small(u32, [u32; 4], [u64; 4]), + Large(FxHashMap), +} + +const INVALID: u32 = 0xffff_ffff; + +impl AdaptiveMap { + fn new() -> Self { + Self::Small(0, [INVALID, INVALID, INVALID, INVALID], [0, 0, 0, 0]) + } + fn expand(&mut self) { + match self { + &mut Self::Small(len, ref keys, ref values) => { + let mut map = FxHashMap::default(); + for i in 0..len { + map.insert(keys[i as usize], values[i as usize]); + } + *self = Self::Large(map); + } + _ => {} + } + } + fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 { + let needs_expand = match self { + &mut Self::Small(len, ref keys, ..) 
=> len == 4 && !keys.iter().any(|k| *k == key), + _ => false, + }; + if needs_expand { + self.expand(); + } + + match self { + &mut Self::Small(ref mut len, ref mut keys, ref mut values) => { + for i in 0..*len { + if keys[i as usize] == key { + return &mut values[i as usize]; + } + } + assert!(*len < 4); + let idx = *len; + *len += 1; + keys[idx as usize] = key; + values[idx as usize] = 0; + &mut values[idx as usize] + } + &mut Self::Large(ref mut map) => map.entry(key).or_insert(0), + } + } + fn get_mut(&mut self, key: u32) -> Option<&mut u64> { + match self { + &mut Self::Small(len, ref keys, ref mut values) => { + for i in 0..len { + if keys[i as usize] == key { + return Some(&mut values[i as usize]); + } + } + None + } + &mut Self::Large(ref mut map) => map.get_mut(&key), + } + } + fn get(&self, key: u32) -> Option<&u64> { + match self { + &Self::Small(len, ref keys, ref values) => { + for i in 0..len { + if keys[i as usize] == key { + return Some(&values[i as usize]); + } + } + None + } + &Self::Large(ref map) => map.get(&key), + } + } + fn iter<'a>(&'a self) -> AdaptiveMapIter<'a> { + match self { + &Self::Small(len, ref keys, ref values) => { + AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize]) + } + &Self::Large(ref map) => AdaptiveMapIter::Large(map.iter()), + } + } +} + +enum AdaptiveMapIter<'a> { + Small(&'a [u32], &'a [u64]), + Large(std::collections::hash_map::Iter<'a, u32, u64>), +} + +impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> { + type Item = (u32, u64); + fn next(&mut self) -> Option { + match self { + &mut Self::Small(ref mut keys, ref mut values) => { + if keys.is_empty() { + None + } else { + let (k, v) = ((*keys)[0], (*values)[0]); + *keys = &(*keys)[1..]; + *values = &(*values)[1..]; + Some((k, v)) + } + } + &mut Self::Large(ref mut it) => it.next().map(|(&k, &v)| (k, v)), + } + } +} /// A conceptually infinite-length bitvector that allows bitwise operations and /// iteration over set bits efficiently. 
#[derive(Clone, Debug)] pub struct BitVec { - bits: SmallVec<[u64; 2]>, + elems: AdaptiveMap, } const BITS_PER_WORD: usize = 64; impl BitVec { pub fn new() -> Self { - Self { bits: smallvec![] } - } - - pub fn with_capacity(len: usize) -> Self { - let words = (len + BITS_PER_WORD - 1) / BITS_PER_WORD; Self { - bits: SmallVec::with_capacity(words), + elems: AdaptiveMap::new(), } } - #[inline(never)] - fn ensure_idx(&mut self, word: usize) { - let mut target_len = std::cmp::max(2, self.bits.len()); - while word >= target_len { - target_len *= 2; - } - self.bits.resize(target_len, 0); + #[inline(always)] + fn elem(&mut self, bit_index: usize) -> &mut u64 { + let word_index = (bit_index / BITS_PER_WORD) as u32; + self.elems.get_or_insert(word_index) + } + + #[inline(always)] + fn maybe_elem_mut(&mut self, bit_index: usize) -> Option<&mut u64> { + let word_index = (bit_index / BITS_PER_WORD) as u32; + self.elems.get_mut(word_index) + } + + #[inline(always)] + fn maybe_elem(&self, bit_index: usize) -> Option<&u64> { + let word_index = (bit_index / BITS_PER_WORD) as u32; + self.elems.get(word_index) } #[inline(always)] pub fn set(&mut self, idx: usize, val: bool) { - let word = idx / BITS_PER_WORD; let bit = idx % BITS_PER_WORD; if val { - if word >= self.bits.len() { - self.ensure_idx(word); - } - self.bits[word] |= 1 << bit; - } else { - if word < self.bits.len() { - self.bits[word] &= !(1 << bit); - } + *self.elem(idx) |= 1 << bit; + } else if let Some(word) = self.maybe_elem_mut(idx) { + *word &= !(1 << bit); } } pub fn assign(&mut self, other: &Self) { - if other.bits.len() > 0 { - self.ensure_idx(other.bits.len() - 1); - } - for i in 0..other.bits.len() { - self.bits[i] = other.bits[i]; - } - for i in other.bits.len()..self.bits.len() { - self.bits[i] = 0; - } + self.elems = other.elems.clone(); } #[inline(always)] pub fn get(&self, idx: usize) -> bool { - let word = idx / BITS_PER_WORD; let bit = idx % BITS_PER_WORD; - if word >= self.bits.len() { - false + if let Some(word) = self.maybe_elem(idx) { + (*word & (1 << bit)) != 0 } else { - (self.bits[word] & (1 << bit)) != 0 + false } } pub fn or(&mut self, other: &Self) -> bool { - if other.bits.is_empty() { - return false; - } - let last_idx = other.bits.len() - 1; - self.ensure_idx(last_idx); - let mut changed = 0; - for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { - if *other_word == 0 { - // Avoid cache misses in `self` if `other` is zeroes. 
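
To make the new shape of the data structure easier to see, here is a stripped-down, illustrative sketch of a hybrid small/large chunk map with a sparse union — `SparseChunks` and its methods are invented names, a Vec stands in for the fixed inline array, and std's HashMap replaces FxHashMap:

use std::collections::HashMap;

/// Sketch: chunk index -> 64-bit chunk. Small sets live inline; once
/// the inline capacity overflows, spill to a hash map.
#[derive(Clone, Debug)]
enum SparseChunks {
    Small(Vec<(u32, u64)>), // at most SMALL elems before expanding
    Large(HashMap<u32, u64>),
}

const SMALL: usize = 4;

impl SparseChunks {
    fn new() -> Self {
        SparseChunks::Small(Vec::new())
    }
    fn get_or_insert(&mut self, key: u32) -> &mut u64 {
        if let SparseChunks::Small(v) = self {
            if v.len() >= SMALL && !v.iter().any(|&(k, _)| k == key) {
                // Expand to the large representation.
                let map = v.iter().cloned().collect();
                *self = SparseChunks::Large(map);
            }
        }
        match self {
            SparseChunks::Small(v) => {
                if let Some(i) = v.iter().position(|&(k, _)| k == key) {
                    return &mut v[i].1;
                }
                v.push((key, 0));
                &mut v.last_mut().unwrap().1
            }
            SparseChunks::Large(m) => m.entry(key).or_insert(0),
        }
    }
    fn iter(&self) -> Box<dyn Iterator<Item = (u32, u64)> + '_> {
        match self {
            SparseChunks::Small(v) => Box::new(v.iter().cloned()),
            SparseChunks::Large(m) => Box::new(m.iter().map(|(&k, &v)| (k, v))),
        }
    }
    /// Sparse union: walk only the chunks present in `other`, and report
    /// whether any new bits appeared in `self`.
    fn or(&mut self, other: &Self) -> bool {
        let mut changed = 0;
        for (k, bits) in other.iter() {
            if bits == 0 {
                continue;
            }
            let word = self.get_or_insert(k);
            changed |= bits & !*word;
            *word |= bits;
        }
        changed != 0
    }
}

fn main() {
    let mut a = SparseChunks::new();
    let mut b = SparseChunks::new();
    *b.get_or_insert(1000) = 0b1010;
    assert!(a.or(&b));
    assert!(!a.or(&b)); // second union changes nothing
}
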
+ for (word_idx, bits) in other.elems.iter() { + if bits == 0 { continue; } - changed |= *other_word & !*self_word; - *self_word |= *other_word; + let word_idx = word_idx as usize; + let self_word = self.elem(word_idx * BITS_PER_WORD); + changed |= bits & !*self_word; + *self_word |= bits; } changed != 0 } - pub fn and(&mut self, other: &Self) { - if other.bits.len() < self.bits.len() { - self.bits.truncate(other.bits.len()); - } - - for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { - *self_word &= *other_word; - } - } - - pub fn iter<'a>(&'a self) -> SetBitsIter<'a> { - let cur_word = if self.bits.len() > 0 { self.bits[0] } else { 0 }; - SetBitsIter { - words: &self.bits[..], - word_idx: 0, - cur_word, - } + pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + self.elems.iter().flat_map(|(word_idx, bits)| { + let word_idx = word_idx as usize; + set_bits(bits).map(move |i| BITS_PER_WORD * word_idx + i) + }) } } -impl std::cmp::PartialEq for BitVec { - fn eq(&self, other: &Self) -> bool { - let limit = std::cmp::min(self.bits.len(), other.bits.len()); - for i in 0..limit { - if self.bits[i] != other.bits[i] { - return false; - } - } - for i in limit..self.bits.len() { - if self.bits[i] != 0 { - return false; - } - } - for i in limit..other.bits.len() { - if other.bits[i] != 0 { - return false; - } - } - true - } +fn set_bits(bits: u64) -> impl Iterator { + let iter = SetBitsIter(bits); + iter } -impl std::cmp::Eq for BitVec {} -pub struct SetBitsIter<'a> { - words: &'a [u64], - word_idx: usize, - cur_word: u64, -} +pub struct SetBitsIter(u64); -impl<'a> Iterator for SetBitsIter<'a> { +impl Iterator for SetBitsIter { type Item = usize; fn next(&mut self) -> Option { - while self.cur_word == 0 { - if self.word_idx + 1 >= self.words.len() { - return None; - } - self.word_idx += 1; - self.cur_word = self.words[self.word_idx]; + if self.0 == 0 { + None + } else { + let bitidx = self.0.trailing_zeros(); + self.0 &= !(1 << bitidx); + Some(bitidx as usize) } - let bitidx = self.cur_word.trailing_zeros(); - self.cur_word &= !(1 << bitidx); - Some(self.word_idx * BITS_PER_WORD + bitidx as usize) } } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 0b9fc12a..bca281dc 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1007,8 +1007,6 @@ impl<'a, F: Function> Env<'a, F> { self.liveins.push(BitVec::new()); } - let num_vregs = self.func.num_vregs(); - let mut num_ranges = 0; // Create Uses and Defs referring to VRegs, and place the Uses @@ -1040,7 +1038,7 @@ impl<'a, F: Function> Env<'a, F> { // Init live-set to union of liveins from successors // (excluding backedges; those are handled below). - let mut live = BitVec::with_capacity(num_vregs); + let mut live = BitVec::new(); for &succ in self.func.block_succs(block) { live.or(&self.liveins[succ.index()]); } @@ -1655,18 +1653,21 @@ impl<'a, F: Function> Env<'a, F> { return false; } - // Sanity check: both bundles should contain only ranges with appropriate VReg classes. 
- let mut iter = self.bundles[from.index()].first_range; - while iter.is_valid() { - let vreg = self.ranges[iter.index()].vreg; - assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - iter = self.ranges[iter.index()].next_in_bundle; - } - let mut iter = self.bundles[to.index()].first_range; - while iter.is_valid() { - let vreg = self.ranges[iter.index()].vreg; - assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - iter = self.ranges[iter.index()].next_in_bundle; + #[cfg(debug)] + { + // Sanity check: both bundles should contain only ranges with appropriate VReg classes. + let mut iter = self.bundles[from.index()].first_range; + while iter.is_valid() { + let vreg = self.ranges[iter.index()].vreg; + assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + iter = self.ranges[iter.index()].next_in_bundle; + } + let mut iter = self.bundles[to.index()].first_range; + while iter.is_valid() { + let vreg = self.ranges[iter.index()].vreg; + assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + iter = self.ranges[iter.index()].next_in_bundle; + } } // Check for overlap in LiveRanges. From 02b6516acdb3c1eca296fa36bc08849a881f3179 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 20:47:17 -0700 Subject: [PATCH 017/155] Some memory-size/bitpacking optimizations --- src/bitvec.rs | 4 ++ src/checker.rs | 2 +- src/ion/mod.rs | 107 ++++++++++++++++++++++++++++--------------------- src/lib.rs | 61 +++++++++++----------------- 4 files changed, 91 insertions(+), 83 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index 7c0dbe2e..f3c5fb86 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -20,6 +20,7 @@ impl AdaptiveMap { fn new() -> Self { Self::Small(0, [INVALID, INVALID, INVALID, INVALID], [0, 0, 0, 0]) } + #[inline(never)] fn expand(&mut self) { match self { &mut Self::Small(len, ref keys, ref values) => { @@ -32,6 +33,7 @@ impl AdaptiveMap { _ => {} } } + #[inline(always)] fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 { let needs_expand = match self { &mut Self::Small(len, ref keys, ..) 
=> len == 4 && !keys.iter().any(|k| *k == key), @@ -58,6 +60,7 @@ impl AdaptiveMap { &mut Self::Large(ref mut map) => map.entry(key).or_insert(0), } } + #[inline(always)] fn get_mut(&mut self, key: u32) -> Option<&mut u64> { match self { &mut Self::Small(len, ref keys, ref mut values) => { @@ -71,6 +74,7 @@ impl AdaptiveMap { &mut Self::Large(ref mut map) => map.get_mut(&key), } } + #[inline(always)] fn get(&self, key: u32) -> Option<&u64> { match self { &Self::Small(len, ref keys, ref values) => { diff --git a/src/checker.rs b/src/checker.rs index 4e06c60d..05f32f53 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -526,7 +526,7 @@ impl<'a, F: Function> Checker<'a, F> { let mut safepoint_slots: HashMap> = HashMap::new(); for &(progpoint, slot) in &out.safepoint_slots { safepoint_slots - .entry(progpoint.inst) + .entry(progpoint.inst()) .or_insert_with(|| vec![]) .push(slot); } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index bca281dc..b77dba42 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -76,7 +76,7 @@ impl CodeRange { other.to > self.from && other.from < self.to } pub fn len(&self) -> usize { - self.to.inst.index() - self.from.inst.index() + self.to.inst().index() - self.from.inst().index() } } @@ -123,7 +123,7 @@ struct LiveRange { next_in_reg: LiveRangeIndex, // if a bundle partly fits, this is used to record LRs that do fit - reg_hint: Option, + reg_hint: PReg, merged_into: LiveRangeIndex, } @@ -172,12 +172,12 @@ impl LiveRange { struct Use { operand: Operand, pos: ProgPoint, - slot: usize, next_use: UseIndex, + slot: u8, is_def: bool, } -const SLOT_NONE: usize = usize::MAX; +const SLOT_NONE: u8 = u8::MAX; #[derive(Clone, Debug)] struct LiveBundle { @@ -216,10 +216,10 @@ impl LiveBundle { #[derive(Clone, Debug)] struct SpillSet { bundles: LiveBundleVec, - size: u32, - class: RegClass, slot: SpillSlotIndex, - reg_hint: Option, + reg_hint: PReg, + class: RegClass, + size: u8, } #[derive(Clone, Debug)] @@ -594,10 +594,21 @@ impl<'a> RegTraversalIter<'a> { pub fn new( env: &'a MachineEnv, class: RegClass, - mut hint_reg: Option, - mut hint2_reg: Option, + hint_reg: PReg, + hint2_reg: PReg, offset: usize, ) -> Self { + let mut hint_reg = if hint_reg != PReg::invalid() { + Some(hint_reg) + } else { + None + }; + let mut hint2_reg = if hint2_reg != PReg::invalid() { + Some(hint2_reg) + } else { + None + }; + if hint_reg.is_none() { hint_reg = hint2_reg; hint2_reg = None; @@ -744,7 +755,7 @@ impl<'a, F: Function> Env<'a, F> { last_use: UseIndex::invalid(), next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), - reg_hint: None, + reg_hint: PReg::invalid(), merged_into: LiveRangeIndex::invalid(), }); LiveRangeIndex::new(idx) @@ -1200,7 +1211,7 @@ impl<'a, F: Function> Env<'a, F> { self.uses.push(Use { operand, pos, - slot: i, + slot: i as u8, next_use: UseIndex::invalid(), is_def: true, }); @@ -1267,7 +1278,7 @@ impl<'a, F: Function> Env<'a, F> { self.uses.push(Use { operand, pos, - slot: i, + slot: i as u8, next_use: UseIndex::invalid(), is_def: false, }); @@ -1538,9 +1549,9 @@ impl<'a, F: Function> Env<'a, F> { log::debug!( " -> extra clobber {} at inst{}", preg, - pos.inst.index() + pos.inst().index() ); - extra_clobbers.push((preg, pos.inst)); + extra_clobbers.push((preg, pos.inst())); } } else { seen_fixed_for_vreg.push(op.vreg()); @@ -1552,7 +1563,7 @@ impl<'a, F: Function> Env<'a, F> { let mut use_iter = self.ranges[iter.index()].first_use; while use_iter.is_valid() { let pos = self.uses[use_iter.index()].pos; - let slot = 
self.uses[use_iter.index()].slot; + let slot = self.uses[use_iter.index()].slot as usize; fixup_multi_fixed_vregs( pos, slot, @@ -1917,13 +1928,13 @@ impl<'a, F: Function> Env<'a, F> { // compute its priority, and enqueue it. let ssidx = SpillSetIndex::new(self.spillsets.len()); let reg = self.vregs[vreg.index()].reg; - let size = self.func.spillslot_size(reg.class(), reg) as u32; + let size = self.func.spillslot_size(reg.class(), reg) as u8; self.spillsets.push(SpillSet { bundles: smallvec![], slot: SpillSlotIndex::invalid(), size, class: reg.class(), - reg_hint: None, + reg_hint: PReg::invalid(), }); self.bundles[bundle.index()].spillset = ssidx; let prio = self.compute_bundle_prio(bundle); @@ -2100,7 +2111,7 @@ impl<'a, F: Function> Env<'a, F> { return AllocRegResult::ConflictWithFixed; } } else { - self.ranges[iter.index()].reg_hint = Some(self.pregs[reg.index()].reg); + self.ranges[iter.index()].reg_hint = self.pregs[reg.index()].reg; } iter = next; } @@ -2210,7 +2221,7 @@ impl<'a, F: Function> Env<'a, F> { first_range.next_in_bundle ); minimal = first_range.next_in_bundle.is_invalid() - && first_range.range.from.inst == first_range.range.to.prev().inst; + && first_range.range.from.inst() == first_range.range.to.prev().inst(); log::debug!(" -> minimal: {}", minimal); } @@ -2385,7 +2396,7 @@ impl<'a, F: Function> Env<'a, F> { // Update last-before-conflict and first-before-conflict positions. let mut update_with_pos = |pos: ProgPoint| { - let before_inst = ProgPoint::before(pos.inst); + let before_inst = ProgPoint::before(pos.inst()); let before_next_inst = before_inst.next().next(); if before_inst > bundle_start && (conflict_from.is_none() || before_inst < conflict_from.unwrap()) @@ -2398,7 +2409,7 @@ impl<'a, F: Function> Env<'a, F> { && (conflict_to.is_none() || pos >= conflict_to.unwrap()) && (first_after_conflict.is_none() || pos > first_after_conflict.unwrap()) { - first_after_conflict = Some(ProgPoint::before(pos.inst.next())); + first_after_conflict = Some(ProgPoint::before(pos.inst().next())); } }; @@ -2483,9 +2494,9 @@ impl<'a, F: Function> Env<'a, F> { } else { // For an use, split before the instruction -- // this allows us to insert a move if necessary. 
- ProgPoint::before(use_data.pos.inst) + ProgPoint::before(use_data.pos.inst()) }; - let after_use_inst = ProgPoint::before(use_data.pos.inst.next()); + let after_use_inst = ProgPoint::before(use_data.pos.inst().next()); log::debug!( " -> splitting before and after use: {:?} and {:?}", before_use_inst, @@ -2767,7 +2778,7 @@ impl<'a, F: Function> Env<'a, F> { let hint2_reg = if self.bundles[bundle.index()].first_range.is_valid() { self.ranges[self.bundles[bundle.index()].first_range.index()].reg_hint } else { - None + PReg::invalid() }; log::debug!( "process_bundle: bundle {:?} requirement {:?} hint {:?} hint2 {:?}", @@ -2802,7 +2813,7 @@ impl<'a, F: Function> Env<'a, F> { self.stats.process_bundle_reg_success_fixed += 1; log::debug!(" -> allocated to fixed {:?}", preg_idx); self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = Some(alloc.as_reg().unwrap()); + .reg_hint = alloc.as_reg().unwrap(); return; } AllocRegResult::Conflict(bundles) => { @@ -2829,7 +2840,7 @@ impl<'a, F: Function> Env<'a, F> { let scan_offset = self.ranges[self.bundles[bundle.index()].first_range.index()] .range .from - .inst + .inst() .index() + bundle.index(); @@ -2855,7 +2866,7 @@ impl<'a, F: Function> Env<'a, F> { _ => panic!("Impossible result: {:?}", result), }; self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = Some(alloc.as_reg().unwrap()); + .reg_hint = alloc.as_reg().unwrap(); log::debug!(" -> definitely fits; assigning"); return; } @@ -2873,7 +2884,7 @@ impl<'a, F: Function> Env<'a, F> { self.stats.process_bundle_reg_success_any += 1; log::debug!(" -> allocated to any {:?}", preg_idx); self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = Some(alloc.as_reg().unwrap()); + .reg_hint = alloc.as_reg().unwrap(); return; } AllocRegResult::Conflict(bundles) => { @@ -2983,7 +2994,13 @@ impl<'a, F: Function> Env<'a, F> { let class = any_vreg.class(); let mut success = false; self.stats.spill_bundle_reg_probes += 1; - for preg in RegTraversalIter::new(self.env, class, None, None, bundle.index()) { + for preg in RegTraversalIter::new( + self.env, + class, + PReg::invalid(), + PReg::invalid(), + bundle.index(), + ) { let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = self.try_to_allocate_bundle_to_reg(bundle, preg_idx) @@ -3165,11 +3182,11 @@ impl<'a, F: Function> Env<'a, F> { } fn is_start_of_block(&self, pos: ProgPoint) -> bool { - let block = self.cfginfo.insn_block[pos.inst.index()]; + let block = self.cfginfo.insn_block[pos.inst().index()]; pos == self.cfginfo.block_entry[block.index()] } fn is_end_of_block(&self, pos: ProgPoint) -> bool { - let block = self.cfginfo.insn_block[pos.inst.index()]; + let block = self.cfginfo.insn_block[pos.inst().index()]; pos == self.cfginfo.block_exit[block.index()] } @@ -3365,7 +3382,7 @@ impl<'a, F: Function> Env<'a, F> { alloc, vreg.index() ); - assert_eq!(range.from.pos, InstPosition::Before); + assert_eq!(range.from.pos(), InstPosition::Before); self.insert_move(range.from, InsertMovePrio::Regular, prev_alloc, alloc); } } @@ -3375,7 +3392,7 @@ impl<'a, F: Function> Env<'a, F> { // already in this range (hence guaranteed to have the // same allocation) and if the vreg is live, add a // Source half-move. 
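
Roughly how the Source/Dest half-move pairing works; the key packing below (`half_move_key`, `HalfKind`, the 21-bit field widths) is invented for this sketch rather than taken from the patch:

/// Sketch: pack (from-block, to-block, vreg, kind) into one sortable
/// integer so that, after sorting, the Source half of an (edge, vreg)
/// pair sits immediately before its Dest halves.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum HalfKind {
    Source = 0,
    Dest = 1,
}

fn half_move_key(from_block: u32, to_block: u32, vreg: u32, kind: HalfKind) -> u64 {
    // Field widths are arbitrary for this sketch: 21 bits per block id,
    // 21 bits for the vreg, 1 bit for the kind.
    debug_assert!(from_block < (1 << 21) && to_block < (1 << 21) && vreg < (1 << 21));
    ((from_block as u64) << 43) | ((to_block as u64) << 22) | ((vreg as u64) << 1) | (kind as u64)
}

fn main() {
    let mut halves = vec![
        (half_move_key(1, 2, 7, HalfKind::Dest), "to: v7 in block2"),
        (half_move_key(1, 2, 7, HalfKind::Source), "from: v7 out of block1"),
    ];
    halves.sort_by_key(|&(k, _)| k);
    // After sorting, the Source comes first and its Dest(s) follow.
    assert!(halves[0].1.starts_with("from"));
}
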
- let mut block = self.cfginfo.insn_block[range.from.inst.index()]; + let mut block = self.cfginfo.insn_block[range.from.inst().index()]; while block.is_valid() && block.index() < self.func.blocks() { if range.to < self.cfginfo.block_exit[block.index()].next() { break; @@ -3456,7 +3473,7 @@ impl<'a, F: Function> Env<'a, F> { // this range and for which the vreg is live at the // start of the block. For each, for each predecessor, // add a Dest half-move. - let mut block = self.cfginfo.insn_block[range.from.inst.index()]; + let mut block = self.cfginfo.insn_block[range.from.inst().index()]; if self.cfginfo.block_entry[block.index()] < range.from { block = block.next(); } @@ -3559,13 +3576,13 @@ impl<'a, F: Function> Env<'a, F> { while use_iter.is_valid() { let usedata = &self.uses[use_iter.index()]; debug_assert!(range.contains_point(usedata.pos)); - let inst = usedata.pos.inst; + let inst = usedata.pos.inst(); let slot = usedata.slot; let operand = usedata.operand; // Safepoints add virtual uses with no slots; // avoid these. if slot != SLOT_NONE { - self.set_alloc(inst, slot, alloc); + self.set_alloc(inst, slot as usize, alloc); } if let OperandPolicy::Reuse(_) = operand.policy() { reuse_input_insts.push(inst); @@ -3574,15 +3591,15 @@ impl<'a, F: Function> Env<'a, F> { } // Scan over program move srcs/dsts to fill in allocations. - let move_src_start = if range.from.pos == InstPosition::Before { - (vreg, range.from.inst) + let move_src_start = if range.from.pos() == InstPosition::Before { + (vreg, range.from.inst()) } else { - (vreg, range.from.inst.next()) + (vreg, range.from.inst().next()) }; - let move_src_end = if range.to.pos == InstPosition::Before { - (vreg, range.to.inst) + let move_src_end = if range.to.pos() == InstPosition::Before { + (vreg, range.to.inst()) } else { - (vreg, range.to.inst.next()) + (vreg, range.to.inst().next()) }; log::debug!( "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", @@ -3610,8 +3627,8 @@ impl<'a, F: Function> Env<'a, F> { prog_move_src_idx += 1; } - let move_dst_start = (vreg, range.from.inst); - let move_dst_end = (vreg, range.to.inst); + let move_dst_start = (vreg, range.from.inst()); + let move_dst_end = (vreg, range.to.inst()); log::debug!( "vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}", vreg, @@ -3753,7 +3770,7 @@ impl<'a, F: Function> Env<'a, F> { Allocation::reg(self.pregs[to_preg.index()].reg), ); self.set_alloc( - progpoint.inst, + progpoint.inst(), slot, Allocation::reg(self.pregs[to_preg.index()].reg), ); diff --git a/src/lib.rs b/src/lib.rs index 2031f333..0232813e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -781,64 +781,51 @@ pub enum InstPosition { /// A program point: a single point before or after a given instruction. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ProgPoint { - pub inst: Inst, - pub pos: InstPosition, + bits: u32, } impl ProgPoint { + pub fn new(inst: Inst, pos: InstPosition) -> Self { + let bits = ((inst.0 as u32) << 1) | (pos as u8 as u32); + Self { bits } + } pub fn before(inst: Inst) -> Self { - Self { - inst, - pos: InstPosition::Before, - } + Self::new(inst, InstPosition::Before) } - pub fn after(inst: Inst) -> Self { - Self { - inst, - pos: InstPosition::After, + Self::new(inst, InstPosition::After) + } + pub fn inst(self) -> Inst { + // Cast to i32 to do an arithmetic right-shift, which will + // preserve an `Inst::invalid()` (which is -1, or all-ones). 
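
A standalone illustration of this packing, with an invented `Point` type and instruction indices as plain i32 values:

/// Sketch: a program point packed into one u32 — the instruction index
/// in the upper 31 bits, Before/After in the low bit. next()/prev()
/// become plain +1/-1, ordering is just integer ordering, and the
/// arithmetic right-shift in inst() preserves an all-ones "invalid"
/// index.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Point {
    bits: u32,
}

impl Point {
    fn before(inst: i32) -> Self {
        Point { bits: (inst as u32) << 1 }
    }
    fn after(inst: i32) -> Self {
        Point { bits: ((inst as u32) << 1) | 1 }
    }
    fn inst(self) -> i32 {
        (self.bits as i32) >> 1
    }
    fn is_after(self) -> bool {
        self.bits & 1 != 0
    }
    fn next(self) -> Self {
        Point { bits: self.bits + 1 }
    }
}

fn main() {
    let p = Point::before(5);
    assert!(p < Point::after(5) && Point::after(5) < Point::before(6));
    assert_eq!(p.next(), Point::after(5));
    assert!(p.next().is_after());
    // An invalid (all-ones) instruction index survives the round-trip.
    assert_eq!(Point::before(-1).inst(), -1);
}
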
+ Inst::new(((self.bits as i32) >> 1) as usize) + } + pub fn pos(self) -> InstPosition { + match self.bits & 1 { + 0 => InstPosition::Before, + 1 => InstPosition::After, + _ => unreachable!(), } } pub fn next(self) -> ProgPoint { - match self.pos { - InstPosition::Before => ProgPoint { - inst: self.inst, - pos: InstPosition::After, - }, - InstPosition::After => ProgPoint { - inst: self.inst.next(), - pos: InstPosition::Before, - }, + Self { + bits: self.bits + 1, } } pub fn prev(self) -> ProgPoint { - match self.pos { - InstPosition::Before => ProgPoint { - inst: self.inst.prev(), - pos: InstPosition::After, - }, - InstPosition::After => ProgPoint { - inst: self.inst, - pos: InstPosition::Before, - }, + Self { + bits: self.bits - 1, } } pub fn to_index(self) -> u32 { - debug_assert!(self.inst.index() <= ((1 << 31) - 1)); - ((self.inst.index() as u32) << 1) | (self.pos as u8 as u32) + self.bits } pub fn from_index(index: u32) -> Self { - let inst = Inst::new((index >> 1) as usize); - let pos = match index & 1 { - 0 => InstPosition::Before, - 1 => InstPosition::After, - _ => unreachable!(), - }; - Self { inst, pos } + Self { bits: index } } } From a148dccac3ac1a55d3049bfbc02877de24c0be54 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 22:02:10 -0700 Subject: [PATCH 018/155] Parameterize adaptive-map size in BitVec. --- src/bitvec.rs | 12 ++++++--- src/ion/mod.rs | 66 ++++++++++++++++++++++++-------------------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index f3c5fb86..df53bb55 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -7,10 +7,12 @@ use fxhash::FxHashMap; +const SMALL_ELEMS: usize = 12; + /// A hybrid large/small-mode sparse mapping from integer indices to elements. #[derive(Clone, Debug)] enum AdaptiveMap { - Small(u32, [u32; 4], [u64; 4]), + Small(u32, [u32; SMALL_ELEMS], [u64; SMALL_ELEMS]), Large(FxHashMap), } @@ -18,7 +20,7 @@ const INVALID: u32 = 0xffff_ffff; impl AdaptiveMap { fn new() -> Self { - Self::Small(0, [INVALID, INVALID, INVALID, INVALID], [0, 0, 0, 0]) + Self::Small(0, [INVALID; SMALL_ELEMS], [0; SMALL_ELEMS]) } #[inline(never)] fn expand(&mut self) { @@ -36,7 +38,9 @@ impl AdaptiveMap { #[inline(always)] fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 { let needs_expand = match self { - &mut Self::Small(len, ref keys, ..) => len == 4 && !keys.iter().any(|k| *k == key), + &mut Self::Small(len, ref keys, ..) 
=> { + len == SMALL_ELEMS as u32 && !keys.iter().any(|k| *k == key) + } _ => false, }; if needs_expand { @@ -50,7 +54,7 @@ impl AdaptiveMap { return &mut values[i as usize]; } } - assert!(*len < 4); + assert!(*len < SMALL_ELEMS as u32); let idx = *len; *len += 1; keys[idx as usize] = key; diff --git a/src/ion/mod.rs b/src/ion/mod.rs index b77dba42..fa28a76f 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -101,7 +101,6 @@ define_index!(LiveBundleIndex); define_index!(LiveRangeIndex); define_index!(SpillSetIndex); define_index!(UseIndex); -define_index!(DefIndex); define_index!(VRegIndex); define_index!(PRegIndex); define_index!(SpillSlotIndex); @@ -174,7 +173,6 @@ struct Use { pos: ProgPoint, next_use: UseIndex, slot: u8, - is_def: bool, } const SLOT_NONE: u8 = u8::MAX; @@ -224,8 +222,6 @@ struct SpillSet { #[derive(Clone, Debug)] struct VRegData { - reg: VReg, - def: DefIndex, blockparam: Block, first_range: LiveRangeIndex, is_ref: bool, @@ -291,6 +287,7 @@ struct Env<'a, F: Function> { spillsets: Vec, uses: Vec, vregs: Vec, + vreg_regs: Vec, pregs: Vec, allocation_queue: PrioQueue, hot_code: LiveRangeSet, @@ -454,6 +451,7 @@ impl LiveRangeSet { } } +#[inline(always)] fn spill_weight_from_policy(policy: OperandPolicy) -> u32 { match policy { OperandPolicy::Any => 1000, @@ -673,6 +671,7 @@ impl<'a, F: Function> Env<'a, F> { spillsets: vec![], uses: vec![], vregs: vec![], + vreg_regs: vec![], pregs: vec![], allocation_queue: PrioQueue::new(), clobbers: vec![], @@ -716,13 +715,14 @@ impl<'a, F: Function> Env<'a, F> { for idx in 0..self.func.num_vregs() { // We'll fill in the real details when we see the def. let reg = VReg::new(idx, RegClass::Int); - self.add_vreg(VRegData { + self.add_vreg( reg, - def: DefIndex::invalid(), - first_range: LiveRangeIndex::invalid(), - blockparam: Block::invalid(), - is_ref: false, - }); + VRegData { + first_range: LiveRangeIndex::invalid(), + blockparam: Block::invalid(), + is_ref: false, + }, + ); } for v in self.func.reftype_vregs() { self.vregs[v.vreg()].is_ref = true; @@ -737,9 +737,10 @@ impl<'a, F: Function> Env<'a, F> { } } - fn add_vreg(&mut self, data: VRegData) -> VRegIndex { + fn add_vreg(&mut self, reg: VReg, data: VRegData) -> VRegIndex { let idx = self.vregs.len(); self.vregs.push(data); + self.vreg_regs.push(reg); VRegIndex::new(idx) } @@ -929,7 +930,7 @@ impl<'a, F: Function> Env<'a, F> { ); lrdata.uses_spill_weight -= spill_weight_from_policy(usedata.operand.policy()); - if usedata.is_def { + if usedata.operand.kind() == OperandKind::Def { lrdata.uses_spill_weight -= 2000; } } @@ -981,7 +982,7 @@ impl<'a, F: Function> Env<'a, F> { spill_weight_from_policy(policy) ); self.ranges[into.index()].uses_spill_weight += spill_weight_from_policy(policy); - if self.uses[u.index()].is_def { + if self.uses[u.index()].operand.kind() == OperandKind::Def { self.ranges[into.index()].uses_spill_weight += 2000; } log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight); @@ -1071,7 +1072,7 @@ impl<'a, F: Function> Env<'a, F> { // Create vreg data for blockparams. for param in self.func.block_params(block) { - self.vregs[param.vreg()].reg = *param; + self.vreg_regs[param.vreg()] = *param; self.vregs[param.vreg()].blockparam = block; } @@ -1161,7 +1162,7 @@ impl<'a, F: Function> Env<'a, F> { } live.set(dst.vreg(), false); vreg_ranges[dst.vreg()] = LiveRangeIndex::invalid(); - self.vregs[dst.vreg()].reg = dst; + self.vreg_regs[dst.vreg()] = dst; // Handle the use w.r.t. 
liveranges: make it live // and create an initial LR back to the start of @@ -1213,13 +1214,12 @@ impl<'a, F: Function> Env<'a, F> { pos, slot: i as u8, next_use: UseIndex::invalid(), - is_def: true, }); log::debug!("Def of {} at {:?}", operand.vreg(), pos); // Fill in vreg's actual data. - self.vregs[operand.vreg().vreg()].reg = operand.vreg(); + self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); // Trim the range for this vreg to start // at `pos` if it previously ended at the @@ -1280,7 +1280,6 @@ impl<'a, F: Function> Env<'a, F> { pos, slot: i as u8, next_use: UseIndex::invalid(), - is_def: false, }); // Create/extend the LiveRange and add the use to the range. @@ -1443,7 +1442,7 @@ impl<'a, F: Function> Env<'a, F> { // Create a virtual use. let pos = ProgPoint::before(self.safepoints[safepoint_idx]); let operand = Operand::new( - self.vregs[vreg.index()].reg, + self.vreg_regs[vreg.index()], OperandPolicy::Stack, OperandKind::Use, OperandPos::Before, @@ -1456,7 +1455,6 @@ impl<'a, F: Function> Env<'a, F> { pos, slot: SLOT_NONE, next_use: UseIndex::invalid(), - is_def: false, }); // Create/extend the LiveRange and add the use to the range. @@ -1659,8 +1657,8 @@ impl<'a, F: Function> Env<'a, F> { // have to have the same regclass (because bundles start with one vreg // and all merging happens here) so we can just sample the first vreg of // each bundle. - let rc = self.vregs[vreg_from.index()].reg.class(); - if rc != self.vregs[vreg_to.index()].reg.class() { + let rc = self.vreg_regs[vreg_from.index()].class(); + if rc != self.vreg_regs[vreg_to.index()].class() { return false; } @@ -1927,7 +1925,7 @@ impl<'a, F: Function> Env<'a, F> { // First time seeing `bundle`: allocate a spillslot for it, // compute its priority, and enqueue it. let ssidx = SpillSetIndex::new(self.spillsets.len()); - let reg = self.vregs[vreg.index()].reg; + let reg = self.vreg_regs[vreg.index()]; let size = self.func.spillslot_size(reg.class(), reg) as u8; self.spillsets.push(SpillSet { bundles: smallvec![], @@ -2008,22 +2006,21 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("Uses:"); for (i, u) in self.uses.iter().enumerate() { log::debug!( - "use{}: op={:?} pos={:?} slot={} next_use={:?} is_def={:?}", + "use{}: op={:?} pos={:?} slot={} next_use={:?}", i, u.operand, u.pos, u.slot, u.next_use, - u.is_def, ); } } fn compute_requirement(&self, bundle: LiveBundleIndex) -> Option { - let init_vreg = self.vregs[self.ranges[self.bundles[bundle.index()].first_range.index()] - .vreg - .index()] - .reg; + let init_vreg = self.vreg_regs[self.ranges + [self.bundles[bundle.index()].first_range.index()] + .vreg + .index()]; let class = init_vreg.class(); let mut needed = Requirement::Any(class); @@ -2486,7 +2483,7 @@ impl<'a, F: Function> Env<'a, F> { while use_idx.is_valid() { let use_data = &self.uses[use_idx.index()]; log::debug!(" -> use: {:?}", use_data); - let before_use_inst = if use_data.is_def { + let before_use_inst = if use_data.operand.kind() == OperandKind::Def { // For a def, split *at* the def -- this may be an // After point, but the value cannot be live into // the def so we don't need to insert a move. 
@@ -2986,11 +2983,10 @@ impl<'a, F: Function> Env<'a, F> { fn try_allocating_regs_for_spilled_bundles(&mut self) { for i in 0..self.spilled_bundles.len() { let bundle = self.spilled_bundles[i]; // don't borrow self - let any_vreg = self.vregs[self.ranges + let any_vreg = self.vreg_regs[self.ranges [self.bundles[bundle.index()].first_range.index()] .vreg - .index()] - .reg; + .index()]; let class = any_vreg.class(); let mut success = false; self.stats.spill_bundle_reg_probes += 1; @@ -3365,7 +3361,7 @@ impl<'a, F: Function> Env<'a, F> { let prev_range = self.ranges[prev.index()].range; let first_use = self.ranges[iter.index()].first_use; let first_is_def = if first_use.is_valid() { - self.uses[first_use.index()].is_def + self.uses[first_use.index()].operand.kind() == OperandKind::Def } else { false }; @@ -3945,7 +3941,7 @@ impl<'a, F: Function> Env<'a, F> { let params = &self.blockparam_allocs[start..i]; let vregs = params .iter() - .map(|(_, _, vreg_idx, _)| self.vregs[vreg_idx.index()].reg) + .map(|(_, _, vreg_idx, _)| self.vreg_regs[vreg_idx.index()]) .collect::>(); let allocs = params .iter() From 2ff02b50a33cc27902863b3111ee17c1e3021053 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 22:46:16 -0700 Subject: [PATCH 019/155] Some perf opts in liveness computation and set impl: - Cache the most recent u64 chunk in the set to avoid some hashmap lookups; - Defer the live-set union'ing over the loop body until query time (remember the set that would have been union'd in instead), and lazily propagate the liveness bit at that query time, union-find style; - Do n-1 rather than n union operations for n successors (first is a clone instead); - Don't union in liveness sets from blocks we haven't visited yet (the loop-body/backedge handling handles these). --- src/bitvec.rs | 87 +++++++++++++++++++++++++++++++++++++++----------- src/ion/mod.rs | 34 +++++++++++++++++--- 2 files changed, 97 insertions(+), 24 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index df53bb55..46a56ff3 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -3,34 +3,53 @@ * exception. See `LICENSE` for details. */ -//! Bit vectors. +//! Index sets: sets of integers that represent indices into a space. +//! +//! For historical reasons this is called a `BitVec` but it is no +//! longer a dense bitvector; the chunked adaptive-sparse data +//! structure here has better performance. use fxhash::FxHashMap; +use std::cell::Cell; const SMALL_ELEMS: usize = 12; -/// A hybrid large/small-mode sparse mapping from integer indices to elements. +/// A hybrid large/small-mode sparse mapping from integer indices to +/// elements. +/// +/// The trailing `(u32, u64)` elements in each variant is a one-item +/// cache to allow fast access when streaming through. 
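
The cache pattern in isolation, as an illustrative sketch — `CachedChunks` and `NO_KEY` are invented names, and std's HashMap stands in for the adaptive map:

use std::cell::Cell;
use std::collections::HashMap;

/// Sketch: a read-through cache of the last (key, value) pair looked
/// up, kept in a Cell so that `get` can stay `&self`. Streaming
/// queries that hit the same 64-bit chunk repeatedly skip the hash
/// lookup entirely.
struct CachedChunks {
    map: HashMap<u32, u64>,
    last: Cell<(u32, u64)>,
}

const NO_KEY: u32 = u32::MAX;

impl CachedChunks {
    fn new(map: HashMap<u32, u64>) -> Self {
        CachedChunks { map, last: Cell::new((NO_KEY, 0)) }
    }
    fn get(&self, key: u32) -> Option<u64> {
        if self.last.get().0 == key {
            return Some(self.last.get().1);
        }
        let value = self.map.get(&key).copied();
        if let Some(v) = value {
            self.last.set((key, v));
        }
        value
    }
    fn insert(&mut self, key: u32, value: u64) {
        // Any write must invalidate (or update) the cached entry.
        self.last.set((NO_KEY, 0));
        self.map.insert(key, value);
    }
}

fn main() {
    let mut c = CachedChunks::new(HashMap::new());
    c.insert(3, 0xff);
    assert_eq!(c.get(3), Some(0xff)); // fills the cache
    assert_eq!(c.get(3), Some(0xff)); // served from the cache
    c.insert(3, 0x0f);
    assert_eq!(c.get(3), Some(0x0f)); // cache was invalidated
}
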
#[derive(Clone, Debug)] enum AdaptiveMap { - Small(u32, [u32; SMALL_ELEMS], [u64; SMALL_ELEMS]), - Large(FxHashMap), + Small( + u32, + [u32; SMALL_ELEMS], + [u64; SMALL_ELEMS], + Cell<(u32, u64)>, + ), + Large(FxHashMap, Cell<(u32, u64)>), } const INVALID: u32 = 0xffff_ffff; impl AdaptiveMap { fn new() -> Self { - Self::Small(0, [INVALID; SMALL_ELEMS], [0; SMALL_ELEMS]) + Self::Small( + 0, + [INVALID; SMALL_ELEMS], + [0; SMALL_ELEMS], + Cell::new((INVALID, 0)), + ) } #[inline(never)] fn expand(&mut self) { match self { - &mut Self::Small(len, ref keys, ref values) => { + &mut Self::Small(len, ref keys, ref values, ref cache) => { let mut map = FxHashMap::default(); for i in 0..len { map.insert(keys[i as usize], values[i as usize]); } - *self = Self::Large(map); + *self = Self::Large(map, cache.clone()); } _ => {} } @@ -48,7 +67,10 @@ impl AdaptiveMap { } match self { - &mut Self::Small(ref mut len, ref mut keys, ref mut values) => { + &mut Self::Small(ref mut len, ref mut keys, ref mut values, ref cached) => { + if cached.get().0 == key { + cached.set((INVALID, 0)); + } for i in 0..*len { if keys[i as usize] == key { return &mut values[i as usize]; @@ -61,13 +83,21 @@ impl AdaptiveMap { values[idx as usize] = 0; &mut values[idx as usize] } - &mut Self::Large(ref mut map) => map.entry(key).or_insert(0), + &mut Self::Large(ref mut map, ref cached) => { + if cached.get().0 == key { + cached.set((INVALID, 0)); + } + map.entry(key).or_insert(0) + } } } #[inline(always)] fn get_mut(&mut self, key: u32) -> Option<&mut u64> { match self { - &mut Self::Small(len, ref keys, ref mut values) => { + &mut Self::Small(len, ref keys, ref mut values, ref cached) => { + if cached.get().0 == key { + cached.set((INVALID, 0)); + } for i in 0..len { if keys[i as usize] == key { return Some(&mut values[i as usize]); @@ -75,29 +105,48 @@ impl AdaptiveMap { } None } - &mut Self::Large(ref mut map) => map.get_mut(&key), + &mut Self::Large(ref mut map, ref cached) => { + if cached.get().0 == key { + cached.set((INVALID, 0)); + } + map.get_mut(&key) + } } } #[inline(always)] - fn get(&self, key: u32) -> Option<&u64> { + fn get(&self, key: u32) -> Option { match self { - &Self::Small(len, ref keys, ref values) => { + &Self::Small(len, ref keys, ref values, ref cached) => { + if cached.get().0 == key { + return Some(cached.get().1); + } for i in 0..len { if keys[i as usize] == key { - return Some(&values[i as usize]); + let value = values[i as usize]; + cached.set((key, value)); + return Some(value); } } None } - &Self::Large(ref map) => map.get(&key), + &Self::Large(ref map, ref cached) => { + if cached.get().0 == key { + return Some(cached.get().1); + } + let value = map.get(&key).cloned(); + if let Some(value) = value { + cached.set((key, value)); + } + value + } } } fn iter<'a>(&'a self) -> AdaptiveMapIter<'a> { match self { - &Self::Small(len, ref keys, ref values) => { + &Self::Small(len, ref keys, ref values, ..) => { AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize]) } - &Self::Large(ref map) => AdaptiveMapIter::Large(map.iter()), + &Self::Large(ref map, ..) 
=> AdaptiveMapIter::Large(map.iter()), } } } @@ -155,7 +204,7 @@ impl BitVec { } #[inline(always)] - fn maybe_elem(&self, bit_index: usize) -> Option<&u64> { + fn maybe_elem(&self, bit_index: usize) -> Option { let word_index = (bit_index / BITS_PER_WORD) as u32; self.elems.get(word_index) } @@ -178,7 +227,7 @@ impl BitVec { pub fn get(&self, idx: usize) -> bool { let bit = idx % BITS_PER_WORD; if let Some(word) = self.maybe_elem(idx) { - (*word & (1 << bit)) != 0 + (word & (1 << bit)) != 0 } else { false } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index fa28a76f..a93ded76 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -267,6 +267,7 @@ struct Env<'a, F: Function> { env: &'a MachineEnv, cfginfo: CFGInfo, liveins: Vec, + livein_parents: Vec>, /// Blockparam outputs: from-vreg, (end of) from-block, (start of) /// to-block, to-vreg. The field order is significant: these are sorted so /// that a scan over vregs, then blocks in each range, can scan in @@ -663,6 +664,7 @@ impl<'a, F: Function> Env<'a, F> { cfginfo, liveins: vec![], + livein_parents: vec![], blockparam_outs: vec![], blockparam_ins: vec![], blockparam_allocs: vec![], @@ -1013,10 +1015,24 @@ impl<'a, F: Function> Env<'a, F> { .insert(LiveRangeKey::from_range(&range), lr); } + fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool { + if self.liveins[block.index()].get(vreg.index()) { + return true; + } + for &parent in &self.livein_parents[block.index()] { + if self.liveins[parent.index()].get(vreg.index()) { + self.liveins[block.index()].set(vreg.index(), true); + return true; + } + } + false + } + fn compute_liveness(&mut self) { // Create initial LiveIn bitsets. for _ in 0..self.func.blocks() { self.liveins.push(BitVec::new()); + self.livein_parents.push(vec![]); } let mut num_ranges = 0; @@ -1050,10 +1066,18 @@ impl<'a, F: Function> Env<'a, F> { // Init live-set to union of liveins from successors // (excluding backedges; those are handled below). - let mut live = BitVec::new(); + let mut live = None; for &succ in self.func.block_succs(block) { - live.or(&self.liveins[succ.index()]); + if block_to_postorder[succ.index()].is_none() { + continue; + } + if live.is_none() { + live = Some(self.liveins[succ.index()].clone()); + } else { + live.as_mut().unwrap().or(&self.liveins[succ.index()]); + } } + let mut live = live.unwrap_or(BitVec::new()); // Initially, registers are assumed live for the whole block. 
for vreg in live.iter() { @@ -1404,7 +1428,7 @@ impl<'a, F: Function> Env<'a, F> { ); log::debug!(" -> loop range {:?}", loop_range); for &loopblock in loop_blocks { - self.liveins[loopblock.index()].or(&live); + self.livein_parents[loopblock.index()].push(block); } for vreg in live.iter() { log::debug!( @@ -3404,7 +3428,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } log::debug!(" -> out of this range, requires half-move if live"); - if self.liveins[succ.index()].get(vreg.index()) { + if self.is_live_in(succ, vreg) { log::debug!(" -> live at input to succ, adding halfmove"); half_moves.push(HalfMove { key: half_move_key(block, succ, vreg, HalfMoveKind::Source), @@ -3524,7 +3548,7 @@ impl<'a, F: Function> Env<'a, F> { blockparam_in_idx += 1; } - if !self.liveins[block.index()].get(vreg.index()) { + if !self.is_live_in(block, vreg) { block = block.next(); continue; } From 2ba518517d91d735aceff3374fdd1936bc1e5ee0 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 23:29:59 -0700 Subject: [PATCH 020/155] Fuzzbugfix: actually do need eager liveness computation; must uphold invariant that all earlier-in-postorder blocks have full livein sets. --- src/bitvec.rs | 9 ++++++++- src/fuzzing/func.rs | 2 +- src/ion/mod.rs | 16 ++-------------- src/lib.rs | 16 +++++++++++++++- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index 46a56ff3..5c2cc2f4 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -177,7 +177,7 @@ impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> { /// A conceptually infinite-length bitvector that allows bitwise operations and /// iteration over set bits efficiently. -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct BitVec { elems: AdaptiveMap, } @@ -275,6 +275,13 @@ impl Iterator for SetBitsIter { } } +impl std::fmt::Debug for BitVec { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let vals = self.iter().collect::>(); + write!(f, "{:?}", vals) + } +} + #[cfg(test)] mod test { use super::BitVec; diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 5e8bff86..ae8dccef 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -130,7 +130,7 @@ impl Function for Func { &self.reftype_vregs[..] } - fn is_move(&self, insn: Inst) -> Option<(VReg, VReg)> { + fn is_move(&self, _: Inst) -> Option<(VReg, VReg)> { None } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index a93ded76..0a5a8163 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -267,7 +267,6 @@ struct Env<'a, F: Function> { env: &'a MachineEnv, cfginfo: CFGInfo, liveins: Vec, - livein_parents: Vec>, /// Blockparam outputs: from-vreg, (end of) from-block, (start of) /// to-block, to-vreg. The field order is significant: these are sorted so /// that a scan over vregs, then blocks in each range, can scan in @@ -664,7 +663,6 @@ impl<'a, F: Function> Env<'a, F> { cfginfo, liveins: vec![], - livein_parents: vec![], blockparam_outs: vec![], blockparam_ins: vec![], blockparam_allocs: vec![], @@ -1016,23 +1014,13 @@ impl<'a, F: Function> Env<'a, F> { } fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool { - if self.liveins[block.index()].get(vreg.index()) { - return true; - } - for &parent in &self.livein_parents[block.index()] { - if self.liveins[parent.index()].get(vreg.index()) { - self.liveins[block.index()].set(vreg.index(), true); - return true; - } - } - false + self.liveins[block.index()].get(vreg.index()) } fn compute_liveness(&mut self) { // Create initial LiveIn bitsets. 
for _ in 0..self.func.blocks() { self.liveins.push(BitVec::new()); - self.livein_parents.push(vec![]); } let mut num_ranges = 0; @@ -1428,7 +1416,7 @@ impl<'a, F: Function> Env<'a, F> { ); log::debug!(" -> loop range {:?}", loop_range); for &loopblock in loop_blocks { - self.livein_parents[loopblock.index()].push(block); + self.liveins[loopblock.index()].or(&live); } for vreg in live.iter() { log::debug!( diff --git a/src/lib.rs b/src/lib.rs index 0232813e..437e6ba9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -779,11 +779,25 @@ pub enum InstPosition { } /// A program point: a single point before or after a given instruction. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ProgPoint { bits: u32, } +impl std::fmt::Debug for ProgPoint { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "progpoint{}{}", + self.inst().index(), + match self.pos() { + InstPosition::Before => "-pre", + InstPosition::After => "-post", + } + ) + } +} + impl ProgPoint { pub fn new(inst: Inst, pos: InstPosition) -> Self { let bits = ((inst.0 as u32) << 1) | (pos as u8 as u32); From 42582e0c6fa81392606fe352cf32e44f9e617792 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 00:19:41 -0700 Subject: [PATCH 021/155] Some stats for loop effects on liveins: 487k loop set-unions (441 loops) in one func in bz2 -- fix TBD --- src/ion/mod.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 0a5a8163..81e517ad 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -537,6 +537,10 @@ enum InsertMovePrio { #[derive(Clone, Copy, Debug, Default)] pub struct Stats { + livein_blocks: usize, + livein_succ_unions: usize, + livein_loops: usize, + livein_loop_unions: usize, initial_liverange_count: usize, merged_bundle_count: usize, process_bundle_count: usize, @@ -1052,6 +1056,8 @@ impl<'a, F: Function> Env<'a, F> { let block = self.cfginfo.postorder[i]; block_to_postorder[block.index()] = Some(i as u32); + self.stats.livein_blocks += 1; + // Init live-set to union of liveins from successors // (excluding backedges; those are handled below). 
let mut live = None; @@ -1064,6 +1070,7 @@ impl<'a, F: Function> Env<'a, F> { } else { live.as_mut().unwrap().or(&self.liveins[succ.index()]); } + self.stats.livein_succ_unions += 1; } let mut live = live.unwrap_or(BitVec::new()); @@ -1415,7 +1422,9 @@ impl<'a, F: Function> Env<'a, F> { i ); log::debug!(" -> loop range {:?}", loop_range); + self.stats.livein_loops += 1; for &loopblock in loop_blocks { + self.stats.livein_loop_unions += 1; self.liveins[loopblock.index()].or(&live); } for vreg in live.iter() { From 3713d6131e8082c9d5932153e86f41f8cdddad4d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 01:22:12 -0700 Subject: [PATCH 022/155] Replace approximate liveness with true iterative liveness; turns out it is better to improve accuracy so that later stages of the allocator have less wasted work/interference --- src/ion/mod.rs | 342 +++++++++++++++---------------------------------- 1 file changed, 103 insertions(+), 239 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 81e517ad..824b18b0 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -49,6 +49,7 @@ use crate::{ MachineEnv, Operand, OperandKind, OperandPolicy, OperandPos, Output, PReg, ProgPoint, RegAllocError, RegClass, SpillSlot, VReg, }; +use fxhash::FxHashSet; use log::debug; use smallvec::{smallvec, SmallVec}; use std::cmp::Ordering; @@ -267,6 +268,7 @@ struct Env<'a, F: Function> { env: &'a MachineEnv, cfginfo: CFGInfo, liveins: Vec, + liveouts: Vec, /// Blockparam outputs: from-vreg, (end of) from-block, (start of) /// to-block, to-vreg. The field order is significant: these are sorted so /// that a scan over vregs, then blocks in each range, can scan in @@ -293,6 +295,7 @@ struct Env<'a, F: Function> { hot_code: LiveRangeSet, clobbers: Vec, // Sorted list of insts with clobbers. safepoints: Vec, // Sorted list of safepoint insts. + safepoints_per_vreg: HashMap>, spilled_bundles: Vec, spillslots: Vec, @@ -538,9 +541,7 @@ enum InsertMovePrio { #[derive(Clone, Copy, Debug, Default)] pub struct Stats { livein_blocks: usize, - livein_succ_unions: usize, - livein_loops: usize, - livein_loop_unions: usize, + livein_iterations: usize, initial_liverange_count: usize, merged_bundle_count: usize, process_bundle_count: usize, @@ -667,6 +668,7 @@ impl<'a, F: Function> Env<'a, F> { cfginfo, liveins: vec![], + liveouts: vec![], blockparam_outs: vec![], blockparam_ins: vec![], blockparam_allocs: vec![], @@ -680,6 +682,7 @@ impl<'a, F: Function> Env<'a, F> { allocation_queue: PrioQueue::new(), clobbers: vec![], safepoints: vec![], + safepoints_per_vreg: HashMap::new(), hot_code: LiveRangeSet::new(), spilled_bundles: vec![], spillslots: vec![], @@ -1022,57 +1025,96 @@ impl<'a, F: Function> Env<'a, F> { } fn compute_liveness(&mut self) { - // Create initial LiveIn bitsets. + // Create initial LiveIn and LiveOut bitsets. for _ in 0..self.func.blocks() { self.liveins.push(BitVec::new()); + self.liveouts.push(BitVec::new()); + } + + // Run a worklist algorithm to precisely compute liveins and + // liveouts. + let mut workqueue = VecDeque::new(); + let mut workqueue_set = FxHashSet::default(); + // Initialize workqueue with postorder traversal. + for &block in &self.cfginfo.postorder[..] 
{ + workqueue.push_back(block); + workqueue_set.insert(block); + } + + while !workqueue.is_empty() { + let block = workqueue.pop_front().unwrap(); + workqueue_set.remove(&block); + + log::debug!("computing liveins for block{}", block.index()); + + self.stats.livein_iterations += 1; + + let mut live = self.liveouts[block.index()].clone(); + for inst in self.func.block_insns(block).rev().iter() { + if let Some((src, dst)) = self.func.is_move(inst) { + live.set(dst.vreg(), false); + live.set(src.vreg(), true); + } + for pos in &[OperandPos::After, OperandPos::Both, OperandPos::Before] { + for op in self.func.inst_operands(inst) { + if op.pos() == *pos { + match op.kind() { + OperandKind::Use => { + live.set(op.vreg().vreg(), true); + } + OperandKind::Def => { + live.set(op.vreg().vreg(), false); + } + } + } + } + } + } + for &blockparam in self.func.block_params(block) { + live.set(blockparam.vreg(), false); + } + + for &pred in self.func.block_preds(block) { + if self.liveouts[pred.index()].or(&live) { + if !workqueue_set.contains(&pred) { + workqueue_set.insert(pred); + workqueue.push_back(pred); + } + } + } + + log::debug!("computed liveins at block{}: {:?}", block.index(), live); + self.liveins[block.index()] = live; } let mut num_ranges = 0; + for &vreg in self.func.reftype_vregs() { + self.safepoints_per_vreg.insert(vreg.vreg(), HashSet::new()); + } + // Create Uses and Defs referring to VRegs, and place the Uses // in LiveRanges. // - // We iterate backward, so as long as blocks are well-ordered - // (in RPO), we see uses before defs. - // - // Because of this, we can construct live ranges in one pass, - // i.e., considering each block once, propagating live - // registers backward across edges to a bitset at each block - // exit point, gen'ing at uses, kill'ing at defs, and meeting - // with a union. - let mut block_to_postorder: SmallVec<[Option; 16]> = - smallvec![None; self.func.blocks()]; - for i in 0..self.cfginfo.postorder.len() { - let block = self.cfginfo.postorder[i]; - block_to_postorder[block.index()] = Some(i as u32); - } + // We already computed precise liveouts and liveins for every + // block above, so we don't need to run an iterative algorithm + // here; instead, every block's computation is purely local, + // from end to start. // Track current LiveRange for each vreg. + // + // Invariant: a stale range may be present here; ranges are + // only valid if `live.get(vreg)` is true. let mut vreg_ranges: Vec = vec![LiveRangeIndex::invalid(); self.func.num_vregs()]; - for i in 0..self.cfginfo.postorder.len() { - // (avoid borrowing `self`) - let block = self.cfginfo.postorder[i]; - block_to_postorder[block.index()] = Some(i as u32); + for i in (0..self.func.blocks()).rev() { + let block = Block::new(i); self.stats.livein_blocks += 1; - // Init live-set to union of liveins from successors - // (excluding backedges; those are handled below). - let mut live = None; - for &succ in self.func.block_succs(block) { - if block_to_postorder[succ.index()].is_none() { - continue; - } - if live.is_none() { - live = Some(self.liveins[succ.index()].clone()); - } else { - live.as_mut().unwrap().or(&self.liveins[succ.index()]); - } - self.stats.livein_succ_unions += 1; - } - let mut live = live.unwrap_or(BitVec::new()); + // Init our local live-in set. + let mut live = self.liveouts[block.index()].clone(); // Initially, registers are assumed live for the whole block. 
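The hunk above replaces the approximate, loop-based livein computation with a standard backward worklist dataflow. A self-contained sketch of that algorithm over a toy CFG (usize block ids, u32 variables, per-block use/def summaries); in the allocator the sets are BitVecs, the scan is per instruction, and the worklist is seeded in postorder.

    use std::collections::{HashSet, VecDeque};

    fn compute_liveness(
        preds: &[Vec<usize>],
        upward_uses: &[HashSet<u32>], // vars read before any write in the block
        defs: &[HashSet<u32>],        // vars written anywhere in the block
    ) -> (Vec<HashSet<u32>>, Vec<HashSet<u32>>) {
        let n = preds.len();
        let mut livein = vec![HashSet::new(); n];
        let mut liveout: Vec<HashSet<u32>> = vec![HashSet::new(); n];
        let mut queue: VecDeque<usize> = (0..n).collect();
        let mut queued: HashSet<usize> = (0..n).collect();

        while let Some(b) = queue.pop_front() {
            queued.remove(&b);
            // live-in(b) = upward_uses(b) ∪ (live-out(b) \ defs(b))
            let mut live = liveout[b].clone();
            for d in &defs[b] {
                live.remove(d);
            }
            live.extend(upward_uses[b].iter().copied());
            livein[b] = live;
            // A block's live-in feeds every predecessor's live-out; requeue
            // predecessors whose sets grew, until a fixpoint is reached.
            for &p in &preds[b] {
                let before = liveout[p].len();
                liveout[p].extend(livein[b].iter().copied());
                if liveout[p].len() != before && queued.insert(p) {
                    queue.push_back(p);
                }
            }
        }
        (livein, liveout)
    }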
for vreg in live.iter() { @@ -1119,9 +1161,7 @@ impl<'a, F: Function> Env<'a, F> { if self.func.inst_clobbers(inst).len() > 0 { self.clobbers.push(inst); } - if self.func.is_safepoint(inst) { - self.safepoints.push(inst); - } + // Mark clobbers with CodeRanges on PRegs. for i in 0..self.func.inst_clobbers(inst).len() { // don't borrow `self` @@ -1160,7 +1200,7 @@ impl<'a, F: Function> Env<'a, F> { let pos = ProgPoint::after(inst); let mut dst_lr = vreg_ranges[dst.vreg()]; // If there was no liverange (dead def), create a trivial one. - if dst_lr.is_invalid() { + if !live.get(dst.vreg()) { dst_lr = self.add_liverange_to_vreg( VRegIndex::new(dst.vreg()), CodeRange { @@ -1196,12 +1236,12 @@ impl<'a, F: Function> Env<'a, F> { range, &mut num_ranges, ); - let src_is_dead_after_move = !vreg_ranges[src.vreg()].is_valid(); vreg_ranges[src.vreg()] = src_lr; log::debug!(" -> src LR {:?}", src_lr); // Add to live-set. + let src_is_dead_after_move = !live.get(src.vreg()); live.set(src.vreg(), true); // Add to program-moves lists. @@ -1248,7 +1288,7 @@ impl<'a, F: Function> Env<'a, F> { let mut lr = vreg_ranges[operand.vreg().vreg()]; log::debug!(" -> has existing LR {:?}", lr); // If there was no liverange (dead def), create a trivial one. - if lr.is_invalid() { + if !live.get(operand.vreg().vreg()) { lr = self.add_liverange_to_vreg( VRegIndex::new(operand.vreg().vreg()), CodeRange { @@ -1322,6 +1362,15 @@ impl<'a, F: Function> Env<'a, F> { } } } + + if self.func.is_safepoint(inst) { + self.safepoints.push(inst); + for vreg in live.iter() { + if let Some(safepoints) = self.safepoints_per_vreg.get_mut(&vreg) { + safepoints.insert(inst); + } + } + } } // Block parameters define vregs at the very beginning of @@ -1348,98 +1397,6 @@ impl<'a, F: Function> Env<'a, F> { self.blockparam_ins.push((vreg_idx, block, pred)); } } - - // Loop-handling: to handle backedges, rather than running - // a fixpoint loop, we add a live-range for every value - // live at the beginning of the loop over the whole loop - // body. - // - // To determine what the "loop body" consists of, we find - // the transitively minimum-reachable traversal index in - // our traversal order before the current block - // index. When we discover a backedge, *all* block indices - // within the traversal range are considered part of the - // loop body. This is guaranteed correct (though perhaps - // an overapproximation) even for irreducible control - // flow, because it will find all blocks to which the - // liveness could flow backward over which we've already - // scanned, and it should give good results for reducible - // control flow with properly ordered blocks. - let mut min_pred = i; - let mut loop_scan = i; - log::debug!( - "looking for loops from postorder#{} (block{})", - i, - self.cfginfo.postorder[i].index() - ); - while loop_scan >= min_pred { - let block = self.cfginfo.postorder[loop_scan]; - log::debug!( - " -> scan at postorder#{} (block{})", - loop_scan, - block.index() - ); - for &pred in self.func.block_preds(block) { - log::debug!( - " -> pred block{} (postorder#{})", - pred.index(), - block_to_postorder[pred.index()].unwrap_or(min_pred as u32) - ); - min_pred = std::cmp::min( - min_pred, - block_to_postorder[pred.index()].unwrap_or(min_pred as u32) as usize, - ); - log::debug!(" -> min_pred = {}", min_pred); - } - if loop_scan == 0 { - break; - } - loop_scan -= 1; - } - - if min_pred < i { - // We have one or more backedges, and the loop body is - // (conservatively) postorder[min_pred..i]. 
Find a - // range that covers all of those blocks. - let loop_blocks = &self.cfginfo.postorder[min_pred..=i]; - let loop_begin = loop_blocks - .iter() - .map(|b| self.cfginfo.block_entry[b.index()]) - .min() - .unwrap(); - let loop_end = loop_blocks - .iter() - .map(|b| self.cfginfo.block_exit[b.index()]) - .max() - .unwrap(); - let loop_range = CodeRange { - from: loop_begin, - to: loop_end, - }; - log::debug!( - "found backedge wrt postorder: postorder#{}..postorder#{}", - min_pred, - i - ); - log::debug!(" -> loop range {:?}", loop_range); - self.stats.livein_loops += 1; - for &loopblock in loop_blocks { - self.stats.livein_loop_unions += 1; - self.liveins[loopblock.index()].or(&live); - } - for vreg in live.iter() { - log::debug!( - "vreg {:?} live at top of loop (block {:?}) -> range {:?}", - VRegIndex::new(vreg), - block, - loop_range, - ); - self.add_liverange_to_vreg(VRegIndex::new(vreg), loop_range, &mut num_ranges); - } - } - - log::debug!("liveins at block {:?} = {:?}", block, live); - self.liveins[block.index()] = live; } self.safepoints.sort(); @@ -3886,15 +3843,15 @@ impl<'a, F: Function> Env<'a, F> { for (&((_, from_inst), from_alloc), &((_, to_inst), to_alloc)) in prog_move_srcs.iter().zip(prog_move_dsts.iter()) { - assert!(!from_alloc.is_none()); - assert!(!to_alloc.is_none()); - assert_eq!(from_inst, to_inst); log::debug!( "program move at inst {:?}: alloc {:?} -> {:?}", from_inst, from_alloc, to_alloc ); + assert!(!from_alloc.is_none()); + assert!(!to_alloc.is_none()); + assert_eq!(from_inst, to_inst); self.insert_move( ProgPoint::before(from_inst), InsertMovePrio::ProgramMove, @@ -4016,117 +3973,24 @@ impl<'a, F: Function> Env<'a, F> { fn compute_stackmaps(&mut self) { // For each ref-typed vreg, iterate through ranges and find // safepoints in-range. Add the SpillSlot to the stackmap. - // - // Note that unlike in the rest of the allocator, we cannot - // overapproximate here: we cannot list a vreg's alloc at a - // certain program point in the metadata if it is not yet - // live. Because arbitrary block order and irreducible control - // flow could result in us encountering an (overapproximated, - // not actually live) vreg range for a reftyped value when - // scanning in block order, we need to do a fixpoint liveness - // analysis here for reftyped vregs only. We only perform this - // analysis if there are reftyped vregs present, so it will - // not add to allocation runtime otherwise. if self.func.reftype_vregs().is_empty() { return; } - let mut reftype_vreg_map = BitVec::new(); - for vreg in self.func.reftype_vregs() { - reftype_vreg_map.set(vreg.vreg(), true); - } - - let mut live_reftypes_block_start: Vec = vec![]; - let mut live_reftypes_block_end: Vec = vec![]; - for _ in 0..self.func.blocks() { - live_reftypes_block_start.push(BitVec::new()); - live_reftypes_block_end.push(BitVec::new()); - } - - let mut safepoints_per_vreg: HashMap> = HashMap::new(); - for &vreg in self.func.reftype_vregs() { - safepoints_per_vreg.insert(vreg.vreg(), HashSet::new()); - } - - let mut workqueue = VecDeque::new(); - let mut workqueue_set = HashSet::new(); - let mut visited = HashSet::new(); - - // Backward analysis: start at return blocks. - for block in 0..self.func.blocks() { - let block = Block::new(block); - if self.func.is_ret(self.func.block_insns(block).last()) { - workqueue.push_back(block); - workqueue_set.insert(block); - } - } - - // While workqueue is not empty, scan a block backward. 
- while !workqueue.is_empty() { - let block = workqueue.pop_back().unwrap(); - workqueue_set.remove(&block); - visited.insert(block); - - let live = &mut live_reftypes_block_start[block.index()]; - live.assign(&live_reftypes_block_end[block.index()]); - - for inst in self.func.block_insns(block).rev().iter() { - for pos in &[OperandPos::After, OperandPos::Before] { - for op in self.func.inst_operands(inst) { - if !reftype_vreg_map.get(op.vreg().vreg()) { - continue; - } - if op.pos() != OperandPos::Both && op.pos() != *pos { - continue; - } - match op.kind() { - OperandKind::Def => { - live.set(op.vreg().vreg(), false); - } - OperandKind::Use => { - live.set(op.vreg().vreg(), true); - } - } - } - } - - if self.func.is_safepoint(inst) { - for vreg in live.iter() { - let safepoints = safepoints_per_vreg.get_mut(&vreg).unwrap(); - safepoints.insert(inst); - } - } - } - for blockparam in self.func.block_params(block) { - if !reftype_vreg_map.get(blockparam.vreg()) { - continue; - } - live.set(blockparam.vreg(), false); - } - - for &pred in self.func.block_preds(block) { - if live_reftypes_block_end[pred.index()].or(live) || !visited.contains(&pred) { - if !workqueue_set.contains(&pred) { - workqueue.push_back(pred); - workqueue_set.insert(pred); - } - } - } - } - - // Now we have `safepoints_per_vreg`. All we have to do is, - // for each vreg in this map, step through the LiveRanges - // along with a sorted list of safepoints; and for each - // safepoint in the current range, emit the allocation into - // the `safepoint_slots` list. + // Given `safepoints_per_vreg` from the liveness computation, + // all we have to do is, for each vreg in this map, step + // through the LiveRanges along with a sorted list of + // safepoints; and for each safepoint in the current range, + // emit the allocation into the `safepoint_slots` list. 
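With `safepoints_per_vreg` collected during the single liveness scan, the final stackmap step reduces to a merge of two sorted sequences per reftyped vreg. A simplified sketch, assuming one spillslot per vreg and stand-in integer types rather than the allocator's real indices:

    /// For one reftyped vreg: walk its sorted, disjoint live ranges and the
    /// sorted program points of the safepoints it is live across, recording
    /// the vreg's slot for every safepoint covered by some range.
    fn emit_safepoint_slots(
        ranges: &[(u32, u32)],     // half-open [from, to) program-point ranges
        safepoints: &[u32],        // sorted safepoint program points
        slot: u32,                 // spillslot assigned to this vreg
        out: &mut Vec<(u32, u32)>, // (safepoint point, slot) records
    ) {
        let mut i = 0;
        for &(from, to) in ranges {
            while i < safepoints.len() && safepoints[i] < from {
                i += 1; // safepoint precedes this range: vreg not live there
            }
            while i < safepoints.len() && safepoints[i] < to {
                out.push((safepoints[i], slot));
                i += 1;
            }
        }
    }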
- log::debug!("safepoints_per_vreg = {:?}", safepoints_per_vreg); + log::debug!("safepoints_per_vreg = {:?}", self.safepoints_per_vreg); for vreg in self.func.reftype_vregs() { log::debug!("generating safepoint info for vreg {}", vreg); let vreg = VRegIndex::new(vreg.vreg()); - let mut safepoints: Vec = safepoints_per_vreg + let mut safepoints: Vec = self + .safepoints_per_vreg .get(&vreg.index()) .unwrap() .iter() From 0f3454b4d7646ead1aff4f653ba421dab8753d55 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 01:51:40 -0700 Subject: [PATCH 023/155] Inlining on btree commitment map comparators for a 10% win --- src/ion/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 824b18b0..06bbceda 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -391,17 +391,20 @@ impl LiveRangeKey { } impl std::cmp::PartialEq for LiveRangeKey { + #[inline(always)] fn eq(&self, other: &Self) -> bool { self.to > other.from && self.from < other.to } } impl std::cmp::Eq for LiveRangeKey {} impl std::cmp::PartialOrd for LiveRangeKey { + #[inline(always)] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl std::cmp::Ord for LiveRangeKey { + #[inline(always)] fn cmp(&self, other: &Self) -> std::cmp::Ordering { if self.to <= other.from { std::cmp::Ordering::Less From 3ddcf05feab880c8ebf1b4a22432a0c2041e13f6 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 17:03:44 -0700 Subject: [PATCH 024/155] Optimizations: (i) range-summary array; (ii) early exit from btree probe loop (one conflict bundle is enough, empirically) --- src/ion/mod.rs | 217 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 190 insertions(+), 27 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 06bbceda..e6f33477 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -36,6 +36,10 @@ - partial allocation -- place one LR, split rest off into separate bundle, in one pass? + + - coarse-grained "register contention" counters per fixed region; + randomly sample these, adding up a vector of them, to choose + register probe order? */ #![allow(dead_code, unused_imports)] @@ -122,8 +126,6 @@ struct LiveRange { next_in_bundle: LiveRangeIndex, next_in_reg: LiveRangeIndex, - // if a bundle partly fits, this is used to record LRs that do fit - reg_hint: PReg, merged_into: LiveRangeIndex, } @@ -186,6 +188,7 @@ struct LiveBundle { allocation: Allocation, prio: u32, // recomputed after every bulk update spill_weight_and_props: u32, + range_summary: RangeSummary, } impl LiveBundle { @@ -212,6 +215,73 @@ impl LiveBundle { } } +#[derive(Clone, Debug)] +struct RangeSummary { + /// Indices in `range_ranges` dense array of packed CodeRange structs. 
+ from: u32, + to: u32, + bound: CodeRange, +} + +impl RangeSummary { + fn new() -> Self { + Self { + from: 0, + to: 0, + bound: CodeRange { + from: ProgPoint::from_index(0), + to: ProgPoint::from_index(0), + }, + } + } + + fn iter<'a>(&'a self, range_array: &'a [CodeRange]) -> RangeSummaryIter<'a> { + RangeSummaryIter { + idx: self.from as usize, + start: self.from as usize, + limit: self.to as usize, + bound: self.bound, + arr: range_array, + } + } +} + +#[derive(Clone, Copy, Debug)] +struct RangeSummaryIter<'a> { + idx: usize, + start: usize, + limit: usize, + bound: CodeRange, + arr: &'a [CodeRange], +} + +impl<'a> std::iter::Iterator for RangeSummaryIter<'a> { + type Item = CodeRange; + fn next(&mut self) -> Option { + if self.idx == self.limit { + return None; + } + while self.idx < self.limit && self.arr[self.idx].to <= self.bound.from { + self.idx += 1; + } + let mut cur = self.arr[self.idx]; + if cur.from >= self.bound.to { + self.idx = self.limit; + return None; + } + + if cur.from < self.bound.from { + cur.from = self.bound.from; + } + if cur.to > self.bound.to { + cur.to = self.bound.to; + } + + self.idx += 1; + Some(cur) + } +} + #[derive(Clone, Debug)] struct SpillSet { bundles: LiveBundleVec, @@ -285,6 +355,7 @@ struct Env<'a, F: Function> { blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>, ranges: Vec, + range_ranges: Vec, bundles: Vec, spillsets: Vec, uses: Vec, @@ -382,6 +453,7 @@ struct LiveRangeKey { } impl LiveRangeKey { + #[inline(always)] fn from_range(range: &CodeRange) -> Self { Self { from: range.from.to_index(), @@ -550,8 +622,10 @@ pub struct Stats { process_bundle_count: usize, process_bundle_reg_probes_fixed: usize, process_bundle_reg_success_fixed: usize, + process_bundle_bounding_range_probe_start_any: usize, process_bundle_bounding_range_probes_any: usize, process_bundle_bounding_range_success_any: usize, + process_bundle_reg_probe_start_any: usize, process_bundle_reg_probes_any: usize, process_bundle_reg_success_any: usize, evict_bundle_event: usize, @@ -677,6 +751,7 @@ impl<'a, F: Function> Env<'a, F> { blockparam_allocs: vec![], bundles: vec![], ranges: vec![], + range_ranges: vec![], spillsets: vec![], uses: vec![], vregs: vec![], @@ -766,7 +841,6 @@ impl<'a, F: Function> Env<'a, F> { last_use: UseIndex::invalid(), next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), - reg_hint: PReg::invalid(), merged_into: LiveRangeIndex::invalid(), }); LiveRangeIndex::new(idx) @@ -1617,6 +1691,7 @@ impl<'a, F: Function> Env<'a, F> { spillset: SpillSetIndex::invalid(), prio: 0, spill_weight_and_props: 0, + range_summary: RangeSummary::new(), }); LiveBundleIndex::new(bundle) } @@ -1872,6 +1947,35 @@ impl<'a, F: Function> Env<'a, F> { self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); } + // Now create range summaries for all bundles. 
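What the RangeSummary above buys the hot probe loop is a streaming scan over a contiguous slice of two-u32 ranges, clamped to the bundle's bounding range, instead of chasing the per-range linked list. A minimal sketch of that clamped iteration, using illustrative (from, to) tuples for CodeRange:

    /// Stream one bundle's ranges out of the shared flat array, clamping each
    /// to the bundle's bound and skipping ranges entirely outside it.
    /// Ranges are sorted, half-open (from, to) pairs.
    fn summary_ranges<'a>(
        all: &'a [(u32, u32)],
        from_idx: usize,
        to_idx: usize,
        bound: (u32, u32),
    ) -> impl Iterator<Item = (u32, u32)> + 'a {
        all[from_idx..to_idx]
            .iter()
            .copied()
            .skip_while(move |&(_, to)| to <= bound.0)
            .take_while(move |&(from, _)| from < bound.1)
            .map(move |(from, to)| (from.max(bound.0), to.min(bound.1)))
    }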
+ for bundle in 0..self.bundles.len() { + let bundle = LiveBundleIndex::new(bundle); + let mut iter = self.bundles[bundle.index()].first_range; + let start_idx = self.range_ranges.len(); + let start_pos = if iter.is_valid() { + self.ranges[iter.index()].range.from + } else { + ProgPoint::from_index(0) + }; + let mut end_pos = start_pos; + while iter.is_valid() { + let range = self.ranges[iter.index()].range; + end_pos = range.to; + self.range_ranges.push(range); + iter = self.ranges[iter.index()].next_in_bundle; + } + let end_idx = self.range_ranges.len(); + let bound = CodeRange { + from: start_pos, + to: end_pos, + }; + self.bundles[bundle.index()].range_summary = RangeSummary { + from: start_idx as u32, + to: end_idx as u32, + bound, + }; + } + log::debug!("done merging bundles"); } @@ -2060,18 +2164,21 @@ impl<'a, F: Function> Env<'a, F> { ) -> AllocRegResult { log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); let mut conflicts = smallvec![]; - let mut iter = self.bundles[bundle.index()].first_range; - while iter.is_valid() { - let range = &self.ranges[iter.index()]; - let next = range.next_in_bundle; + // Use the range-summary array; this allows fast streaming + // access to CodeRanges (which are just two u32s packed + // together) which is important for this hot loop. + let iter = self.bundles[bundle.index()] + .range_summary + .iter(&self.range_ranges[..]); + for range in iter { log::debug!(" -> range {:?}", range); // Note that the comparator function here tests for *overlap*, so we // are checking whether the BTree contains any preg range that - // *overlaps* with range `iter`, not literally the range `iter`. + // *overlaps* with range `range`, not literally the range `range`. if let Some(preg_range) = self.pregs[reg.index()] .allocations .btree - .get(&LiveRangeKey::from_range(&range.range)) + .get(&LiveRangeKey::from_range(&range)) { log::debug!(" -> btree contains range {:?} that overlaps", preg_range); if self.ranges[preg_range.index()].vreg.is_valid() { @@ -2083,15 +2190,25 @@ impl<'a, F: Function> Env<'a, F> { if !conflicts.iter().any(|b| *b == conflict_bundle) { conflicts.push(conflict_bundle); } + + // Empirically, it seems to be essentially as good + // to return only one conflicting bundle as all of + // them; it is very rare that the combination of + // all conflicting bundles yields a maximum spill + // weight that is enough to keep them in place + // when a single conflict does not. It is also a + // quite significant compile-time win to *stop + // scanning* as soon as we have a conflict. To + // experiment with this, however, just remove this + // `break`; the rest of the code will do the right + // thing. + break; } else { log::debug!(" -> conflict with fixed reservation"); // range from a direct use of the PReg (due to clobber). return AllocRegResult::ConflictWithFixed; } - } else { - self.ranges[iter.index()].reg_hint = self.pregs[reg.index()].reg; } - iter = next; } if conflicts.len() > 0 { @@ -2567,6 +2684,7 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; self.bundles[bundle.index()].first_range = LiveRangeIndex::invalid(); self.bundles[bundle.index()].last_range = LiveRangeIndex::invalid(); + let mut range_summary_idx = self.bundles[bundle.index()].range_summary.from; while iter.is_valid() { // Read `next` link now and then clear it -- we rebuild the list below. 
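The btree probe above leans on LiveRangeKey's overlap-as-equality ordering (the comparators inlined in the previous patch): because the ranges committed to one PReg never overlap each other, "overlaps" is a consistent equality among stored keys, so a lookup with an arbitrary probe range returns a committed range that overlaps it, if any. A standalone sketch of the trick with illustrative names:

    use std::cmp::Ordering;
    use std::collections::BTreeMap;

    #[derive(Clone, Copy)]
    struct Key { from: u32, to: u32 } // half-open [from, to)

    impl PartialEq for Key {
        fn eq(&self, other: &Self) -> bool {
            self.to > other.from && self.from < other.to // "overlaps"
        }
    }
    impl Eq for Key {}
    impl PartialOrd for Key {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
    }
    impl Ord for Key {
        fn cmp(&self, other: &Self) -> Ordering {
            if self.to <= other.from {
                Ordering::Less
            } else if self.from >= other.to {
                Ordering::Greater
            } else {
                Ordering::Equal // any overlap compares as equal
            }
        }
    }

    fn demo() {
        let mut map: BTreeMap<Key, &'static str> = BTreeMap::new();
        map.insert(Key { from: 0, to: 10 }, "a");
        map.insert(Key { from: 20, to: 30 }, "b");
        // A probe overlapping [20, 30) finds "b"; a probe in the gap finds nothing.
        assert_eq!(map.get(&Key { from: 25, to: 26 }), Some(&"b"));
        assert!(map.get(&Key { from: 12, to: 15 }).is_none());
    }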
let next = self.ranges[iter.index()].next_in_bundle; @@ -2587,6 +2705,7 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[cur_bundle.index()].spillset = self.bundles[bundle.index()].spillset; new_bundles.push(cur_bundle); split_idx += 1; + self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx; } while split_idx < split_points.len() && split_points[split_idx] <= range.from { split_idx += 1; @@ -2720,7 +2839,10 @@ impl<'a, F: Function> Env<'a, F> { // Create a new bundle to hold the rest-range. let rest_bundle = self.create_bundle(); + self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1; cur_bundle = rest_bundle; + self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx; + self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1; new_bundles.push(rest_bundle); self.bundles[rest_bundle.index()].first_range = rest_lr; self.bundles[rest_bundle.index()].last_range = rest_lr; @@ -2732,6 +2854,13 @@ impl<'a, F: Function> Env<'a, F> { } iter = next; + range_summary_idx += 1; + self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx; + } + + self.fixup_range_summary_bound(bundle); + for &b in &new_bundles { + self.fixup_range_summary_bound(b); } // Enqueue all split-bundles on the allocation queue. @@ -2739,7 +2868,7 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].prio = prio; self.recompute_bundle_properties(bundle); self.allocation_queue.insert(bundle, prio as usize); - for b in new_bundles { + for &b in &new_bundles { let prio = self.compute_bundle_prio(b); self.bundles[b.index()].prio = prio; self.recompute_bundle_properties(b); @@ -2747,23 +2876,47 @@ impl<'a, F: Function> Env<'a, F> { } } + fn fixup_range_summary_bound(&mut self, bundle: LiveBundleIndex) { + let bundledata = &mut self.bundles[bundle.index()]; + let from = if bundledata.first_range.is_valid() { + self.ranges[bundledata.first_range.index()].range.from + } else { + ProgPoint::from_index(0) + }; + let to = if bundledata.last_range.is_valid() { + self.ranges[bundledata.last_range.index()].range.to + } else { + ProgPoint::from_index(0) + }; + bundledata.range_summary.bound = CodeRange { from, to }; + + #[cfg(debug_assertions)] + { + // Sanity check: ensure that ranges returned by the range + // summary correspond to actual ranges. + let mut iter = self.bundles[bundle.index()].first_range; + let mut summary_iter = self.bundles[bundle.index()] + .range_summary + .iter(&self.range_ranges[..]); + while iter.is_valid() { + assert_eq!(summary_iter.next(), Some(self.ranges[iter.index()].range)); + iter = self.ranges[iter.index()].next_in_bundle; + } + assert_eq!(summary_iter.next(), None); + } + } + fn process_bundle(&mut self, bundle: LiveBundleIndex) { // Find any requirements: for every LR, for every def/use, gather // requirements (fixed-reg, any-reg, any) and merge them. let req = self.compute_requirement(bundle); - // Grab a hint from our spillset, if any, and from the first LR, if any. + // Grab a hint from our spillset, if any. let hint_reg = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; - let hint2_reg = if self.bundles[bundle.index()].first_range.is_valid() { - self.ranges[self.bundles[bundle.index()].first_range.index()].reg_hint - } else { - PReg::invalid() - }; log::debug!( - "process_bundle: bundle {:?} requirement {:?} hint {:?} hint2 {:?}", + "process_bundle: bundle {:?} requirement {:?} hint {:?}", bundle, req, hint_reg, - hint2_reg ); // Try to allocate! 
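The probes below walk candidate registers via RegTraversalIter, which is constructed with a hint register and a scan offset. A hypothetical, simplified stand-in for that ordering (not the iterator's actual implementation): try the hint first, then the remaining allocatable registers starting at a rotating offset so that successive bundles spread across the register file.

    fn probe_order(allocatable: &[u8], hint: Option<u8>, scan_offset: usize) -> Vec<u8> {
        let mut order = Vec::with_capacity(allocatable.len() + 1);
        if let Some(h) = hint {
            order.push(h); // hinted register is always tried first
        }
        for i in 0..allocatable.len() {
            let reg = allocatable[(i + scan_offset) % allocatable.len()];
            if Some(reg) != hint {
                order.push(reg);
            }
        }
        order
    }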
@@ -2830,9 +2983,14 @@ impl<'a, F: Function> Env<'a, F> { let bounding_range = self.bundle_bounding_range_if_multiple(bundle); if let Some(bounding_range) = bounding_range { log::debug!("initial scan with bounding range {:?}", bounding_range); - for preg in - RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset) - { + self.stats.process_bundle_bounding_range_probe_start_any += 1; + for preg in RegTraversalIter::new( + self.env, + class, + hint_reg, + PReg::invalid(), + scan_offset, + ) { let preg_idx = PRegIndex::new(preg.index()); log::debug!("trying preg {:?}", preg_idx); self.stats.process_bundle_bounding_range_probes_any += 1; @@ -2851,9 +3009,14 @@ impl<'a, F: Function> Env<'a, F> { } } - for preg in - RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset) - { + self.stats.process_bundle_reg_probe_start_any += 1; + for preg in RegTraversalIter::new( + self.env, + class, + hint_reg, + PReg::invalid(), + scan_offset, + ) { self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); log::debug!("trying preg {:?}", preg_idx); From 4f6346768e154fa719c061c8089f5a68b3a143df Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 17:45:51 -0700 Subject: [PATCH 025/155] Pinned-VReg mechanism. --- src/ion/mod.rs | 28 ++++++++++++++++++++++++++++ src/lib.rs | 9 +++++++++ 2 files changed, 37 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index e6f33477..25a23122 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1718,6 +1718,13 @@ impl<'a, F: Function> Env<'a, F> { return false; } + // If either bundle is already assigned (due to a pinned vreg), don't merge. + if !self.bundles[from.index()].allocation.is_none() + || !self.bundles[to.index()].allocation.is_none() + { + return false; + } + #[cfg(debug)] { // Sanity check: both bundles should contain only ranges with appropriate VReg classes. @@ -1882,6 +1889,22 @@ impl<'a, F: Function> Env<'a, F> { range = self.ranges[range.index()].next_in_reg; } log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); + + // If this vreg is pinned, assign the allocation and block the PRegs. + if let Some(preg) = self.func.is_pinned_vreg(self.vreg_regs[vreg.index()]) { + self.bundles[bundle.index()].allocation = Allocation::reg(preg); + + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = self.ranges[iter.index()].range; + // Create a new LiveRange for the PReg + // reservation, unaffiliated with the VReg, to + // reserve it (like a clobber) without the + // possibility of eviction. + self.add_liverange_to_preg(range, preg); + iter = self.ranges[iter.index()].next_in_bundle; + } + } } for inst in 0..self.func.insts() { @@ -2006,6 +2029,11 @@ impl<'a, F: Function> Env<'a, F> { let mut lr = self.vregs[vreg.index()].first_range; while lr.is_valid() { let bundle = self.ranges[lr.index()].bundle; + if !self.bundles[bundle.index()].allocation.is_none() { + // Pinned VReg -- already allocated, so skip. + lr = self.ranges[lr.index()].next_in_bundle; + continue; + } if self.bundles[bundle.index()].first_range == lr { // First time seeing `bundle`: allocate a spillslot for it, // compute its priority, and enqueue it. diff --git a/src/lib.rs b/src/lib.rs index 437e6ba9..860aa430 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -741,6 +741,15 @@ pub trait Function { &[] } + /// Is the given VReg pinned permanently to a PReg? 
Note that the + /// input program must not contain constraints that contradict + /// this (e.g., using another VReg with a fixed-reg policy to a + /// given preg at the same time as using a VReg pinned to that + /// preg) or else allocation may be impossible. + fn is_pinned_vreg(&self, _: VReg) -> Option { + None + } + // -------------- // Spills/reloads // -------------- From df59b5ede4af62eaa86abee99a5513946b2b962f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 17:55:04 -0700 Subject: [PATCH 026/155] Inline all the things (ProgPoint edition) --- src/lib.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 860aa430..8b8b69da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -808,21 +808,26 @@ impl std::fmt::Debug for ProgPoint { } impl ProgPoint { + #[inline(always)] pub fn new(inst: Inst, pos: InstPosition) -> Self { let bits = ((inst.0 as u32) << 1) | (pos as u8 as u32); Self { bits } } + #[inline(always)] pub fn before(inst: Inst) -> Self { Self::new(inst, InstPosition::Before) } + #[inline(always)] pub fn after(inst: Inst) -> Self { Self::new(inst, InstPosition::After) } + #[inline(always)] pub fn inst(self) -> Inst { // Cast to i32 to do an arithmetic right-shift, which will // preserve an `Inst::invalid()` (which is -1, or all-ones). Inst::new(((self.bits as i32) >> 1) as usize) } + #[inline(always)] pub fn pos(self) -> InstPosition { match self.bits & 1 { 0 => InstPosition::Before, @@ -830,23 +835,23 @@ impl ProgPoint { _ => unreachable!(), } } - + #[inline(always)] pub fn next(self) -> ProgPoint { Self { bits: self.bits + 1, } } - + #[inline(always)] pub fn prev(self) -> ProgPoint { Self { bits: self.bits - 1, } } - + #[inline(always)] pub fn to_index(self) -> u32 { self.bits } - + #[inline(always)] pub fn from_index(index: u32) -> Self { Self { bits: index } } From a453501ebb7cc9a827712841de40e9d11537cae9 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 18:17:13 -0700 Subject: [PATCH 027/155] sort_unstable (quicksort) everywhere --- src/ion/mod.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 25a23122..bfe89cc0 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1476,7 +1476,7 @@ impl<'a, F: Function> Env<'a, F> { } } - self.safepoints.sort(); + self.safepoints.sort_unstable(); // Insert safepoint virtual stack uses, if needed. for vreg in self.func.reftype_vregs() { @@ -1638,9 +1638,9 @@ impl<'a, F: Function> Env<'a, F> { } } - self.clobbers.sort(); - self.blockparam_ins.sort(); - self.blockparam_outs.sort(); + self.clobbers.sort_unstable(); + self.blockparam_ins.sort_unstable(); + self.blockparam_outs.sort_unstable(); self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos); self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos); @@ -2634,7 +2634,7 @@ impl<'a, F: Function> Env<'a, F> { iter = rangedata.next_in_bundle; } - splits.sort(); + splits.sort_unstable(); log::debug!(" -> final splits: {:?}", splits); splits } @@ -3831,7 +3831,7 @@ impl<'a, F: Function> Env<'a, F> { // Sort the half-moves list. For each (from, to, // from-vreg) tuple, find the from-alloc and all the // to-allocs, and insert moves on the block edge. - half_moves.sort_by_key(|h| h.key); + half_moves.sort_unstable_by_key(|h| h.key); log::debug!("halfmoves: {:?}", half_moves); self.stats.halfmoves_count = half_moves.len(); @@ -4060,7 +4060,7 @@ impl<'a, F: Function> Env<'a, F> { // resolve (see cases below). 
let mut i = 0; self.inserted_moves - .sort_by_key(|m| (m.pos.to_index(), m.prio)); + .sort_unstable_by_key(|m| (m.pos.to_index(), m.prio)); while i < self.inserted_moves.len() { let start = i; let pos = self.inserted_moves[i].pos; @@ -4101,7 +4101,7 @@ impl<'a, F: Function> Env<'a, F> { // Add edits to describe blockparam locations too. This is // required by the checker. This comes after any edge-moves. self.blockparam_allocs - .sort_by_key(|&(block, idx, _, _)| (block, idx)); + .sort_unstable_by_key(|&(block, idx, _, _)| (block, idx)); self.stats.blockparam_allocs_count = self.blockparam_allocs.len(); let mut i = 0; while i < self.blockparam_allocs.len() { @@ -4129,7 +4129,7 @@ impl<'a, F: Function> Env<'a, F> { } // Ensure edits are in sorted ProgPoint order. - self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio)); + self.edits.sort_unstable_by_key(|&(pos, prio, _)| (pos, prio)); self.stats.edits_count = self.edits.len(); // Add debug annotations. @@ -4190,7 +4190,7 @@ impl<'a, F: Function> Env<'a, F> { .iter() .map(|&inst| ProgPoint::before(inst)) .collect(); - safepoints.sort(); + safepoints.sort_unstable(); log::debug!(" -> live over safepoints: {:?}", safepoints); let mut safepoint_idx = 0; @@ -4217,7 +4217,7 @@ impl<'a, F: Function> Env<'a, F> { } } - self.safepoint_slots.sort(); + self.safepoint_slots.sort_unstable(); log::debug!("final safepoint slots info: {:?}", self.safepoint_slots); } From bfe1c632c954db9146ec757563b485d00d41d9c4 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 18:52:46 -0700 Subject: [PATCH 028/155] Some preallocation and removal of one u32 from LiveRange struct --- src/ion/mod.rs | 154 +++++++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 82 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index bfe89cc0..594cd81d 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -117,8 +117,7 @@ struct LiveRange { range: CodeRange, vreg: VRegIndex, bundle: LiveBundleIndex, - uses_spill_weight: u32, - num_fixed_uses_and_flags: u32, + uses_spill_weight_and_flags: u32, first_use: UseIndex, last_use: UseIndex, @@ -138,35 +137,26 @@ enum LiveRangeFlag { impl LiveRange { #[inline(always)] - pub fn num_fixed_uses(&self) -> u32 { - self.num_fixed_uses_and_flags & ((1 << 24) - 1) - } - #[inline(always)] - pub fn set_num_fixed_uses(&mut self, count: u32) { - debug_assert!(count < (1 << 24)); - self.num_fixed_uses_and_flags = (self.num_fixed_uses_and_flags & !((1 << 24) - 1)) | count; - } - #[inline(always)] - pub fn inc_num_fixed_uses(&mut self) { - debug_assert!(self.num_fixed_uses_and_flags & ((1 << 24) - 1) < ((1 << 24) - 1)); - self.num_fixed_uses_and_flags += 1; + pub fn set_flag(&mut self, flag: LiveRangeFlag) { + self.uses_spill_weight_and_flags |= (flag as u32) << 30; } #[inline(always)] - pub fn dec_num_fixed_uses(&mut self) { - debug_assert!(self.num_fixed_uses_and_flags & ((1 << 24) - 1) > 0); - self.num_fixed_uses_and_flags -= 1; + pub fn clear_flag(&mut self, flag: LiveRangeFlag) { + self.uses_spill_weight_and_flags &= !((flag as u32) << 30); } #[inline(always)] - pub fn set_flag(&mut self, flag: LiveRangeFlag) { - self.num_fixed_uses_and_flags |= (flag as u32) << 24; + pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { + self.uses_spill_weight_and_flags & ((flag as u32) << 30) != 0 } #[inline(always)] - pub fn clear_flag(&mut self, flag: LiveRangeFlag) { - self.num_fixed_uses_and_flags &= !((flag as u32) << 24); + pub fn uses_spill_weight(&self) -> u32 { + self.uses_spill_weight_and_flags & 0x3fff_ffff } 
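The packing above keeps the spill weight in the low 30 bits of the same u32 that carries the flag bits, so the weight updates elsewhere in this patch can add and subtract directly on the packed field as long as the weight never reaches 2^30. A small sketch of the layout with illustrative constants:

    const FLAG_SHIFT: u32 = 30;
    const WEIGHT_MASK: u32 = (1 << FLAG_SHIFT) - 1; // 0x3fff_ffff

    fn pack(flags: u32, weight: u32) -> u32 {
        debug_assert!(flags < 4 && weight <= WEIGHT_MASK);
        (flags << FLAG_SHIFT) | weight
    }
    fn flags(packed: u32) -> u32 { packed >> FLAG_SHIFT }
    fn weight(packed: u32) -> u32 { packed & WEIGHT_MASK }

    fn example() {
        let p = pack(0b10, 2000);
        assert_eq!(weight(p + 2000), 4000); // in-place weight arithmetic is fine
        assert_eq!(flags(p + 2000), 0b10);  // ...as long as it stays below 2^30
    }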
#[inline(always)] - pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { - self.num_fixed_uses_and_flags & ((flag as u32) << 24) != 0 + pub fn set_uses_spill_weight(&mut self, weight: u32) { + assert!(weight < (1 << 30)); + self.uses_spill_weight_and_flags = + (self.uses_spill_weight_and_flags & 0xc000_0000) | weight; } } @@ -739,23 +729,24 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { impl<'a, F: Function> Env<'a, F> { pub(crate) fn new(func: &'a F, env: &'a MachineEnv, cfginfo: CFGInfo) -> Self { + let n = func.insts(); Self { func, env, cfginfo, - liveins: vec![], - liveouts: vec![], + liveins: Vec::with_capacity(func.blocks()), + liveouts: Vec::with_capacity(func.blocks()), blockparam_outs: vec![], blockparam_ins: vec![], blockparam_allocs: vec![], - bundles: vec![], - ranges: vec![], - range_ranges: vec![], - spillsets: vec![], - uses: vec![], - vregs: vec![], - vreg_regs: vec![], + bundles: Vec::with_capacity(n), + ranges: Vec::with_capacity(4 * n), + range_ranges: Vec::with_capacity(4 * n), + spillsets: Vec::with_capacity(n), + uses: Vec::with_capacity(4 * n), + vregs: Vec::with_capacity(n), + vreg_regs: Vec::with_capacity(n), pregs: vec![], allocation_queue: PrioQueue::new(), clobbers: vec![], @@ -766,14 +757,14 @@ impl<'a, F: Function> Env<'a, F> { spillslots: vec![], slots_by_size: vec![], - prog_move_srcs: vec![], - prog_move_dsts: vec![], - prog_move_merges: vec![], + prog_move_srcs: Vec::with_capacity(n / 2), + prog_move_dsts: Vec::with_capacity(n / 2), + prog_move_merges: Vec::with_capacity(n / 2), multi_fixed_reg_fixups: vec![], inserted_moves: vec![], - edits: vec![], - allocs: vec![], + edits: Vec::with_capacity(n), + allocs: Vec::with_capacity(4 * n), inst_alloc_offsets: vec![], num_spillslots: 0, safepoint_slots: vec![], @@ -835,14 +826,17 @@ impl<'a, F: Function> Env<'a, F> { range, vreg: VRegIndex::invalid(), bundle: LiveBundleIndex::invalid(), - uses_spill_weight: 0, - num_fixed_uses_and_flags: 0, + uses_spill_weight_and_flags: 0, + first_use: UseIndex::invalid(), last_use: UseIndex::invalid(), + next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), + merged_into: LiveRangeIndex::invalid(), }); + LiveRangeIndex::new(idx) } @@ -1003,19 +997,16 @@ impl<'a, F: Function> Env<'a, F> { debug_assert!(u.is_valid()); let usedata = &self.uses[u.index()]; let lrdata = &mut self.ranges[from.index()]; - if let OperandPolicy::FixedReg(_) = usedata.operand.policy() { - lrdata.dec_num_fixed_uses(); - } log::debug!( " -> subtract {} from uses_spill_weight {}; now {}", spill_weight_from_policy(usedata.operand.policy()), - lrdata.uses_spill_weight, - lrdata.uses_spill_weight - spill_weight_from_policy(usedata.operand.policy()), + lrdata.uses_spill_weight(), + lrdata.uses_spill_weight() - spill_weight_from_policy(usedata.operand.policy()), ); - lrdata.uses_spill_weight -= spill_weight_from_policy(usedata.operand.policy()); + lrdata.uses_spill_weight_and_flags -= spill_weight_from_policy(usedata.operand.policy()); if usedata.operand.kind() == OperandKind::Def { - lrdata.uses_spill_weight -= 2000; + lrdata.uses_spill_weight_and_flags -= 2000; } } @@ -1056,20 +1047,17 @@ impl<'a, F: Function> Env<'a, F> { // Update stats. 
let policy = self.uses[u.index()].operand.policy(); - if let OperandPolicy::FixedReg(_) = policy { - self.ranges[into.index()].inc_num_fixed_uses(); - } log::debug!( "insert use {:?} into lr {:?} with weight {}", u, into, spill_weight_from_policy(policy) ); - self.ranges[into.index()].uses_spill_weight += spill_weight_from_policy(policy); + self.ranges[into.index()].uses_spill_weight_and_flags += spill_weight_from_policy(policy); if self.uses[u.index()].operand.kind() == OperandKind::Def { - self.ranges[into.index()].uses_spill_weight += 2000; + self.ranges[into.index()].uses_spill_weight_and_flags += 2000; } - log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight); + log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight()); } fn find_vreg_liverange_for_pos( @@ -1940,9 +1928,11 @@ impl<'a, F: Function> Env<'a, F> { to_vreg.index(), from_vreg.index() ); - let to_bundle = self.ranges[self.vregs[to_vreg.index()].first_range.index()].bundle; + let to_bundle = + self.ranges[self.vregs[to_vreg.index()].first_range.index()].bundle; assert!(to_bundle.is_valid()); - let from_bundle = self.ranges[self.vregs[from_vreg.index()].first_range.index()].bundle; + let from_bundle = + self.ranges[self.vregs[from_vreg.index()].first_range.index()].bundle; assert!(from_bundle.is_valid()); log::debug!( " -> from bundle{} to bundle{}", @@ -2097,23 +2087,22 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("vreg{}: first_range={:?}", i, v.first_range,); } log::debug!("Ranges:"); - for (i, r) in self.ranges.iter().enumerate() { + for (i, (r, rc)) in self.ranges.iter().zip(self.ranges.iter()).enumerate() { log::debug!( concat!( "range{}: range={:?} vreg={:?} bundle={:?} ", - "weight={} fixed={} first_use={:?} last_use={:?} ", + "weight={} first_use={:?} last_use={:?} ", "next_in_bundle={:?} next_in_reg={:?}" ), i, r.range, - r.vreg, - r.bundle, - r.uses_spill_weight, - r.num_fixed_uses(), + rc.vreg, + rc.bundle, + r.uses_spill_weight(), r.first_use, - r.last_use, + rc.last_use, r.next_in_bundle, - r.next_in_reg + rc.next_in_reg ); } log::debug!("Uses:"); @@ -2210,7 +2199,10 @@ impl<'a, F: Function> Env<'a, F> { { log::debug!(" -> btree contains range {:?} that overlaps", preg_range); if self.ranges[preg_range.index()].vreg.is_valid() { - log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); + log::debug!( + " -> from vreg {:?}", + self.ranges[preg_range.index()].vreg + ); // range from an allocated bundle: find the bundle and add to // conflicts list. let conflict_bundle = self.ranges[preg_range.index()].bundle; @@ -2361,8 +2353,11 @@ impl<'a, F: Function> Env<'a, F> { let mut range = self.bundles[bundle.index()].first_range; while range.is_valid() { let range_data = &self.ranges[range.index()]; - log::debug!(" -> uses spill weight: +{}", range_data.uses_spill_weight); - total += range_data.uses_spill_weight; + log::debug!( + " -> uses spill weight: +{}", + range_data.uses_spill_weight() + ); + total += range_data.uses_spill_weight(); range = range_data.next_in_bundle; } @@ -2798,7 +2793,8 @@ impl<'a, F: Function> Env<'a, F> { // tail-pointer so we do not need to update that.) 
let rest_lr = self.create_liverange(rest_range); self.ranges[rest_lr.index()].vreg = self.ranges[iter.index()].vreg; - self.ranges[rest_lr.index()].next_in_reg = self.ranges[iter.index()].next_in_reg; + self.ranges[rest_lr.index()].next_in_reg = + self.ranges[iter.index()].next_in_reg; self.ranges[iter.index()].next_in_reg = rest_lr; log::debug!( @@ -2812,7 +2808,6 @@ impl<'a, F: Function> Env<'a, F> { // moves to the rest range. let mut last_use_in_first_range = UseIndex::invalid(); let mut use_iter = self.ranges[iter.index()].first_use; - let mut num_fixed_uses = 0; let mut uses_spill_weight = 0; while use_iter.is_valid() { if self.uses[use_iter.index()].pos >= split_point { @@ -2825,9 +2820,6 @@ impl<'a, F: Function> Env<'a, F> { use_iter, policy ); - if let OperandPolicy::FixedReg(_) = policy { - num_fixed_uses += 1; - } uses_spill_weight += spill_weight_from_policy(policy); log::debug!(" -> use {:?} remains in orig", use_iter); use_iter = self.uses[use_iter.index()].next_use; @@ -2841,7 +2833,8 @@ impl<'a, F: Function> Env<'a, F> { use_iter ); self.ranges[rest_lr.index()].first_use = use_iter; - self.ranges[rest_lr.index()].last_use = self.ranges[iter.index()].last_use; + self.ranges[rest_lr.index()].last_use = + self.ranges[iter.index()].last_use; self.ranges[iter.index()].last_use = last_use_in_first_range; if last_use_in_first_range.is_valid() { @@ -2850,13 +2843,10 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[iter.index()].first_use = UseIndex::invalid(); } - let rest_fixed_uses = - self.ranges[iter.index()].num_fixed_uses() - num_fixed_uses; - self.ranges[rest_lr.index()].set_num_fixed_uses(rest_fixed_uses); - self.ranges[rest_lr.index()].uses_spill_weight = - self.ranges[iter.index()].uses_spill_weight - uses_spill_weight; - self.ranges[iter.index()].set_num_fixed_uses(num_fixed_uses); - self.ranges[iter.index()].uses_spill_weight = uses_spill_weight; + let new_spill_weight = + self.ranges[iter.index()].uses_spill_weight() - uses_spill_weight; + self.ranges[rest_lr.index()].set_uses_spill_weight(new_spill_weight); + self.ranges[iter.index()].set_uses_spill_weight(uses_spill_weight); } log::debug!( @@ -3458,9 +3448,8 @@ impl<'a, F: Function> Env<'a, F> { } } - let mut half_moves: Vec = vec![]; - - let mut reuse_input_insts = vec![]; + let mut half_moves: Vec = Vec::with_capacity(6 * self.func.insts()); + let mut reuse_input_insts = Vec::with_capacity(self.func.insts() / 2); let mut blockparam_in_idx = 0; let mut blockparam_out_idx = 0; @@ -4129,7 +4118,8 @@ impl<'a, F: Function> Env<'a, F> { } // Ensure edits are in sorted ProgPoint order. - self.edits.sort_unstable_by_key(|&(pos, prio, _)| (pos, prio)); + self.edits + .sort_unstable_by_key(|&(pos, prio, _)| (pos, prio)); self.stats.edits_count = self.edits.len(); // Add debug annotations. From 040c3c838ced27ee09c15739a34ff62f0744d7b1 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 19:05:20 -0700 Subject: [PATCH 029/155] Some structure packing: Use now fits in three u32s. 
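The diff below packs a 24-bit next-use index and an 8-bit operand slot into one u32; the 32-bit invalid sentinel (all ones) truncates to the all-ones 24-bit pattern and is recovered on read. The patch does the recovery with a sign-extending shift; the sketch here simply compares against the pattern. Names are illustrative:

    const INVALID_IDX: u32 = 0xffff_ffff;

    fn pack_next_and_slot(next: u32, slot: u8) -> u32 {
        debug_assert!(next == INVALID_IDX || next < 0x00ff_ffff);
        (next & 0x00ff_ffff) | ((slot as u32) << 24)
    }

    fn unpack_next(packed: u32) -> u32 {
        match packed & 0x00ff_ffff {
            0x00ff_ffff => INVALID_IDX, // all-ones 24-bit pattern is the sentinel
            idx => idx,
        }
    }

    fn unpack_slot(packed: u32) -> u8 {
        (packed >> 24) as u8
    }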
--- src/ion/mod.rs | 115 +++++++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 52 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 594cd81d..32f29942 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -164,8 +164,37 @@ impl LiveRange { struct Use { operand: Operand, pos: ProgPoint, - next_use: UseIndex, - slot: u8, + next_use_and_slot: u32, +} + +impl Use { + #[inline(always)] + fn new(operand: Operand, pos: ProgPoint, next_use: UseIndex, slot: u8) -> Self { + debug_assert!(next_use.is_invalid() || next_use.index() < ((1 << 24) - 1)); + let next_use = next_use.0 & 0x00ff_ffff; + Self { + operand, + pos, + next_use_and_slot: next_use | ((slot as u32) << 24), + } + } + #[inline(always)] + fn next_use(&self) -> UseIndex { + let val = self.next_use_and_slot & 0x00ff_ffff; + // Sign-extend 0x00ff_ffff to INVALID (0xffff_ffff). + let val = ((val as i32) << 8) >> 8; + UseIndex(val as u32) + } + #[inline(always)] + fn slot(&self) -> u8 { + (self.next_use_and_slot >> 24) as u8 + } + #[inline(always)] + fn set_next_use(&mut self, u: UseIndex) { + debug_assert!(u.is_invalid() || u.index() < ((1 << 24) - 1)); + let u = u.0 & 0x00ff_ffff; + self.next_use_and_slot = (self.next_use_and_slot & 0xff00_0000) | u; + } } const SLOT_NONE: u8 = u8::MAX; @@ -968,9 +997,9 @@ impl<'a, F: Function> Env<'a, F> { // If this use is within the range of `into`, move it over. if into_range.contains_point(usedata.pos) { log::debug!(" -> moving {:?}", iter); - let next = usedata.next_use; + let next = usedata.next_use(); if prev.is_valid() { - self.uses[prev.index()].next_use = next; + self.uses[prev.index()].set_next_use(next); } else { self.ranges[from.index()].first_use = next; } @@ -986,7 +1015,7 @@ impl<'a, F: Function> Env<'a, F> { iter = next; } else { prev = iter; - iter = usedata.next_use; + iter = usedata.next_use(); } } self.ranges[from.index()].merged_into = into; @@ -1013,7 +1042,7 @@ impl<'a, F: Function> Env<'a, F> { fn insert_use_into_liverange_and_update_stats(&mut self, into: LiveRangeIndex, u: UseIndex) { let insert_pos = self.uses[u.index()].pos; let first = self.ranges[into.index()].first_use; - self.uses[u.index()].next_use = UseIndex::invalid(); + self.uses[u.index()].set_next_use(UseIndex::invalid()); if first.is_invalid() { // Empty list. self.ranges[into.index()].first_use = u; @@ -1021,7 +1050,7 @@ impl<'a, F: Function> Env<'a, F> { } else if insert_pos > self.uses[self.ranges[into.index()].last_use.index()].pos { // After tail. let tail = self.ranges[into.index()].last_use; - self.uses[tail.index()].next_use = u; + self.uses[tail.index()].set_next_use(u); self.ranges[into.index()].last_use = u; } else { // Otherwise, scan linearly to find insertion position. 
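The insertion helper above keeps each live range's use list sorted by position with three cases: empty list, O(1) append after the tail (the common case, since uses are mostly discovered in order), and a linear scan otherwise. A self-contained sketch using a plain Vec as the arena and usize::MAX as the null link; types are stand-ins for the allocator's index newtypes.

    const NONE: usize = usize::MAX;

    struct UseNode {
        pos: u32,    // program point of the use
        next: usize, // index of the next use in this range, or NONE
    }

    fn insert_sorted(arena: &mut Vec<UseNode>, first: &mut usize, last: &mut usize, pos: u32) {
        let new = arena.len();
        arena.push(UseNode { pos, next: NONE });
        if *first == NONE {
            // Case 1: empty list.
            *first = new;
            *last = new;
        } else if pos > arena[*last].pos {
            // Case 2: append after the tail.
            arena[*last].next = new;
            *last = new;
        } else {
            // Case 3: scan linearly for the insertion point.
            let mut prev = NONE;
            let mut cur = *first;
            while cur != NONE && arena[cur].pos <= pos {
                prev = cur;
                cur = arena[cur].next;
            }
            arena[new].next = cur;
            if prev == NONE {
                *first = new;
            } else {
                arena[prev].next = new;
            }
            if cur == NONE {
                *last = new;
            }
        }
    }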
@@ -1032,11 +1061,11 @@ impl<'a, F: Function> Env<'a, F> { break; } prev = iter; - iter = self.uses[iter.index()].next_use; + iter = self.uses[iter.index()].next_use(); } - self.uses[u.index()].next_use = iter; + self.uses[u.index()].set_next_use(iter); if prev.is_valid() { - self.uses[prev.index()].next_use = u; + self.uses[prev.index()].set_next_use(u); } else { self.ranges[into.index()].first_use = u; } @@ -1333,12 +1362,8 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::After => ProgPoint::after(inst), }; let u = UseIndex(self.uses.len() as u32); - self.uses.push(Use { - operand, - pos, - slot: i as u8, - next_use: UseIndex::invalid(), - }); + self.uses + .push(Use::new(operand, pos, UseIndex::invalid(), i as u8)); log::debug!("Def of {} at {:?}", operand.vreg(), pos); @@ -1399,12 +1424,8 @@ impl<'a, F: Function> Env<'a, F> { // Create the actual use object. let u = UseIndex(self.uses.len() as u32); - self.uses.push(Use { - operand, - pos, - slot: i as u8, - next_use: UseIndex::invalid(), - }); + self.uses + .push(Use::new(operand, pos, UseIndex::invalid(), i as u8)); // Create/extend the LiveRange and add the use to the range. let range = CodeRange { @@ -1493,12 +1514,8 @@ impl<'a, F: Function> Env<'a, F> { // Create the actual use object. let u = UseIndex(self.uses.len() as u32); - self.uses.push(Use { - operand, - pos, - slot: SLOT_NONE, - next_use: UseIndex::invalid(), - }); + self.uses + .push(Use::new(operand, pos, UseIndex::invalid(), SLOT_NONE)); // Create/extend the LiveRange and add the use to the range. let range = CodeRange { @@ -1604,14 +1621,14 @@ impl<'a, F: Function> Env<'a, F> { let mut use_iter = self.ranges[iter.index()].first_use; while use_iter.is_valid() { let pos = self.uses[use_iter.index()].pos; - let slot = self.uses[use_iter.index()].slot as usize; + let slot = self.uses[use_iter.index()].slot() as usize; fixup_multi_fixed_vregs( pos, slot, &mut self.uses[use_iter.index()].operand, &mut self.multi_fixed_reg_fixups, ); - use_iter = self.uses[use_iter.index()].next_use; + use_iter = self.uses[use_iter.index()].next_use(); } for (clobber, inst) in extra_clobbers { @@ -1928,11 +1945,9 @@ impl<'a, F: Function> Env<'a, F> { to_vreg.index(), from_vreg.index() ); - let to_bundle = - self.ranges[self.vregs[to_vreg.index()].first_range.index()].bundle; + let to_bundle = self.ranges[self.vregs[to_vreg.index()].first_range.index()].bundle; assert!(to_bundle.is_valid()); - let from_bundle = - self.ranges[self.vregs[from_vreg.index()].first_range.index()].bundle; + let from_bundle = self.ranges[self.vregs[from_vreg.index()].first_range.index()].bundle; assert!(from_bundle.is_valid()); log::debug!( " -> from bundle{} to bundle{}", @@ -2112,8 +2127,8 @@ impl<'a, F: Function> Env<'a, F> { i, u.operand, u.pos, - u.slot, - u.next_use, + u.slot(), + u.next_use(), ); } } @@ -2145,7 +2160,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> use {:?} op {:?} req {:?}", use_iter, use_op, use_req); needed = needed.merge(use_req)?; log::debug!(" -> needed {:?}", needed); - use_iter = usedata.next_use; + use_iter = usedata.next_use(); } iter = range.next_in_bundle; } @@ -2199,10 +2214,7 @@ impl<'a, F: Function> Env<'a, F> { { log::debug!(" -> btree contains range {:?} that overlaps", preg_range); if self.ranges[preg_range.index()].vreg.is_valid() { - log::debug!( - " -> from vreg {:?}", - self.ranges[preg_range.index()].vreg - ); + log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); // range from an allocated bundle: find the bundle and add to // conflicts 
list. let conflict_bundle = self.ranges[preg_range.index()].bundle; @@ -2324,7 +2336,7 @@ impl<'a, F: Function> Env<'a, F> { fixed = true; break; } - use_iter = use_data.next_use; + use_iter = use_data.next_use(); } // Minimal if this is the only range in the bundle, and if // the range covers only one instruction. Note that it @@ -2536,7 +2548,7 @@ impl<'a, F: Function> Env<'a, F> { let use_data = &self.uses[use_idx.index()]; log::debug!(" -> range has use at {:?}", use_data.pos); update_with_pos(use_data.pos); - use_idx = use_data.next_use; + use_idx = use_data.next_use(); } our_iter = self.ranges[our_iter.index()].next_in_bundle; @@ -2624,7 +2636,7 @@ impl<'a, F: Function> Env<'a, F> { splits.push(before_use_inst); } splits.push(after_use_inst); - use_idx = use_data.next_use; + use_idx = use_data.next_use(); } iter = rangedata.next_in_bundle; @@ -2793,8 +2805,7 @@ impl<'a, F: Function> Env<'a, F> { // tail-pointer so we do not need to update that.) let rest_lr = self.create_liverange(rest_range); self.ranges[rest_lr.index()].vreg = self.ranges[iter.index()].vreg; - self.ranges[rest_lr.index()].next_in_reg = - self.ranges[iter.index()].next_in_reg; + self.ranges[rest_lr.index()].next_in_reg = self.ranges[iter.index()].next_in_reg; self.ranges[iter.index()].next_in_reg = rest_lr; log::debug!( @@ -2822,7 +2833,7 @@ impl<'a, F: Function> Env<'a, F> { ); uses_spill_weight += spill_weight_from_policy(policy); log::debug!(" -> use {:?} remains in orig", use_iter); - use_iter = self.uses[use_iter.index()].next_use; + use_iter = self.uses[use_iter.index()].next_use(); } // Move over `rest`'s uses and update stats on first @@ -2833,12 +2844,12 @@ impl<'a, F: Function> Env<'a, F> { use_iter ); self.ranges[rest_lr.index()].first_use = use_iter; - self.ranges[rest_lr.index()].last_use = - self.ranges[iter.index()].last_use; + self.ranges[rest_lr.index()].last_use = self.ranges[iter.index()].last_use; self.ranges[iter.index()].last_use = last_use_in_first_range; if last_use_in_first_range.is_valid() { - self.uses[last_use_in_first_range.index()].next_use = UseIndex::invalid(); + self.uses[last_use_in_first_range.index()] + .set_next_use(UseIndex::invalid()); } else { self.ranges[iter.index()].first_use = UseIndex::invalid(); } @@ -3734,7 +3745,7 @@ impl<'a, F: Function> Env<'a, F> { let usedata = &self.uses[use_iter.index()]; debug_assert!(range.contains_point(usedata.pos)); let inst = usedata.pos.inst(); - let slot = usedata.slot; + let slot = usedata.slot(); let operand = usedata.operand; // Safepoints add virtual uses with no slots; // avoid these. @@ -3744,7 +3755,7 @@ impl<'a, F: Function> Env<'a, F> { if let OperandPolicy::Reuse(_) = operand.policy() { reuse_input_insts.push(inst); } - use_iter = self.uses[use_iter.index()].next_use; + use_iter = self.uses[use_iter.index()].next_use(); } // Scan over program move srcs/dsts to fill in allocations. 
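The hunks above repeatedly walk an intrusive, index-linked list of uses (a range's `first_use` plus each use's `next_use()` link), carrying a `prev` cursor so a node can be spliced out or inserted in sorted position without back-links. A self-contained sketch of that pattern follows; `Node`, `insert_sorted`, and `INVALID` are hypothetical stand-ins, not the allocator's types.

    const INVALID: usize = usize::MAX;

    struct Node {
        pos: u32,    // sort key, standing in for ProgPoint
        next: usize, // index of the next node, or INVALID
    }

    // Insert node `n` into the list rooted at `*head`, keeping `pos` order.
    fn insert_sorted(nodes: &mut [Node], head: &mut usize, n: usize) {
        let mut prev = INVALID;
        let mut iter = *head;
        while iter != INVALID && nodes[iter].pos <= nodes[n].pos {
            prev = iter;
            iter = nodes[iter].next;
        }
        nodes[n].next = iter;
        if prev != INVALID {
            nodes[prev].next = n;
        } else {
            *head = n;
        }
    }

    fn main() {
        let mut nodes = vec![
            Node { pos: 10, next: INVALID },
            Node { pos: 30, next: INVALID },
            Node { pos: 20, next: INVALID },
        ];
        let mut head = INVALID;
        for i in 0..nodes.len() {
            insert_sorted(&mut nodes, &mut head, i);
        }
        // Walk the list and check the order.
        let mut iter = head;
        let mut order = vec![];
        while iter != INVALID {
            order.push(nodes[iter].pos);
            iter = nodes[iter].next;
        }
        assert_eq!(order, vec![10, 20, 30]);
    }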
From d2cc4f1ac2a82bc8b49271e638c9faffe72a4159 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 19:20:28 -0700 Subject: [PATCH 030/155] More efficient queue_bundles (saves 18% on clang.wasm) --- src/ion/mod.rs | 62 +++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 32f29942..cc714a02 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -303,7 +303,7 @@ impl<'a> std::iter::Iterator for RangeSummaryIter<'a> { #[derive(Clone, Debug)] struct SpillSet { - bundles: LiveBundleVec, + bundles: SmallVec<[LiveBundleIndex; 2]>, slot: SpillSlotIndex, reg_hint: PReg, class: RegClass, @@ -1909,7 +1909,21 @@ impl<'a, F: Function> Env<'a, F> { self.add_liverange_to_preg(range, preg); iter = self.ranges[iter.index()].next_in_bundle; } + continue; } + + // Otherwise, create a spillslot for it. + let ssidx = SpillSetIndex::new(self.spillsets.len()); + let reg = self.vreg_regs[vreg.index()]; + let size = self.func.spillslot_size(reg.class(), reg) as u8; + self.spillsets.push(SpillSet { + bundles: smallvec![], + slot: SpillSlotIndex::invalid(), + size, + class: reg.class(), + reg_hint: PReg::invalid(), + }); + self.bundles[bundle.index()].spillset = ssidx; } for inst in 0..self.func.insts() { @@ -2029,43 +2043,19 @@ impl<'a, F: Function> Env<'a, F> { } fn queue_bundles(&mut self) { - for vreg in 0..self.vregs.len() { - let vreg = VRegIndex::new(vreg); - let mut lr = self.vregs[vreg.index()].first_range; - while lr.is_valid() { - let bundle = self.ranges[lr.index()].bundle; - if !self.bundles[bundle.index()].allocation.is_none() { - // Pinned VReg -- already allocated, so skip. - lr = self.ranges[lr.index()].next_in_bundle; - continue; - } - if self.bundles[bundle.index()].first_range == lr { - // First time seeing `bundle`: allocate a spillslot for it, - // compute its priority, and enqueue it. - let ssidx = SpillSetIndex::new(self.spillsets.len()); - let reg = self.vreg_regs[vreg.index()]; - let size = self.func.spillslot_size(reg.class(), reg) as u8; - self.spillsets.push(SpillSet { - bundles: smallvec![], - slot: SpillSlotIndex::invalid(), - size, - class: reg.class(), - reg_hint: PReg::invalid(), - }); - self.bundles[bundle.index()].spillset = ssidx; - let prio = self.compute_bundle_prio(bundle); - self.bundles[bundle.index()].prio = prio; - self.recompute_bundle_properties(bundle); - self.allocation_queue.insert(bundle, prio as usize); - } - - // Keep going even if we handled one bundle for this vreg above: - // if we split a vreg's liveranges into multiple bundles, we - // need to hit all the bundles. - lr = self.ranges[lr.index()].next_in_bundle; + for bundle in 0..self.bundles.len() { + if self.bundles[bundle].first_range.is_invalid() { + continue; } + if !self.bundles[bundle].allocation.is_none() { + continue; + } + let bundle = LiveBundleIndex::new(bundle); + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue.insert(bundle, prio as usize); } - self.stats.merged_bundle_count = self.allocation_queue.heap.len(); } From a6e312882190fe0cff1071f4f62cbac23f0d7a1d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 19:48:34 -0700 Subject: [PATCH 031/155] Support `mod` (modify) operands, for better efficiency with regalloc.rs/Cranelift shim. 
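A `mod` (modify) operand reads and writes the same vreg in place, so the allocator must keep it in a single location that is live across the whole instruction; as of this patch the position on a Mod operand is effectively ignored and it is processed at the before-point. The sketch below shows how a client might describe a two-address `dst := dst + src` instruction with the new kind, assuming the `Operand::new` constructor and the `OperandPolicy`/`OperandKind`/`OperandPos` enums from this series; `two_address_add_operands` is an illustrative helper, not part of the crate. Note that `validate_ssa` (see the ssa.rs hunk below) rejects Mod operands, so they are intended only for the non-SSA path used by the regalloc.rs shim.

    use regalloc2::{Operand, OperandKind, OperandPolicy, OperandPos, VReg};

    // Hypothetical helper: operands for a two-address `dst := dst + src`.
    // `dst` is read and written in place, so it gets a single Mod operand
    // instead of a separate use plus a reuse-def.
    fn two_address_add_operands(dst: VReg, src: VReg) -> [Operand; 2] {
        [
            Operand::new(dst, OperandPolicy::Reg, OperandKind::Mod, OperandPos::Before),
            Operand::new(src, OperandPolicy::Reg, OperandKind::Use, OperandPos::Before),
        ]
    }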
--- src/checker.rs | 6 ++-- src/ion/mod.rs | 76 ++++++++++++++++++++++++++++---------------------- src/lib.rs | 55 +++++++++++++++++++++++------------- src/ssa.rs | 6 ++++ 4 files changed, 87 insertions(+), 56 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index 05f32f53..244a1e76 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -297,10 +297,8 @@ impl CheckerState { // the requirements of the OperandPolicy. for (op, alloc) in operands.iter().zip(allocs.iter()) { let is_here = match (op.pos(), pos) { - (OperandPos::Before, InstPosition::Before) - | (OperandPos::Both, InstPosition::Before) => true, - (OperandPos::After, InstPosition::After) - | (OperandPos::Both, InstPosition::After) => true, + (OperandPos::Before, InstPosition::Before) => true, + (OperandPos::After, InstPosition::After) => true, _ => false, }; if !is_here { diff --git a/src/ion/mod.rs b/src/ion/mod.rs index cc714a02..86ca6094 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -326,19 +326,16 @@ struct PRegData { /* * Environment setup: * - * We have seven fundamental objects: LiveRange, LiveBundle, SpillSet, Use, Def, VReg, PReg. + * We have seven fundamental objects: LiveRange, LiveBundle, SpillSet, Use, VReg, PReg. * * The relationship is as follows: * * LiveRange --(vreg)--> shared(VReg) * LiveRange --(bundle)--> shared(LiveBundle) - * LiveRange --(def)--> owns(Def) * LiveRange --(use) --> list(Use) * * Use --(vreg)--> shared(VReg) * - * Def --(vreg) --> owns(VReg) - * * LiveBundle --(range)--> list(LiveRange) * LiveBundle --(spillset)--> shared(SpillSet) * LiveBundle --(parent)--> parent(LiveBundle) @@ -565,6 +562,7 @@ enum Requirement { Any(RegClass), } impl Requirement { + #[inline(always)] fn class(self) -> RegClass { match self { Requirement::Fixed(preg) => preg.class(), @@ -573,7 +571,7 @@ impl Requirement { } } } - + #[inline(always)] fn merge(self, other: Requirement) -> Option { if self.class() != other.class() { return None; @@ -590,6 +588,7 @@ impl Requirement { _ => None, } } + #[inline(always)] fn from_operand(op: Operand) -> Requirement { match op.policy() { OperandPolicy::FixedReg(preg) => Requirement::Fixed(preg), @@ -1034,7 +1033,7 @@ impl<'a, F: Function> Env<'a, F> { ); lrdata.uses_spill_weight_and_flags -= spill_weight_from_policy(usedata.operand.policy()); - if usedata.operand.kind() == OperandKind::Def { + if usedata.operand.kind() != OperandKind::Use { lrdata.uses_spill_weight_and_flags -= 2000; } } @@ -1083,7 +1082,7 @@ impl<'a, F: Function> Env<'a, F> { spill_weight_from_policy(policy) ); self.ranges[into.index()].uses_spill_weight_and_flags += spill_weight_from_policy(policy); - if self.uses[u.index()].operand.kind() == OperandKind::Def { + if self.uses[u.index()].operand.kind() != OperandKind::Use { self.ranges[into.index()].uses_spill_weight_and_flags += 2000; } log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight()); @@ -1149,11 +1148,11 @@ impl<'a, F: Function> Env<'a, F> { live.set(dst.vreg(), false); live.set(src.vreg(), true); } - for pos in &[OperandPos::After, OperandPos::Both, OperandPos::Before] { + for pos in &[OperandPos::After, OperandPos::Before] { for op in self.func.inst_operands(inst) { if op.pos() == *pos { match op.kind() { - OperandKind::Use => { + OperandKind::Use | OperandKind::Mod => { live.set(op.vreg().vreg(), true); } OperandKind::Def => { @@ -1355,11 +1354,12 @@ impl<'a, F: Function> Env<'a, F> { // don't borrow `self` let operand = self.func.inst_operands(inst)[i]; match operand.kind() { - OperandKind::Def => { + OperandKind::Def | 
OperandKind::Mod => { // Create the Def object. - let pos = match operand.pos() { - OperandPos::Before | OperandPos::Both => ProgPoint::before(inst), - OperandPos::After => ProgPoint::after(inst), + let pos = match (operand.kind(), operand.pos()) { + (OperandKind::Mod, _) => ProgPoint::before(inst), + (_, OperandPos::Before) => ProgPoint::before(inst), + (_, OperandPos::After) => ProgPoint::after(inst), }; let u = UseIndex(self.uses.len() as u32); self.uses @@ -1370,11 +1370,6 @@ impl<'a, F: Function> Env<'a, F> { // Fill in vreg's actual data. self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); - // Trim the range for this vreg to start - // at `pos` if it previously ended at the - // start of this block (i.e. was not - // merged into some larger LiveRange due - // to out-of-order blocks). let mut lr = vreg_ranges[operand.vreg().vreg()]; log::debug!(" -> has existing LR {:?}", lr); // If there was no liverange (dead def), create a trivial one. @@ -1389,22 +1384,34 @@ impl<'a, F: Function> Env<'a, F> { ); log::debug!(" -> invalid; created {:?}", lr); } - if self.ranges[lr.index()].range.from - == self.cfginfo.block_entry[block.index()] - { - log::debug!(" -> started at block start; trimming to {:?}", pos); - self.ranges[lr.index()].range.from = pos; - } self.insert_use_into_liverange_and_update_stats(lr, u); - // Remove from live-set. - live.set(operand.vreg().vreg(), false); - vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); + + if operand.kind() == OperandKind::Def { + // Trim the range for this vreg to start + // at `pos` if it previously ended at the + // start of this block (i.e. was not + // merged into some larger LiveRange due + // to out-of-order blocks). + if self.ranges[lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!( + " -> started at block start; trimming to {:?}", + pos + ); + self.ranges[lr.index()].range.from = pos; + } + + // Remove from live-set. + live.set(operand.vreg().vreg(), false); + vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); + } } OperandKind::Use => { // Establish where the use occurs. let mut pos = match operand.pos() { OperandPos::Before => ProgPoint::before(inst), - OperandPos::Both | OperandPos::After => ProgPoint::after(inst), + OperandPos::After => ProgPoint::after(inst), }; // If there are any reused inputs in this // instruction, and this is *not* the @@ -2612,8 +2619,9 @@ impl<'a, F: Function> Env<'a, F> { // the def so we don't need to insert a move. use_data.pos } else { - // For an use, split before the instruction -- - // this allows us to insert a move if necessary. + // For an use or mod, split before the instruction + // -- this allows us to insert a move if + // necessary. ProgPoint::before(use_data.pos.inst()) }; let after_use_inst = ProgPoint::before(use_data.pos.inst().next()); @@ -4118,9 +4126,11 @@ impl<'a, F: Function> Env<'a, F> { ); } - // Ensure edits are in sorted ProgPoint order. - self.edits - .sort_unstable_by_key(|&(pos, prio, _)| (pos, prio)); + // Ensure edits are in sorted ProgPoint order. N.B.: this must + // be a stable sort! We have to keep the order produced by the + // parallel-move resolver for all moves within a single sort + // key. + self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio)); self.stats.edits_count = self.edits.len(); // Add debug annotations. 
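As a self-contained illustration of the stable-sort requirement noted in the last hunk above: with `sort_by_key`, entries whose keys compare equal keep the relative order in which the parallel-move resolver produced them, while `sort_unstable_by_key` gives no such guarantee. The tuples below are a simplified stand-in for the real `(ProgPoint, InsertMovePrio, Edit)` entries.

    fn main() {
        // Three "edits" share the same (pos, prio) key; the resolver's
        // output order (a, b, c) must survive the sort.
        let mut edits = vec![
            ((5, 0), "a"),
            ((5, 0), "b"),
            ((2, 0), "x"),
            ((5, 0), "c"),
        ];
        // Stable sort: equal keys keep their original relative order.
        edits.sort_by_key(|&(key, _)| key);
        let values: Vec<_> = edits.iter().map(|&(_, v)| v).collect();
        assert_eq!(values, vec!["x", "a", "b", "c"]);
    }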
diff --git a/src/lib.rs b/src/lib.rs index 8b8b69da..3dfc41f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -209,7 +209,7 @@ pub struct Operand { /// so that clients, if they wish, can track just one `u32` per /// register slot and edit it in-place after allocation. /// - /// policy:3 kind:1 pos:2 class:1 preg:5 vreg:20 + /// policy:3 kind:2 pos:1 class:1 preg:5 vreg:20 bits: u32, } @@ -237,7 +237,7 @@ impl Operand { | (preg_field << 20) | (class_field << 25) | (pos_field << 26) - | (kind_field << 28) + | (kind_field << 27) | (policy_field << 29), } } @@ -253,7 +253,12 @@ impl Operand { } #[inline(always)] pub fn reg_use_at_end(vreg: VReg) -> Self { - Operand::new(vreg, OperandPolicy::Reg, OperandKind::Use, OperandPos::Both) + Operand::new( + vreg, + OperandPolicy::Reg, + OperandKind::Use, + OperandPos::After, + ) } #[inline(always)] pub fn reg_def(vreg: VReg) -> Self { @@ -266,11 +271,21 @@ impl Operand { } #[inline(always)] pub fn reg_def_at_start(vreg: VReg) -> Self { - Operand::new(vreg, OperandPolicy::Reg, OperandKind::Def, OperandPos::Both) + Operand::new( + vreg, + OperandPolicy::Reg, + OperandKind::Def, + OperandPos::Before, + ) } #[inline(always)] pub fn reg_temp(vreg: VReg) -> Self { - Operand::new(vreg, OperandPolicy::Reg, OperandKind::Def, OperandPos::Both) + Operand::new( + vreg, + OperandPolicy::Reg, + OperandKind::Def, + OperandPos::Before, + ) } #[inline(always)] pub fn reg_reuse_def(vreg: VReg, idx: usize) -> Self { @@ -278,7 +293,7 @@ impl Operand { vreg, OperandPolicy::Reuse(idx), OperandKind::Def, - OperandPos::Both, + OperandPos::After, ) } #[inline(always)] @@ -318,21 +333,21 @@ impl Operand { #[inline(always)] pub fn kind(self) -> OperandKind { - let kind_field = (self.bits >> 28) & 1; + let kind_field = (self.bits >> 27) & 3; match kind_field { 0 => OperandKind::Def, - 1 => OperandKind::Use, + 1 => OperandKind::Mod, + 2 => OperandKind::Use, _ => unreachable!(), } } #[inline(always)] pub fn pos(self) -> OperandPos { - let pos_field = (self.bits >> 26) & 3; + let pos_field = (self.bits >> 26) & 1; match pos_field { 0 => OperandPos::Before, 1 => OperandPos::After, - 2 => OperandPos::Both, _ => unreachable!(), } } @@ -415,14 +430,14 @@ impl std::fmt::Display for OperandPolicy { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum OperandKind { Def = 0, - Use = 1, + Mod = 1, + Use = 2, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum OperandPos { Before = 0, After = 1, - Both = 2, } /// An Allocation represents the end result of regalloc for an @@ -585,7 +600,8 @@ impl OperandOrAllocation { let bits = alloc.bits() | match kind { OperandKind::Def => 0, - OperandKind::Use => 1 << 28, + OperandKind::Mod => 1 << 27, + OperandKind::Use => 2 << 27, }; Self { bits } } @@ -604,11 +620,11 @@ impl OperandOrAllocation { } pub fn as_allocation(&self) -> Option { if self.is_allocation() { - // Remove the def/use bit -- the canonical `Allocation` - // does not have this, and we want allocs to continue to - // be comparable whether they are used for reads or - // writes. - Some(Allocation::from_bits(self.bits & !(1 << 28))) + // Remove the kind (def/use/mod) bits -- the canonical + // `Allocation` does not have this, and we want allocs to + // continue to be comparable whether they are used for + // reads or writes. 
+ Some(Allocation::from_bits(self.bits & !(3 << 27))) } else { None } @@ -618,7 +634,8 @@ impl OperandOrAllocation { let kind_field = (self.bits >> 28) & 1; match kind_field { 0 => OperandKind::Def, - 1 => OperandKind::Use, + 1 => OperandKind::Mod, + 2 => OperandKind::Use, _ => unreachable!(), } } diff --git a/src/ssa.rs b/src/ssa.rs index 2d6e6250..de69841b 100644 --- a/src/ssa.rs +++ b/src/ssa.rs @@ -47,6 +47,12 @@ pub fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllo } defined[operand.vreg().vreg()] = true; } + OperandKind::Mod => { + // Mod (modify) operands are not used in SSA, + // but can be used by non-SSA code (e.g. with + // the regalloc.rs compatibility shim). + return Err(RegAllocError::SSA(operand.vreg(), iix)); + } } } } From 4185eab44152191fd1597ef72c2706e13788b881 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 20:12:40 -0700 Subject: [PATCH 032/155] More efficient live-range creation re: uses --- src/ion/mod.rs | 218 +++++++++++++++++++++++++++++-------------------- 1 file changed, 128 insertions(+), 90 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 86ca6094..6779d662 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1350,108 +1350,145 @@ impl<'a, F: Function> Env<'a, F> { } // Process defs and uses. - for i in 0..self.func.inst_operands(inst).len() { - // don't borrow `self` - let operand = self.func.inst_operands(inst)[i]; - match operand.kind() { - OperandKind::Def | OperandKind::Mod => { - // Create the Def object. - let pos = match (operand.kind(), operand.pos()) { - (OperandKind::Mod, _) => ProgPoint::before(inst), - (_, OperandPos::Before) => ProgPoint::before(inst), - (_, OperandPos::After) => ProgPoint::after(inst), - }; - let u = UseIndex(self.uses.len() as u32); - self.uses - .push(Use::new(operand, pos, UseIndex::invalid(), i as u8)); - - log::debug!("Def of {} at {:?}", operand.vreg(), pos); - - // Fill in vreg's actual data. - self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); - - let mut lr = vreg_ranges[operand.vreg().vreg()]; - log::debug!(" -> has existing LR {:?}", lr); - // If there was no liverange (dead def), create a trivial one. - if !live.get(operand.vreg().vreg()) { - lr = self.add_liverange_to_vreg( - VRegIndex::new(operand.vreg().vreg()), - CodeRange { - from: pos, - to: pos.next(), - }, - &mut num_ranges, - ); - log::debug!(" -> invalid; created {:?}", lr); - } - self.insert_use_into_liverange_and_update_stats(lr, u); - - if operand.kind() == OperandKind::Def { - // Trim the range for this vreg to start - // at `pos` if it previously ended at the - // start of this block (i.e. was not - // merged into some larger LiveRange due - // to out-of-order blocks). - if self.ranges[lr.index()].range.from - == self.cfginfo.block_entry[block.index()] - { - log::debug!( - " -> started at block start; trimming to {:?}", - pos - ); - self.ranges[lr.index()].range.from = pos; - } - - // Remove from live-set. 
- live.set(operand.vreg().vreg(), false); - vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); + for &cur_pos in &[InstPosition::After, InstPosition::Before] { + for i in 0..self.func.inst_operands(inst).len() { + // don't borrow `self` + let operand = self.func.inst_operands(inst)[i]; + let pos = match (operand.kind(), operand.pos()) { + (OperandKind::Mod, _) => ProgPoint::before(inst), + (OperandKind::Def, OperandPos::Before) => ProgPoint::before(inst), + (OperandKind::Def, OperandPos::After) => ProgPoint::after(inst), + (OperandKind::Use, OperandPos::After) => ProgPoint::after(inst), + // If this is a branch, extend `pos` to + // the end of the block. (Branch uses are + // blockparams and need to be live at the + // end of the block.) + (OperandKind::Use, _) if self.func.is_branch(inst) => { + self.cfginfo.block_exit[block.index()] } - } - OperandKind::Use => { - // Establish where the use occurs. - let mut pos = match operand.pos() { - OperandPos::Before => ProgPoint::before(inst), - OperandPos::After => ProgPoint::after(inst), - }; // If there are any reused inputs in this // instruction, and this is *not* the // reused input, force `pos` to // `After`. (See note below for why; it's // very subtle!) - if reused_input.is_some() && reused_input.unwrap() != i { - pos = ProgPoint::after(inst); - } - // If this is a branch, extend `pos` to - // the end of the block. (Branch uses are - // blockparams and need to be live at the - // end of the block.) - if self.func.is_branch(inst) { - pos = self.cfginfo.block_exit[block.index()]; + (OperandKind::Use, OperandPos::Before) + if reused_input.is_some() && reused_input.unwrap() != i => + { + ProgPoint::after(inst) } + (OperandKind::Use, OperandPos::Before) => ProgPoint::before(inst), + }; - // Create the actual use object. - let u = UseIndex(self.uses.len() as u32); - self.uses - .push(Use::new(operand, pos, UseIndex::invalid(), i as u8)); + if pos.pos() != cur_pos { + continue; + } - // Create/extend the LiveRange and add the use to the range. - let range = CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: pos.next(), - }; - let lr = self.add_liverange_to_vreg( - VRegIndex::new(operand.vreg().vreg()), - range, - &mut num_ranges, - ); - vreg_ranges[operand.vreg().vreg()] = lr; + log::debug!( + "processing inst{} operand at {:?}: {:?}", + inst.index(), + pos, + operand + ); + + match operand.kind() { + OperandKind::Def | OperandKind::Mod => { + // Create the use object. + let u = UseIndex(self.uses.len() as u32); + self.uses.push(Use::new( + operand, + pos, + UseIndex::invalid(), + i as u8, + )); + + log::debug!("Def of {} at {:?}", operand.vreg(), pos); + + // Fill in vreg's actual data. + self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); + + let mut lr = vreg_ranges[operand.vreg().vreg()]; + log::debug!(" -> has existing LR {:?}", lr); + // If there was no liverange (dead def), create a trivial one. + if !live.get(operand.vreg().vreg()) { + let from = match operand.kind() { + OperandKind::Def => pos, + OperandKind::Mod => self.cfginfo.block_entry[block.index()], + _ => unreachable!(), + }; + lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + CodeRange { + from, + to: pos.next(), + }, + &mut num_ranges, + ); + log::debug!(" -> invalid; created {:?}", lr); + } + self.insert_use_into_liverange_and_update_stats(lr, u); + + if operand.kind() == OperandKind::Def { + // Trim the range for this vreg to start + // at `pos` if it previously ended at the + // start of this block (i.e. 
was not + // merged into some larger LiveRange due + // to out-of-order blocks). + if self.ranges[lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!( + " -> started at block start; trimming to {:?}", + pos + ); + self.ranges[lr.index()].range.from = pos; + } + + // Remove from live-set. + live.set(operand.vreg().vreg(), false); + vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); + } + } + OperandKind::Use => { + // Create the use object. + let u = UseIndex(self.uses.len() as u32); + self.uses.push(Use::new( + operand, + pos, + UseIndex::invalid(), + i as u8, + )); + + // Create/extend the LiveRange if it + // doesn't already exist, and add the use + // to the range. + let mut lr = vreg_ranges[operand.vreg().vreg()]; + if !live.get(operand.vreg().vreg()) { + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + range, + &mut num_ranges, + ); + vreg_ranges[operand.vreg().vreg()] = lr; + } + assert!(lr.is_valid()); - log::debug!("Use of {:?} at {:?} -> {:?} -> {:?}", operand, pos, u, lr); + log::debug!( + "Use of {:?} at {:?} -> {:?} -> {:?}", + operand, + pos, + u, + lr + ); - self.insert_use_into_liverange_and_update_stats(lr, u); + self.insert_use_into_liverange_and_update_stats(lr, u); - // Add to live-set. - live.set(operand.vreg().vreg(), true); + // Add to live-set. + live.set(operand.vreg().vreg(), true); + } } } } @@ -3741,6 +3778,7 @@ impl<'a, F: Function> Env<'a, F> { let mut use_iter = self.ranges[iter.index()].first_use; while use_iter.is_valid() { let usedata = &self.uses[use_iter.index()]; + log::debug!("applying to use: {:?}", usedata); debug_assert!(range.contains_point(usedata.pos)); let inst = usedata.pos.inst(); let slot = usedata.slot(); From e41b0101a86732e90a8cdbb37685da6bb40c28d7 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 20:41:33 -0700 Subject: [PATCH 033/155] Struct-of-array transform: pull LiveRangeHot out of LiveRange with just range and next-in-bundle link --- src/ion/mod.rs | 268 ++++++++++++++++++++++++++----------------------- 1 file changed, 145 insertions(+), 123 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 6779d662..92f5cb1a 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -113,8 +113,13 @@ define_index!(SpillSlotIndex); type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>; #[derive(Clone, Debug)] -struct LiveRange { +struct LiveRangeHot { range: CodeRange, + next_in_bundle: LiveRangeIndex, +} + +#[derive(Clone, Debug)] +struct LiveRange { vreg: VRegIndex, bundle: LiveBundleIndex, uses_spill_weight_and_flags: u32, @@ -122,7 +127,6 @@ struct LiveRange { first_use: UseIndex, last_use: UseIndex, - next_in_bundle: LiveRangeIndex, next_in_reg: LiveRangeIndex, merged_into: LiveRangeIndex, @@ -171,11 +175,11 @@ impl Use { #[inline(always)] fn new(operand: Operand, pos: ProgPoint, next_use: UseIndex, slot: u8) -> Self { debug_assert!(next_use.is_invalid() || next_use.index() < ((1 << 24) - 1)); - let next_use = next_use.0 & 0x00ff_ffff; + let next_use = (next_use.0 as usize) & 0x00ff_ffff; Self { operand, pos, - next_use_and_slot: next_use | ((slot as u32) << 24), + next_use_and_slot: (next_use as u32) | ((slot as u32) << 24), } } #[inline(always)] @@ -183,7 +187,7 @@ impl Use { let val = self.next_use_and_slot & 0x00ff_ffff; // Sign-extend 0x00ff_ffff to INVALID (0xffff_ffff). 
let val = ((val as i32) << 8) >> 8; - UseIndex(val as u32) + UseIndex::new(val as usize) } #[inline(always)] fn slot(&self) -> u8 { @@ -192,8 +196,8 @@ impl Use { #[inline(always)] fn set_next_use(&mut self, u: UseIndex) { debug_assert!(u.is_invalid() || u.index() < ((1 << 24) - 1)); - let u = u.0 & 0x00ff_ffff; - self.next_use_and_slot = (self.next_use_and_slot & 0xff00_0000) | u; + let u = (u.0 as usize) & 0x00ff_ffff; + self.next_use_and_slot = (self.next_use_and_slot & 0xff00_0000) | (u as u32); } } @@ -371,6 +375,7 @@ struct Env<'a, F: Function> { blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>, ranges: Vec, + ranges_hot: Vec, range_ranges: Vec, bundles: Vec, spillsets: Vec, @@ -770,6 +775,7 @@ impl<'a, F: Function> Env<'a, F> { blockparam_allocs: vec![], bundles: Vec::with_capacity(n), ranges: Vec::with_capacity(4 * n), + ranges_hot: Vec::with_capacity(4 * n), range_ranges: Vec::with_capacity(4 * n), spillsets: Vec::with_capacity(n), uses: Vec::with_capacity(4 * n), @@ -850,8 +856,12 @@ impl<'a, F: Function> Env<'a, F> { fn create_liverange(&mut self, range: CodeRange) -> LiveRangeIndex { let idx = self.ranges.len(); - self.ranges.push(LiveRange { + self.ranges_hot.push(LiveRangeHot { range, + next_in_bundle: LiveRangeIndex::invalid(), + }); + + self.ranges.push(LiveRange { vreg: VRegIndex::invalid(), bundle: LiveBundleIndex::invalid(), uses_spill_weight_and_flags: 0, @@ -859,7 +869,6 @@ impl<'a, F: Function> Env<'a, F> { first_use: UseIndex::invalid(), last_use: UseIndex::invalid(), - next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), merged_into: LiveRangeIndex::invalid(), @@ -889,40 +898,40 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.vregs[vreg.index()].first_range; let mut prev = LiveRangeIndex::invalid(); while iter.is_valid() { - let existing = &mut self.ranges[iter.index()]; - log::debug!(" -> existing range: {:?}", existing); - if range.from >= existing.range.to && *num_ranges < COALESCE_LIMIT { + log::debug!(" -> existing range: {:?}", self.ranges[iter.index()]); + if range.from >= self.ranges_hot[iter.index()].range.to && *num_ranges < COALESCE_LIMIT + { // New range comes fully after this one -- record it as a lower bound. insert_after = iter; prev = iter; - iter = existing.next_in_reg; + iter = self.ranges[iter.index()].next_in_reg; log::debug!(" -> lower bound"); continue; } - if range.to <= existing.range.from { + if range.to <= self.ranges_hot[iter.index()].range.from { // New range comes fully before this one -- we're found our spot. log::debug!(" -> upper bound (break search loop)"); break; } // If we're here, then we overlap with at least one endpoint of the range. log::debug!(" -> must overlap"); - debug_assert!(range.overlaps(&existing.range)); + debug_assert!(range.overlaps(&self.ranges_hot[iter.index()].range)); if merged.is_invalid() { // This is the first overlapping range. Extend to simply cover the new range. merged = iter; - if range.from < existing.range.from { - existing.range.from = range.from; + if range.from < self.ranges_hot[iter.index()].range.from { + self.ranges_hot[iter.index()].range.from = range.from; } - if range.to > existing.range.to { - existing.range.to = range.to; + if range.to > self.ranges_hot[iter.index()].range.to { + self.ranges_hot[iter.index()].range.to = range.to; } log::debug!( " -> extended range of existing range to {:?}", - existing.range + self.ranges_hot[iter.index()].range ); // Continue; there may be more ranges to merge with. 
prev = iter; - iter = existing.next_in_reg; + iter = self.ranges[iter.index()].next_in_reg; continue; } // We overlap but we've already extended the first overlapping existing liverange, so @@ -934,10 +943,11 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[merged.index()] ); debug_assert!( - self.ranges[iter.index()].range.from >= self.ranges[merged.index()].range.from + self.ranges_hot[iter.index()].range.from + >= self.ranges_hot[merged.index()].range.from ); // Because we see LRs in order. - if self.ranges[iter.index()].range.to > self.ranges[merged.index()].range.to { - self.ranges[merged.index()].range.to = self.ranges[iter.index()].range.to; + if self.ranges_hot[iter.index()].range.to > self.ranges_hot[merged.index()].range.to { + self.ranges_hot[merged.index()].range.to = self.ranges_hot[iter.index()].range.to; } self.distribute_liverange_uses(iter, merged); log::debug!( @@ -983,7 +993,7 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[from.index()].vreg, self.ranges[into.index()].vreg ); - let into_range = self.ranges[into.index()].range; + let into_range = self.ranges_hot[into.index()].range; // For every use in `from`... let mut prev = UseIndex::invalid(); let mut iter = self.ranges[from.index()].first_use; @@ -1095,7 +1105,7 @@ impl<'a, F: Function> Env<'a, F> { ) -> Option { let mut range = self.vregs[vreg.index()].first_range; while range.is_valid() { - if self.ranges[range.index()].range.contains_point(pos) { + if self.ranges_hot[range.index()].range.contains_point(pos) { return Some(range); } range = self.ranges[range.index()].next_in_reg; @@ -1306,11 +1316,11 @@ impl<'a, F: Function> Env<'a, F> { } else { log::debug!(" -> has existing LR {:?}", dst_lr); } - if self.ranges[dst_lr.index()].range.from + if self.ranges_hot[dst_lr.index()].range.from == self.cfginfo.block_entry[block.index()] { log::debug!(" -> started at block start; trimming to {:?}", pos); - self.ranges[dst_lr.index()].range.from = pos; + self.ranges_hot[dst_lr.index()].range.from = pos; } live.set(dst.vreg(), false); vreg_ranges[dst.vreg()] = LiveRangeIndex::invalid(); @@ -1393,7 +1403,7 @@ impl<'a, F: Function> Env<'a, F> { match operand.kind() { OperandKind::Def | OperandKind::Mod => { // Create the use object. - let u = UseIndex(self.uses.len() as u32); + let u = UseIndex::new(self.uses.len()); self.uses.push(Use::new( operand, pos, @@ -1433,14 +1443,14 @@ impl<'a, F: Function> Env<'a, F> { // start of this block (i.e. was not // merged into some larger LiveRange due // to out-of-order blocks). - if self.ranges[lr.index()].range.from + if self.ranges_hot[lr.index()].range.from == self.cfginfo.block_entry[block.index()] { log::debug!( " -> started at block start; trimming to {:?}", pos ); - self.ranges[lr.index()].range.from = pos; + self.ranges_hot[lr.index()].range.from = pos; } // Remove from live-set. @@ -1450,7 +1460,7 @@ impl<'a, F: Function> Env<'a, F> { } OperandKind::Use => { // Create the use object. 
- let u = UseIndex(self.uses.len() as u32); + let u = UseIndex::new(self.uses.len()); self.uses.push(Use::new( operand, pos, @@ -1537,8 +1547,7 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.vregs[vreg.index()].first_range; let mut safepoint_idx = 0; while iter.is_valid() { - let rangedata = &self.ranges[iter.index()]; - let range = rangedata.range; + let range = self.ranges_hot[iter.index()].range; while safepoint_idx < self.safepoints.len() && ProgPoint::before(self.safepoints[safepoint_idx]) < range.from { @@ -1557,7 +1566,7 @@ impl<'a, F: Function> Env<'a, F> { ); // Create the actual use object. - let u = UseIndex(self.uses.len() as u32); + let u = UseIndex::new(self.uses.len()); self.uses .push(Use::new(operand, pos, UseIndex::invalid(), SLOT_NONE)); @@ -1781,13 +1790,13 @@ impl<'a, F: Function> Env<'a, F> { while iter.is_valid() { let vreg = self.ranges[iter.index()].vreg; assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - iter = self.ranges[iter.index()].next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } let mut iter = self.bundles[to.index()].first_range; while iter.is_valid() { let vreg = self.ranges[iter.index()].vreg; assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - iter = self.ranges[iter.index()].next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } } @@ -1802,10 +1811,13 @@ impl<'a, F: Function> Env<'a, F> { return false; } - if self.ranges[iter0.index()].range.from >= self.ranges[iter1.index()].range.to { - iter1 = self.ranges[iter1.index()].next_in_bundle; - } else if self.ranges[iter1.index()].range.from >= self.ranges[iter0.index()].range.to { - iter0 = self.ranges[iter0.index()].next_in_bundle; + if self.ranges_hot[iter0.index()].range.from >= self.ranges_hot[iter1.index()].range.to + { + iter1 = self.ranges_hot[iter1.index()].next_in_bundle; + } else if self.ranges_hot[iter1.index()].range.from + >= self.ranges_hot[iter0.index()].range.to + { + iter0 = self.ranges_hot[iter0.index()].next_in_bundle; } else { // Overlap -- cannot merge. return false; @@ -1832,7 +1844,7 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); while iter0.is_valid() { self.ranges[iter0.index()].bundle = from; - iter0 = self.ranges[iter0.index()].next_in_bundle; + iter0 = self.ranges_hot[iter0.index()].next_in_bundle; } return true; } @@ -1844,8 +1856,8 @@ impl<'a, F: Function> Env<'a, F> { // Pick the next range. let next_range_iter = if iter0.is_valid() { if iter1.is_valid() { - if self.ranges[iter0.index()].range.from - <= self.ranges[iter1.index()].range.from + if self.ranges_hot[iter0.index()].range.from + <= self.ranges_hot[iter1.index()].range.from { &mut iter0 } else { @@ -1858,11 +1870,11 @@ impl<'a, F: Function> Env<'a, F> { &mut iter1 }; let next = *next_range_iter; - *next_range_iter = self.ranges[next.index()].next_in_bundle; + *next_range_iter = self.ranges_hot[next.index()].next_in_bundle; // link from prev. if prev.is_valid() { - self.ranges[prev.index()].next_in_bundle = next; + self.ranges_hot[prev.index()].next_in_bundle = next; } else { self.bundles[to.index()].first_range = next; } @@ -1882,42 +1894,42 @@ impl<'a, F: Function> Env<'a, F> { lr, bundle ); - self.ranges[lr.index()].next_in_bundle = LiveRangeIndex::invalid(); + self.ranges_hot[lr.index()].next_in_bundle = LiveRangeIndex::invalid(); self.ranges[lr.index()].bundle = bundle; if self.bundles[bundle.index()].first_range.is_invalid() { // Empty bundle. 
self.bundles[bundle.index()].first_range = lr; self.bundles[bundle.index()].last_range = lr; - } else if self.ranges[self.bundles[bundle.index()].first_range.index()] + } else if self.ranges_hot[self.bundles[bundle.index()].first_range.index()] .range .to - <= self.ranges[lr.index()].range.from + <= self.ranges_hot[lr.index()].range.from { // After last range in bundle. let last = self.bundles[bundle.index()].last_range; - self.ranges[last.index()].next_in_bundle = lr; + self.ranges_hot[last.index()].next_in_bundle = lr; self.bundles[bundle.index()].last_range = lr; } else { // Find location to insert. let mut iter = self.bundles[bundle.index()].first_range; let mut insert_after = LiveRangeIndex::invalid(); - let insert_range = self.ranges[lr.index()].range; + let insert_range = self.ranges_hot[lr.index()].range; while iter.is_valid() { - debug_assert!(!self.ranges[iter.index()].range.overlaps(&insert_range)); - if self.ranges[iter.index()].range.to <= insert_range.from { + debug_assert!(!self.ranges_hot[iter.index()].range.overlaps(&insert_range)); + if self.ranges_hot[iter.index()].range.to <= insert_range.from { break; } insert_after = iter; - iter = self.ranges[iter.index()].next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } if insert_after.is_valid() { - self.ranges[insert_after.index()].next_in_bundle = lr; + self.ranges_hot[insert_after.index()].next_in_bundle = lr; if self.bundles[bundle.index()].last_range == insert_after { self.bundles[bundle.index()].last_range = lr; } } else { let next = self.bundles[bundle.index()].first_range; - self.ranges[lr.index()].next_in_bundle = next; + self.ranges_hot[lr.index()].next_in_bundle = next; self.bundles[bundle.index()].first_range = lr; } } @@ -1945,13 +1957,13 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; while iter.is_valid() { - let range = self.ranges[iter.index()].range; + let range = self.ranges_hot[iter.index()].range; // Create a new LiveRange for the PReg // reservation, unaffiliated with the VReg, to // reserve it (like a clobber) without the // possibility of eviction. 
self.add_liverange_to_preg(range, preg); - iter = self.ranges[iter.index()].next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } continue; } @@ -2039,16 +2051,16 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; let start_idx = self.range_ranges.len(); let start_pos = if iter.is_valid() { - self.ranges[iter.index()].range.from + self.ranges_hot[iter.index()].range.from } else { ProgPoint::from_index(0) }; let mut end_pos = start_pos; while iter.is_valid() { - let range = self.ranges[iter.index()].range; + let range = self.ranges_hot[iter.index()].range; end_pos = range.to; self.range_ranges.push(range); - iter = self.ranges[iter.index()].next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } let end_idx = self.range_ranges.len(); let bound = CodeRange { @@ -2080,8 +2092,8 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; let mut total = 0; while iter.is_valid() { - total += self.ranges[iter.index()].range.len() as u32; - iter = self.ranges[iter.index()].next_in_bundle; + total += self.ranges_hot[iter.index()].range.len() as u32; + iter = self.ranges_hot[iter.index()].next_in_bundle; } total } @@ -2136,7 +2148,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("vreg{}: first_range={:?}", i, v.first_range,); } log::debug!("Ranges:"); - for (i, (r, rc)) in self.ranges.iter().zip(self.ranges.iter()).enumerate() { + for (i, (r, rh)) in self.ranges.iter().zip(self.ranges_hot.iter()).enumerate() { log::debug!( concat!( "range{}: range={:?} vreg={:?} bundle={:?} ", @@ -2144,14 +2156,14 @@ impl<'a, F: Function> Env<'a, F> { "next_in_bundle={:?} next_in_reg={:?}" ), i, - r.range, - rc.vreg, - rc.bundle, + rh.range, + r.vreg, + r.bundle, r.uses_spill_weight(), r.first_use, - rc.last_use, - r.next_in_bundle, - rc.next_in_reg + r.last_use, + rh.next_in_bundle, + r.next_in_reg ); } log::debug!("Uses:"); @@ -2184,8 +2196,9 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; while iter.is_valid() { + let range_hot = &self.ranges_hot[iter.index()]; let range = &self.ranges[iter.index()]; - log::debug!(" -> range {:?}", range.range); + log::debug!(" -> range {:?}", range_hot.range); let mut use_iter = range.first_use; while use_iter.is_valid() { let usedata = &self.uses[use_iter.index()]; @@ -2196,7 +2209,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> needed {:?}", needed); use_iter = usedata.next_use(); } - iter = range.next_in_bundle; + iter = range_hot.next_in_bundle; } log::debug!(" -> final needed: {:?}", needed); @@ -2210,8 +2223,8 @@ impl<'a, F: Function> Env<'a, F> { return None; } Some(CodeRange { - from: self.ranges[first_range.index()].range.from, - to: self.ranges[last_range.index()].range.to, + from: self.ranges_hot[first_range.index()].range.from, + to: self.ranges_hot[last_range.index()].range.to, }) } @@ -2287,7 +2300,7 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].allocation = Allocation::reg(preg); let mut iter = self.bundles[bundle.index()].first_range; while iter.is_valid() { - let range = &self.ranges[iter.index()]; + let range = &self.ranges_hot[iter.index()]; self.pregs[reg.index()] .allocations .btree @@ -2322,8 +2335,10 @@ impl<'a, F: Function> Env<'a, F> { self.pregs[preg_idx.index()] .allocations .btree - .remove(&LiveRangeKey::from_range(&self.ranges[iter.index()].range)); - iter = self.ranges[iter.index()].next_in_bundle; + .remove(&LiveRangeKey::from_range( + 
&self.ranges_hot[iter.index()].range, + )); + iter = self.ranges_hot[iter.index()].next_in_bundle; } let prio = self.bundles[bundle.index()].prio; log::debug!(" -> prio {}; back into queue", prio); @@ -2354,6 +2369,7 @@ impl<'a, F: Function> Env<'a, F> { let mut fixed = false; let bundledata = &self.bundles[bundle.index()]; let first_range = &self.ranges[bundledata.first_range.index()]; + let first_range_hot = &self.ranges_hot[bundledata.first_range.index()]; log::debug!("recompute bundle properties: bundle {:?}", bundle); @@ -2376,13 +2392,13 @@ impl<'a, F: Function> Env<'a, F> { // the range covers only one instruction. Note that it // could cover just one ProgPoint, i.e. X.Before..X.After, // or two ProgPoints, i.e. X.Before..X+1.Before. - log::debug!(" -> first range has range {:?}", first_range.range); + log::debug!(" -> first range has range {:?}", first_range_hot.range); log::debug!( " -> first range has next in bundle {:?}", - first_range.next_in_bundle + first_range_hot.next_in_bundle ); - minimal = first_range.next_in_bundle.is_invalid() - && first_range.range.from.inst() == first_range.range.to.prev().inst(); + minimal = first_range_hot.next_in_bundle.is_invalid() + && first_range_hot.range.from.inst() == first_range_hot.range.to.prev().inst(); log::debug!(" -> minimal: {}", minimal); } @@ -2404,7 +2420,7 @@ impl<'a, F: Function> Env<'a, F> { range_data.uses_spill_weight() ); total += range_data.uses_spill_weight(); - range = range_data.next_in_bundle; + range = self.ranges_hot[range.index()].next_in_bundle; } if self.bundles[bundle.index()].prio > 0 { @@ -2472,12 +2488,12 @@ impl<'a, F: Function> Env<'a, F> { let (conflict_from, conflict_to) = if conflicting.is_valid() { ( Some( - self.ranges[self.bundles[conflicting.index()].first_range.index()] + self.ranges_hot[self.bundles[conflicting.index()].first_range.index()] .range .from, ), Some( - self.ranges[self.bundles[conflicting.index()].last_range.index()] + self.ranges_hot[self.bundles[conflicting.index()].last_range.index()] .range .to, ), @@ -2487,14 +2503,14 @@ impl<'a, F: Function> Env<'a, F> { }; let bundle_start = if self.bundles[bundle.index()].first_range.is_valid() { - self.ranges[self.bundles[bundle.index()].first_range.index()] + self.ranges_hot[self.bundles[bundle.index()].first_range.index()] .range .from } else { ProgPoint::before(Inst::new(0)) }; let bundle_end = if self.bundles[bundle.index()].last_range.is_valid() { - self.ranges[self.bundles[bundle.index()].last_range.index()] + self.ranges_hot[self.bundles[bundle.index()].last_range.index()] .range .to } else { @@ -2505,7 +2521,7 @@ impl<'a, F: Function> Env<'a, F> { let mut clobberidx = 0; while our_iter.is_valid() { // Probe the hot-code tree. - let our_range = self.ranges[our_iter.index()].range; + let our_range = self.ranges_hot[our_iter.index()].range; log::debug!(" -> range {:?}", our_range); if let Some(hot_range_idx) = self .hot_code @@ -2516,7 +2532,7 @@ impl<'a, F: Function> Env<'a, F> { // There may be cold code in our range on either side of the hot // range. Record the transition points if so. 
- let hot_range = self.ranges[hot_range_idx.index()].range; + let hot_range = self.ranges_hot[hot_range_idx.index()].range; log::debug!(" -> overlaps with hot-code range {:?}", hot_range); let start_cold = our_range.from < hot_range.from; let end_cold = our_range.to > hot_range.to; @@ -2585,7 +2601,7 @@ impl<'a, F: Function> Env<'a, F> { use_idx = use_data.next_use(); } - our_iter = self.ranges[our_iter.index()].next_in_bundle; + our_iter = self.ranges_hot[our_iter.index()].next_in_bundle; } log::debug!( " -> first use/def after conflict range: {:?}", @@ -2634,7 +2650,7 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; log::debug!("finding all use/def splits for {:?}", bundle); let bundle_start = if iter.is_valid() { - self.ranges[iter.index()].range.from + self.ranges_hot[iter.index()].range.from } else { ProgPoint::before(Inst::new(0)) }; @@ -2644,9 +2660,12 @@ impl<'a, F: Function> Env<'a, F> { // the middle* of an instruction, because we would not be able // to insert moves to reify such an assignment. while iter.is_valid() { - let rangedata = &self.ranges[iter.index()]; - log::debug!(" -> range {:?}: {:?}", iter, rangedata.range); - let mut use_idx = rangedata.first_use; + log::debug!( + " -> range {:?}: {:?}", + iter, + self.ranges_hot[iter.index()].range + ); + let mut use_idx = self.ranges[iter.index()].first_use; while use_idx.is_valid() { let use_data = &self.uses[use_idx.index()]; log::debug!(" -> use: {:?}", use_data); @@ -2674,7 +2693,7 @@ impl<'a, F: Function> Env<'a, F> { use_idx = use_data.next_use(); } - iter = rangedata.next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } splits.sort_unstable(); log::debug!(" -> final splits: {:?}", splits); @@ -2741,7 +2760,7 @@ impl<'a, F: Function> Env<'a, F> { // at the start of the first range in the bundle. let first_range = self.bundles[bundle.index()].first_range; let bundle_start = if first_range.is_valid() { - self.ranges[first_range.index()].range.from + self.ranges_hot[first_range.index()].range.from } else { ProgPoint::before(Inst::new(0)) }; @@ -2757,10 +2776,10 @@ impl<'a, F: Function> Env<'a, F> { let mut range_summary_idx = self.bundles[bundle.index()].range_summary.from; while iter.is_valid() { // Read `next` link now and then clear it -- we rebuild the list below. - let next = self.ranges[iter.index()].next_in_bundle; - self.ranges[iter.index()].next_in_bundle = LiveRangeIndex::invalid(); + let next = self.ranges_hot[iter.index()].next_in_bundle; + self.ranges_hot[iter.index()].next_in_bundle = LiveRangeIndex::invalid(); - let mut range = self.ranges[iter.index()].range; + let mut range = self.ranges_hot[iter.index()].range; log::debug!(" -> has range {:?} (LR {:?})", range, iter); // If any splits occur before this range, create a new @@ -2784,8 +2803,8 @@ impl<'a, F: Function> Env<'a, F> { // Link into current bundle. 
self.ranges[iter.index()].bundle = cur_bundle; if self.bundles[cur_bundle.index()].first_range.is_valid() { - self.ranges[self.bundles[cur_bundle.index()].last_range.index()].next_in_bundle = - iter; + self.ranges_hot[self.bundles[cur_bundle.index()].last_range.index()] + .next_in_bundle = iter; } else { self.bundles[cur_bundle.index()].first_range = iter; } @@ -2825,14 +2844,14 @@ impl<'a, F: Function> Env<'a, F> { debug_assert!(range.from < split_point && split_point < range.to); let rest_range = CodeRange { from: split_point, - to: self.ranges[iter.index()].range.to, + to: self.ranges_hot[iter.index()].range.to, }; - self.ranges[iter.index()].range.to = split_point; + self.ranges_hot[iter.index()].range.to = split_point; range = rest_range; log::debug!( " -> range of {:?} now {:?}", iter, - self.ranges[iter.index()].range + self.ranges_hot[iter.index()].range ); // Create the rest-range and insert it into the vreg's @@ -2898,7 +2917,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!( " -> range {:?} next-in-bundle is {:?}", iter, - self.ranges[iter.index()].next_in_bundle + self.ranges_hot[iter.index()].next_in_bundle ); // Create a new bundle to hold the rest-range. @@ -2943,12 +2962,12 @@ impl<'a, F: Function> Env<'a, F> { fn fixup_range_summary_bound(&mut self, bundle: LiveBundleIndex) { let bundledata = &mut self.bundles[bundle.index()]; let from = if bundledata.first_range.is_valid() { - self.ranges[bundledata.first_range.index()].range.from + self.ranges_hot[bundledata.first_range.index()].range.from } else { ProgPoint::from_index(0) }; let to = if bundledata.last_range.is_valid() { - self.ranges[bundledata.last_range.index()].range.to + self.ranges_hot[bundledata.last_range.index()].range.to } else { ProgPoint::from_index(0) }; @@ -2963,8 +2982,11 @@ impl<'a, F: Function> Env<'a, F> { .range_summary .iter(&self.range_ranges[..]); while iter.is_valid() { - assert_eq!(summary_iter.next(), Some(self.ranges[iter.index()].range)); - iter = self.ranges[iter.index()].next_in_bundle; + assert_eq!( + summary_iter.next(), + Some(self.ranges_hot[iter.index()].range) + ); + iter = self.ranges_hot[iter.index()].next_in_bundle; } assert_eq!(summary_iter.next(), None); } @@ -3032,11 +3054,12 @@ impl<'a, F: Function> Env<'a, F> { // location in the code and by the bundle we're // considering. This has the effect of spreading // demand more evenly across registers. 
- let scan_offset = self.ranges[self.bundles[bundle.index()].first_range.index()] - .range - .from - .inst() - .index() + let scan_offset = self.ranges_hot + [self.bundles[bundle.index()].first_range.index()] + .range + .from + .inst() + .index() + bundle.index(); // If the bundle is more than one range, see if we @@ -3235,7 +3258,7 @@ impl<'a, F: Function> Env<'a, F> { for &bundle in &self.spillsets[spillset.index()].bundles { let mut iter = self.bundles[bundle.index()].first_range; while iter.is_valid() { - let range = self.ranges[iter.index()].range; + let range = self.ranges_hot[iter.index()].range; if self.spillslots[spillslot.index()] .ranges .btree @@ -3243,7 +3266,7 @@ impl<'a, F: Function> Env<'a, F> { { return false; } - iter = self.ranges[iter.index()].next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } } true @@ -3271,14 +3294,14 @@ impl<'a, F: Function> Env<'a, F> { spillslot, iter, bundle, - self.ranges[iter.index()].range + self.ranges_hot[iter.index()].range ); - let range = self.ranges[iter.index()].range; + let range = self.ranges_hot[iter.index()].range; self.spillslots[spillslot.index()] .ranges .btree .insert(LiveRangeKey::from_range(&range), iter); - iter = self.ranges[iter.index()].next_in_bundle; + iter = self.ranges_hot[iter.index()].next_in_bundle; } } } @@ -3512,7 +3535,7 @@ impl<'a, F: Function> Env<'a, F> { let mut prev = LiveRangeIndex::invalid(); while iter.is_valid() { let alloc = self.get_alloc_for_range(iter); - let range = self.ranges[iter.index()].range; + let range = self.ranges_hot[iter.index()].range; log::debug!( "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", vreg, @@ -3565,7 +3588,7 @@ impl<'a, F: Function> Env<'a, F> { // instruction). if prev.is_valid() { let prev_alloc = self.get_alloc_for_range(prev); - let prev_range = self.ranges[prev.index()].range; + let prev_range = self.ranges_hot[prev.index()].range; let first_use = self.ranges[iter.index()].first_use; let first_is_def = if first_use.is_valid() { self.uses[first_use.index()].operand.kind() == OperandKind::Def @@ -4235,8 +4258,7 @@ impl<'a, F: Function> Env<'a, F> { let mut safepoint_idx = 0; let mut iter = self.vregs[vreg.index()].first_range; while iter.is_valid() { - let rangedata = &self.ranges[iter.index()]; - let range = rangedata.range; + let range = self.ranges_hot[iter.index()].range; let alloc = self.get_alloc_for_range(iter); log::debug!(" -> range {:?}: alloc {}", range, alloc); while safepoint_idx < safepoints.len() && safepoints[safepoint_idx] < range.to { @@ -4252,7 +4274,7 @@ impl<'a, F: Function> Env<'a, F> { self.safepoint_slots.push((safepoints[safepoint_idx], slot)); safepoint_idx += 1; } - iter = rangedata.next_in_reg; + iter = self.ranges[iter.index()].next_in_reg; } } From ed339ab4d8137c984b091ad16de3c26cab222812 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 7 May 2021 20:54:27 -0700 Subject: [PATCH 034/155] Some minor opts: inlining, and smallvec reuse --- src/ion/mod.rs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 92f5cb1a..38ea2685 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -68,29 +68,36 @@ pub struct CodeRange { } impl CodeRange { + #[inline(always)] pub fn is_empty(&self) -> bool { self.from == self.to } + #[inline(always)] pub fn contains(&self, other: &Self) -> bool { other.from >= self.from && other.to <= self.to } + #[inline(always)] pub fn contains_point(&self, other: ProgPoint) -> bool { other >= self.from && other 
< self.to } + #[inline(always)] pub fn overlaps(&self, other: &Self) -> bool { other.to > self.from && other.from < self.to } + #[inline(always)] pub fn len(&self) -> usize { self.to.inst().index() - self.from.inst().index() } } impl std::cmp::PartialOrd for CodeRange { + #[inline(always)] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl std::cmp::Ord for CodeRange { + #[inline(always)] fn cmp(&self, other: &Self) -> Ordering { if self.to <= other.from { Ordering::Less @@ -1609,6 +1616,9 @@ impl<'a, F: Function> Env<'a, F> { // have to split the multiple uses at the same progpoint into // different bundles, which breaks invariants related to // disjoint ranges and bundles). + let mut seen_fixed_for_vreg: SmallVec<[VReg; 16]> = smallvec![]; + let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; + let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; for vreg in 0..self.vregs.len() { let mut iter = self.vregs[vreg].first_range; while iter.is_valid() { @@ -1618,9 +1628,6 @@ impl<'a, F: Function> Env<'a, F> { iter ); let mut last_point = None; - let mut seen_fixed_for_vreg: SmallVec<[VReg; 16]> = smallvec![]; - let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; - let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; let mut fixup_multi_fixed_vregs = |pos: ProgPoint, slot: usize, op: &mut Operand, @@ -1684,7 +1691,7 @@ impl<'a, F: Function> Env<'a, F> { use_iter = self.uses[use_iter.index()].next_use(); } - for (clobber, inst) in extra_clobbers { + for &(clobber, inst) in &extra_clobbers { let range = CodeRange { from: ProgPoint::before(inst), to: ProgPoint::before(inst.next()), @@ -1692,6 +1699,10 @@ impl<'a, F: Function> Env<'a, F> { self.add_liverange_to_preg(range, clobber); } + extra_clobbers.clear(); + first_preg.clear(); + seen_fixed_for_vreg.clear(); + iter = self.ranges[iter.index()].next_in_reg; } } From 179ef0e53491c16583147880431548fa727d10c3 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 8 May 2021 15:47:38 -0700 Subject: [PATCH 035/155] Bugfix: Mod with dead def spans both Before and After positions --- src/ion/mod.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 38ea2685..fca128d3 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1432,12 +1432,14 @@ impl<'a, F: Function> Env<'a, F> { OperandKind::Mod => self.cfginfo.block_entry[block.index()], _ => unreachable!(), }; + let to = match operand.kind() { + OperandKind::Def => pos.next(), + OperandKind::Mod => pos.next().next(), // both Before and After positions + _ => unreachable!(), + }; lr = self.add_liverange_to_vreg( VRegIndex::new(operand.vreg().vreg()), - CodeRange { - from, - to: pos.next(), - }, + CodeRange { from, to }, &mut num_ranges, ); log::debug!(" -> invalid; created {:?}", lr); From 3d0d760c70d8c4297f532996a31d63a7e97e3f11 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 8 May 2021 16:16:30 -0700 Subject: [PATCH 036/155] Bugfix: process parallel moves separately for Int and Float classes --- src/ion/mod.rs | 64 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index fca128d3..f77bfed8 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3441,6 +3441,12 @@ impl<'a, F: Function> Env<'a, F> { "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}", pos, prio, from_alloc, to_alloc ); + match (from_alloc.as_reg(), to_alloc.as_reg()) { + (Some(from), Some(to)) => 
{ + assert_eq!(from.class(), to.class()); + } + _ => {} + } self.inserted_moves.push(InsertedMove { pos, prio, @@ -4145,28 +4151,51 @@ impl<'a, F: Function> Env<'a, F> { } let moves = &self.inserted_moves[start..i]; - // Get the regclass from one of the moves. - let regclass = moves[0].from_alloc.class(); + // Gather all the moves with Int class and Float class + // separately. These cannot interact, so it is safe to + // have two separate ParallelMove instances. They need to + // be separate because moves between the two classes are + // impossible. (We could enhance ParallelMoves to + // understand register classes and take multiple scratch + // regs, but this seems simpler.) + let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; + let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; - // All moves in `moves` semantically happen in - // parallel. Let's resolve these to a sequence of moves - // that can be done one at a time. - let mut parallel_moves = ParallelMoves::new(Allocation::reg( - self.env.scratch_by_class[regclass as u8 as usize], - )); - log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); for m in moves { - if m.from_alloc != m.to_alloc { - log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); - parallel_moves.add(m.from_alloc, m.to_alloc); + assert_eq!(m.from_alloc.class(), m.to_alloc.class()); + match m.from_alloc.class() { + RegClass::Int => { + int_moves.push(m.clone()); + } + RegClass::Float => { + float_moves.push(m.clone()); + } } } - let resolved = parallel_moves.resolve(); + for &(regclass, moves) in + &[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)] + { + // All moves in `moves` semantically happen in + // parallel. Let's resolve these to a sequence of moves + // that can be done one at a time. + let mut parallel_moves = ParallelMoves::new(Allocation::reg( + self.env.scratch_by_class[regclass as u8 as usize], + )); + log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); + for m in moves { + if m.from_alloc != m.to_alloc { + log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); + parallel_moves.add(m.from_alloc, m.to_alloc); + } + } + + let resolved = parallel_moves.resolve(); - for (src, dst) in resolved { - log::debug!(" resolved: {} -> {}", src, dst); - self.add_edit(pos, prio, Edit::Move { from: src, to: dst }); + for (src, dst) in resolved { + log::debug!(" resolved: {} -> {}", src, dst); + self.add_edit(pos, prio, Edit::Move { from: src, to: dst }); + } } } @@ -4233,6 +4262,9 @@ impl<'a, F: Function> Env<'a, F> { fn add_edit(&mut self, pos: ProgPoint, prio: InsertMovePrio, edit: Edit) { match &edit { &Edit::Move { from, to } if from == to => return, + &Edit::Move { from, to } if from.is_reg() && to.is_reg() => { + assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class()); + } _ => {} } From 00c64f680a02ebb0b9a047533fbd913037fbf449 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 8 May 2021 17:45:24 -0700 Subject: [PATCH 037/155] Handle moves by joining LRs at inst boundary, not middle of move inst --- src/ion/mod.rs | 121 ++++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index f77bfed8..1a75c038 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1301,69 +1301,78 @@ impl<'a, F: Function> Env<'a, F> { // If this is a move, handle specially. 
if let Some((src, dst)) = self.func.is_move(inst) { - log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); - assert_eq!(src.class(), dst.class()); - - // Handle the def w.r.t. liveranges: trim the - // start of the range and mark it dead at this - // point in our backward scan. - let pos = ProgPoint::after(inst); - let mut dst_lr = vreg_ranges[dst.vreg()]; - // If there was no liverange (dead def), create a trivial one. - if !live.get(dst.vreg()) { - dst_lr = self.add_liverange_to_vreg( - VRegIndex::new(dst.vreg()), - CodeRange { - from: pos, - to: pos.next(), - }, + if src != dst { + log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); + assert_eq!(src.class(), dst.class()); + + // Handle the def w.r.t. liveranges: trim the + // start of the range and mark it dead at this + // point in our backward scan. + let pos = ProgPoint::before(inst); // See note below re: pos of use. + let mut dst_lr = vreg_ranges[dst.vreg()]; + // If there was no liverange (dead def), create a trivial one. + if !live.get(dst.vreg()) { + dst_lr = self.add_liverange_to_vreg( + VRegIndex::new(dst.vreg()), + CodeRange { + from: pos, + to: pos.next(), + }, + &mut num_ranges, + ); + log::debug!(" -> invalid; created {:?}", dst_lr); + } else { + log::debug!(" -> has existing LR {:?}", dst_lr); + } + if self.ranges_hot[dst_lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!(" -> started at block start; trimming to {:?}", pos); + self.ranges_hot[dst_lr.index()].range.from = pos; + } + live.set(dst.vreg(), false); + vreg_ranges[dst.vreg()] = LiveRangeIndex::invalid(); + self.vreg_regs[dst.vreg()] = dst; + + // Handle the use w.r.t. liveranges: make it live + // and create an initial LR back to the start of + // the block. + let pos = ProgPoint::before(inst); + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + // Live up to end of previous inst. Because + // the move isn't actually reading the + // value as part of the inst, all we need + // to do is to decide where to join the + // LRs; and we want this to be at an inst + // boundary, not in the middle, so that + // the move-insertion logic remains happy. + to: pos, + }; + let src_lr = self.add_liverange_to_vreg( + VRegIndex::new(src.vreg()), + range, &mut num_ranges, ); - log::debug!(" -> invalid; created {:?}", dst_lr); - } else { - log::debug!(" -> has existing LR {:?}", dst_lr); - } - if self.ranges_hot[dst_lr.index()].range.from - == self.cfginfo.block_entry[block.index()] - { - log::debug!(" -> started at block start; trimming to {:?}", pos); - self.ranges_hot[dst_lr.index()].range.from = pos; - } - live.set(dst.vreg(), false); - vreg_ranges[dst.vreg()] = LiveRangeIndex::invalid(); - self.vreg_regs[dst.vreg()] = dst; + vreg_ranges[src.vreg()] = src_lr; - // Handle the use w.r.t. liveranges: make it live - // and create an initial LR back to the start of - // the block. - let pos = ProgPoint::before(inst); - let range = CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: pos.next(), - }; - let src_lr = self.add_liverange_to_vreg( - VRegIndex::new(src.vreg()), - range, - &mut num_ranges, - ); - vreg_ranges[src.vreg()] = src_lr; + log::debug!(" -> src LR {:?}", src_lr); - log::debug!(" -> src LR {:?}", src_lr); + // Add to live-set. + let src_is_dead_after_move = !live.get(src.vreg()); + live.set(src.vreg(), true); - // Add to live-set. - let src_is_dead_after_move = !live.get(src.vreg()); - live.set(src.vreg(), true); + // Add to program-moves lists. 
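+                    // (The (vreg, inst) keys pushed below start out paired
+                    // with Allocation::none(); once allocation is complete,
+                    // the per-vreg range walk fills in the chosen allocation
+                    // for each key, and matching src/dst entries are turned
+                    // into inserted moves.)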
+ self.prog_move_srcs + .push(((VRegIndex::new(src.vreg()), inst), Allocation::none())); + self.prog_move_dsts + .push(((VRegIndex::new(dst.vreg()), inst), Allocation::none())); + if src_is_dead_after_move { + self.prog_move_merges.push((src_lr, dst_lr)); + } - // Add to program-moves lists. - self.prog_move_srcs - .push(((VRegIndex::new(src.vreg()), inst), Allocation::none())); - self.prog_move_dsts - .push(((VRegIndex::new(dst.vreg()), inst), Allocation::none())); - if src_is_dead_after_move { - self.prog_move_merges.push((src_lr, dst_lr)); + continue; } - - continue; } // Process defs and uses. From f1fc9a8f7e374ceb112a5413fc4962cd157e40fc Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 8 May 2021 19:04:16 -0700 Subject: [PATCH 038/155] Fix related to move handling --- src/ion/mod.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 1a75c038..08ebf322 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1316,7 +1316,7 @@ impl<'a, F: Function> Env<'a, F> { VRegIndex::new(dst.vreg()), CodeRange { from: pos, - to: pos.next(), + to: pos.next().next(), }, &mut num_ranges, ); @@ -3851,11 +3851,7 @@ impl<'a, F: Function> Env<'a, F> { } else { (vreg, range.from.inst().next()) }; - let move_src_end = if range.to.pos() == InstPosition::Before { - (vreg, range.to.inst()) - } else { - (vreg, range.to.inst().next()) - }; + let move_src_end = (vreg, range.to.inst().next()); log::debug!( "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", vreg, From b9e89885c41f88909bec8a66e1020921d22b90a2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 8 May 2021 21:48:58 -0700 Subject: [PATCH 039/155] Error checking: properly signal a crit-edge requirement failure (used for regalloc.rs fuzzer) --- src/cfg.rs | 20 ++++++++------------ src/ion/mod.rs | 2 +- src/lib.rs | 2 ++ 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/cfg.rs b/src/cfg.rs index 31853f0b..f73ca1aa 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -5,7 +5,7 @@ //! Lightweight CFG analyses. -use crate::{domtree, postorder, Block, Function, Inst, OperandKind, ProgPoint}; +use crate::{domtree, postorder, Block, Function, Inst, OperandKind, ProgPoint, RegAllocError}; #[derive(Clone, Debug)] pub struct CFGInfo { @@ -37,7 +37,7 @@ pub struct CFGInfo { } impl CFGInfo { - pub fn new(f: &F) -> CFGInfo { + pub fn new(f: &F) -> Result { let postorder = postorder::calculate(f.blocks(), f.entry_block(), |block| f.block_succs(block)); let domtree = domtree::calculate( @@ -74,20 +74,16 @@ impl CFGInfo { if f.block_preds(block).len() > 1 { for (i, &pred) in f.block_preds(block).iter().enumerate() { - // Assert critical edge condition. - assert_eq!( - f.block_succs(pred).len(), - 1, - "Edge {} -> {} is critical", - pred.index(), - block.index(), - ); + // Check critical edge condition. 
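+                // (A critical edge runs from a block with more than one
+                // successor to a block with more than one predecessor; the
+                // client must split such edges ahead of time so that edge
+                // moves have an unambiguous insertion point. Report this as
+                // an error rather than asserting.)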
+ if f.block_succs(pred).len() > 1 { + return Err(RegAllocError::CritEdge(pred, block)); + } pred_pos[pred.index()] = i; } } } - CFGInfo { + Ok(CFGInfo { postorder, domtree, insn_block, @@ -96,7 +92,7 @@ impl CFGInfo { block_entry, block_exit, pred_pos, - } + }) } pub fn dominates(&self, a: Block, b: Block) -> bool { diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 08ebf322..9d9c3c20 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -4445,7 +4445,7 @@ impl<'a, F: Function> Env<'a, F> { } pub fn run(func: &F, mach_env: &MachineEnv) -> Result { - let cfginfo = CFGInfo::new(func); + let cfginfo = CFGInfo::new(func)?; let mut env = Env::new(func, mach_env, cfginfo); env.init()?; diff --git a/src/lib.rs b/src/lib.rs index 3dfc41f7..68279396 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -958,6 +958,8 @@ impl Output { /// An error that prevents allocation. #[derive(Clone, Debug)] pub enum RegAllocError { + /// Critical edge is not split between given blocks. + CritEdge(Block, Block), /// Invalid SSA for given vreg at given inst: multiple defs or /// illegal use. `inst` may be `Inst::invalid()` if this concerns /// a block param. From 095a883814638ed0b0ef0c492da015ad164e54e6 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 01:06:59 -0700 Subject: [PATCH 040/155] Fix crit-edge detection logic in CFGInfo analysis --- src/cfg.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cfg.rs b/src/cfg.rs index f73ca1aa..cf4428cd 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -72,10 +72,12 @@ impl CFGInfo { block_entry[block.index()] = ProgPoint::before(f.block_insns(block).first()); block_exit[block.index()] = ProgPoint::after(f.block_insns(block).last()); - if f.block_preds(block).len() > 1 { + let preds = f.block_preds(block).len() + if block == f.entry_block() { 1 } else { 0 }; + if preds > 1 { for (i, &pred) in f.block_preds(block).iter().enumerate() { // Check critical edge condition. - if f.block_succs(pred).len() > 1 { + let succs = f.block_succs(pred).len(); + if succs > 1 { return Err(RegAllocError::CritEdge(pred, block)); } pred_pos[pred.index()] = i; From 509c5dc2fd1d6c3c24d2d7b54ea85d9c13ec4cc9 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 01:14:03 -0700 Subject: [PATCH 041/155] Remove sanity-check logic in range summary construction -- zero-length ranges make this somewhat fickle to verify, and fuzzing will catch any issues. --- src/ion/mod.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 9d9c3c20..1bc48bd3 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2994,24 +2994,6 @@ impl<'a, F: Function> Env<'a, F> { ProgPoint::from_index(0) }; bundledata.range_summary.bound = CodeRange { from, to }; - - #[cfg(debug_assertions)] - { - // Sanity check: ensure that ranges returned by the range - // summary correspond to actual ranges. 
- let mut iter = self.bundles[bundle.index()].first_range; - let mut summary_iter = self.bundles[bundle.index()] - .range_summary - .iter(&self.range_ranges[..]); - while iter.is_valid() { - assert_eq!( - summary_iter.next(), - Some(self.ranges_hot[iter.index()].range) - ); - iter = self.ranges_hot[iter.index()].next_in_bundle; - } - assert_eq!(summary_iter.next(), None); - } } fn process_bundle(&mut self, bundle: LiveBundleIndex) { From 9fdc69edde8ce4d763aace8ffe9bf8c2f437da74 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 01:48:16 -0700 Subject: [PATCH 042/155] fuzzbug fix in range-summary iter --- src/ion/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 1bc48bd3..a7029d71 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -294,6 +294,9 @@ impl<'a> std::iter::Iterator for RangeSummaryIter<'a> { while self.idx < self.limit && self.arr[self.idx].to <= self.bound.from { self.idx += 1; } + if self.idx == self.limit { + return None; + } let mut cur = self.arr[self.idx]; if cur.from >= self.bound.to { self.idx = self.limit; @@ -3216,6 +3219,7 @@ impl<'a, F: Function> Env<'a, F> { } fn try_allocating_regs_for_spilled_bundles(&mut self) { + log::debug!("allocating regs for spilled bundles"); for i in 0..self.spilled_bundles.len() { let bundle = self.spilled_bundles[i]; // don't borrow self let any_vreg = self.vreg_regs[self.ranges @@ -3232,6 +3236,7 @@ impl<'a, F: Function> Env<'a, F> { PReg::invalid(), bundle.index(), ) { + log::debug!("trying bundle {:?} to preg {:?}", bundle, preg); let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = self.try_to_allocate_bundle_to_reg(bundle, preg_idx) From c380b0d979345f4ba13c9e46102599fd98de030a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 01:51:00 -0700 Subject: [PATCH 043/155] assert fix: RegClass doesn't need to match for spillslots (can be reused across classes) --- src/ion/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index a7029d71..b1d47bf5 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -4154,7 +4154,9 @@ impl<'a, F: Function> Env<'a, F> { let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; for m in moves { - assert_eq!(m.from_alloc.class(), m.to_alloc.class()); + if m.from_alloc.is_reg() && m.to_alloc.is_reg() { + assert_eq!(m.from_alloc.class(), m.to_alloc.class()); + } match m.from_alloc.class() { RegClass::Int => { int_moves.push(m.clone()); From 8d7530d3fa0a7de4b123c174e2889ab256ad2fd6 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 02:20:38 -0700 Subject: [PATCH 044/155] Edge moves always before jumps, never after; semantics are too subtle otherwise (client needs to handle specially) --- src/ion/mod.rs | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index b1d47bf5..232cbb23 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3949,22 +3949,25 @@ impl<'a, F: Function> Env<'a, F> { let (insertion_point, prio) = if to_ins > 1 && from_outs <= 1 { ( - // N.B.: "after" the branch should be interpreted - // by the user as happening before the actual - // branching action, but after the branch reads - // all necessary inputs. 
It's necessary to do this - // rather than to place the moves before the - // branch because the branch may have other - // actions than just the control-flow transfer, - // and these other actions may require other - // inputs (which should be read before the "edge" - // moves). - // - // Edits will only appear after the last (branch) - // instruction if the block has only a single - // successor; we do not expect the user to somehow - // duplicate or predicate these. - ProgPoint::after(from_last_insn), + // N.B.: though semantically the edge moves happen + // after the branch, we must insert them before + // the branch because otherwise, of course, they + // would never execute. This is correct even in + // the presence of branches that read register + // inputs (e.g. conditional branches on some RISCs + // that branch on reg zero/not-zero, or any + // indirect branch), but for a very subtle reason: + // all cases of such branches will (or should) + // have multiple successors, and thus due to + // critical-edge splitting, their successors will + // have only the single predecessor, and we prefer + // to insert at the head of the successor in that + // case (rather than here). We make this a + // requirement, in fact: the user of this library + // shall not read registers in a branch + // instruction of there is only one successor per + // the given CFG information. + ProgPoint::before(from_last_insn), InsertMovePrio::OutEdgeMoves, ) } else if to_ins <= 1 { From 34421fcc6b7e41eff73003a1581db1548f886847 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 03:51:10 -0700 Subject: [PATCH 045/155] fix to prog-move handling: happens in middle of inst; and insert uses to make later move-insertion happy with this --- src/ion/mod.rs | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 232cbb23..f8b57525 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1311,7 +1311,7 @@ impl<'a, F: Function> Env<'a, F> { // Handle the def w.r.t. liveranges: trim the // start of the range and mark it dead at this // point in our backward scan. - let pos = ProgPoint::before(inst); // See note below re: pos of use. + let pos = ProgPoint::after(inst); let mut dst_lr = vreg_ranges[dst.vreg()]; // If there was no liverange (dead def), create a trivial one. if !live.get(dst.vreg()) { @@ -1319,7 +1319,7 @@ impl<'a, F: Function> Env<'a, F> { VRegIndex::new(dst.vreg()), CodeRange { from: pos, - to: pos.next().next(), + to: pos.next(), }, &mut num_ranges, ); @@ -1337,20 +1337,27 @@ impl<'a, F: Function> Env<'a, F> { vreg_ranges[dst.vreg()] = LiveRangeIndex::invalid(); self.vreg_regs[dst.vreg()] = dst; + let u = UseIndex::new(self.uses.len()); + self.uses.push(Use::new( + Operand::new( + dst, + OperandPolicy::Any, + OperandKind::Def, + OperandPos::After, + ), + pos, + UseIndex::invalid(), + SLOT_NONE as u8, + )); + self.insert_use_into_liverange_and_update_stats(dst_lr, u); + // Handle the use w.r.t. liveranges: make it live // and create an initial LR back to the start of // the block. let pos = ProgPoint::before(inst); let range = CodeRange { from: self.cfginfo.block_entry[block.index()], - // Live up to end of previous inst. Because - // the move isn't actually reading the - // value as part of the inst, all we need - // to do is to decide where to join the - // LRs; and we want this to be at an inst - // boundary, not in the middle, so that - // the move-insertion logic remains happy. 
- to: pos, + to: pos.next(), }; let src_lr = self.add_liverange_to_vreg( VRegIndex::new(src.vreg()), @@ -1361,6 +1368,20 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> src LR {:?}", src_lr); + let u = UseIndex::new(self.uses.len()); + self.uses.push(Use::new( + Operand::new( + dst, + OperandPolicy::Any, + OperandKind::Use, + OperandPos::Before, + ), + pos, + UseIndex::invalid(), + SLOT_NONE as u8, + )); + self.insert_use_into_liverange_and_update_stats(src_lr, u); + // Add to live-set. let src_is_dead_after_move = !live.get(src.vreg()); live.set(src.vreg(), true); From 5c5ea4cb9b6358366a7a7de52b73f6d108d7fdda Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 04:11:30 -0700 Subject: [PATCH 046/155] bugfix --- src/ion/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index f8b57525..ae5c4e4e 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3859,7 +3859,7 @@ impl<'a, F: Function> Env<'a, F> { } else { (vreg, range.from.inst().next()) }; - let move_src_end = (vreg, range.to.inst().next()); + let move_src_end = (vreg, range.to.inst()); log::debug!( "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", vreg, From 4f26b1c78fcac68ec874c827e2a1c08f7a502005 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 13:35:38 -0700 Subject: [PATCH 047/155] Properly handle prog-moves with fixed srcs or dests --- src/fuzzing/func.rs | 2 +- src/ion/mod.rs | 74 +++++++++++++++++++++------------------------ src/lib.rs | 4 +-- 3 files changed, 37 insertions(+), 43 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index ae8dccef..258c0b1d 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -130,7 +130,7 @@ impl Function for Func { &self.reftype_vregs[..] } - fn is_move(&self, _: Inst) -> Option<(VReg, VReg)> { + fn is_move(&self, _: Inst) -> Option<(Operand, Operand)> { None } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index ae5c4e4e..d508f26f 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1165,8 +1165,8 @@ impl<'a, F: Function> Env<'a, F> { let mut live = self.liveouts[block.index()].clone(); for inst in self.func.block_insns(block).rev().iter() { if let Some((src, dst)) = self.func.is_move(inst) { - live.set(dst.vreg(), false); - live.set(src.vreg(), true); + live.set(dst.vreg().vreg(), false); + live.set(src.vreg().vreg(), true); } for pos in &[OperandPos::After, OperandPos::Before] { for op in self.func.inst_operands(inst) { @@ -1304,19 +1304,23 @@ impl<'a, F: Function> Env<'a, F> { // If this is a move, handle specially. if let Some((src, dst)) = self.func.is_move(inst) { - if src != dst { + if src.vreg() != dst.vreg() { log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); assert_eq!(src.class(), dst.class()); + assert_eq!(src.kind(), OperandKind::Use); + assert_eq!(src.pos(), OperandPos::Before); + assert_eq!(dst.kind(), OperandKind::Def); + assert_eq!(dst.pos(), OperandPos::After); // Handle the def w.r.t. liveranges: trim the // start of the range and mark it dead at this // point in our backward scan. let pos = ProgPoint::after(inst); - let mut dst_lr = vreg_ranges[dst.vreg()]; + let mut dst_lr = vreg_ranges[dst.vreg().vreg()]; // If there was no liverange (dead def), create a trivial one. 
- if !live.get(dst.vreg()) { + if !live.get(dst.vreg().vreg()) { dst_lr = self.add_liverange_to_vreg( - VRegIndex::new(dst.vreg()), + VRegIndex::new(dst.vreg().vreg()), CodeRange { from: pos, to: pos.next(), @@ -1333,22 +1337,13 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> started at block start; trimming to {:?}", pos); self.ranges_hot[dst_lr.index()].range.from = pos; } - live.set(dst.vreg(), false); - vreg_ranges[dst.vreg()] = LiveRangeIndex::invalid(); - self.vreg_regs[dst.vreg()] = dst; + live.set(dst.vreg().vreg(), false); + vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid(); + self.vreg_regs[dst.vreg().vreg()] = dst.vreg(); let u = UseIndex::new(self.uses.len()); - self.uses.push(Use::new( - Operand::new( - dst, - OperandPolicy::Any, - OperandKind::Def, - OperandPos::After, - ), - pos, - UseIndex::invalid(), - SLOT_NONE as u8, - )); + self.uses + .push(Use::new(dst, pos, UseIndex::invalid(), SLOT_NONE as u8)); self.insert_use_into_liverange_and_update_stats(dst_lr, u); // Handle the use w.r.t. liveranges: make it live @@ -1360,37 +1355,32 @@ impl<'a, F: Function> Env<'a, F> { to: pos.next(), }; let src_lr = self.add_liverange_to_vreg( - VRegIndex::new(src.vreg()), + VRegIndex::new(src.vreg().vreg()), range, &mut num_ranges, ); - vreg_ranges[src.vreg()] = src_lr; + vreg_ranges[src.vreg().vreg()] = src_lr; log::debug!(" -> src LR {:?}", src_lr); let u = UseIndex::new(self.uses.len()); - self.uses.push(Use::new( - Operand::new( - dst, - OperandPolicy::Any, - OperandKind::Use, - OperandPos::Before, - ), - pos, - UseIndex::invalid(), - SLOT_NONE as u8, - )); + self.uses + .push(Use::new(src, pos, UseIndex::invalid(), SLOT_NONE as u8)); self.insert_use_into_liverange_and_update_stats(src_lr, u); // Add to live-set. - let src_is_dead_after_move = !live.get(src.vreg()); - live.set(src.vreg(), true); + let src_is_dead_after_move = !live.get(src.vreg().vreg()); + live.set(src.vreg().vreg(), true); // Add to program-moves lists. - self.prog_move_srcs - .push(((VRegIndex::new(src.vreg()), inst), Allocation::none())); - self.prog_move_dsts - .push(((VRegIndex::new(dst.vreg()), inst), Allocation::none())); + self.prog_move_srcs.push(( + (VRegIndex::new(src.vreg().vreg()), inst), + Allocation::none(), + )); + self.prog_move_dsts.push(( + (VRegIndex::new(dst.vreg().vreg()), inst), + Allocation::none(), + )); if src_is_dead_after_move { self.prog_move_merges.push((src_lr, dst_lr)); } @@ -3859,7 +3849,11 @@ impl<'a, F: Function> Env<'a, F> { } else { (vreg, range.from.inst().next()) }; - let move_src_end = (vreg, range.to.inst()); + let move_src_end = if range.to.pos() == InstPosition::Before { + (vreg, range.to.inst()) + } else { + (vreg, range.to.inst().next()) + }; log::debug!( "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", vreg, diff --git a/src/lib.rs b/src/lib.rs index 68279396..16066a45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -708,8 +708,8 @@ pub trait Function { } /// Determine whether an instruction is a move; if so, return the - /// vregs for (src, dst). - fn is_move(&self, insn: Inst) -> Option<(VReg, VReg)>; + /// Operands for (src, dst). 
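+    /// The returned source is expected to be a `Use` at the `Before`
+    /// position and the destination a `Def` at the `After` position,
+    /// in the same register class.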
+ fn is_move(&self, insn: Inst) -> Option<(Operand, Operand)>; // -------------------------- // Instruction register slots From b7fd53efc5da535eb97b772f3c95bfe74002dcf4 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 19:38:20 -0700 Subject: [PATCH 048/155] Fix checker: after moving edge-moves to prior to last branch of block (for simpler semantics for library user), we can no longer check blockparams; but this is fine because they do not exist in post-regalloc code. --- src/checker.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index 244a1e76..76b815a7 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -549,18 +549,24 @@ impl<'a, F: Function> Checker<'a, F> { self.bb_insts.get_mut(&block).unwrap().push(checkinst); } - // Instruction itself. - let operands: Vec<_> = self.f.inst_operands(inst).iter().cloned().collect(); - let allocs: Vec<_> = out.inst_allocs(inst).iter().cloned().collect(); - let clobbers: Vec<_> = self.f.inst_clobbers(inst).iter().cloned().collect(); - let checkinst = CheckerInst::Op { - inst, - operands, - allocs, - clobbers, - }; - debug!("checker: adding inst {:?}", checkinst); - self.bb_insts.get_mut(&block).unwrap().push(checkinst); + // Skip if this is a branch: the blockparams do not + // exist in post-regalloc code, and the edge-moves + // have to be inserted before the branch rather than + // after. + if !self.f.is_branch(inst) { + // Instruction itself. + let operands: Vec<_> = self.f.inst_operands(inst).iter().cloned().collect(); + let allocs: Vec<_> = out.inst_allocs(inst).iter().cloned().collect(); + let clobbers: Vec<_> = self.f.inst_clobbers(inst).iter().cloned().collect(); + let checkinst = CheckerInst::Op { + inst, + operands, + allocs, + clobbers, + }; + debug!("checker: adding inst {:?}", checkinst); + self.bb_insts.get_mut(&block).unwrap().push(checkinst); + } // Any inserted edits after instruction. self.handle_edits(block, out, &mut insert_idx, ProgPoint::after(inst)); From 0dbf4a790f5ae3f006ee6ec58448671a99d26240 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 9 May 2021 20:21:57 -0700 Subject: [PATCH 049/155] Collect full conflict-bundle list, by not ending PhysReg probe on first conflict; this leads to better eviction decisions on bz2 --- src/ion/mod.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index d508f26f..6101b20e 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2305,19 +2305,6 @@ impl<'a, F: Function> Env<'a, F> { if !conflicts.iter().any(|b| *b == conflict_bundle) { conflicts.push(conflict_bundle); } - - // Empirically, it seems to be essentially as good - // to return only one conflicting bundle as all of - // them; it is very rare that the combination of - // all conflicting bundles yields a maximum spill - // weight that is enough to keep them in place - // when a single conflict does not. It is also a - // quite significant compile-time win to *stop - // scanning* as soon as we have a conflict. To - // experiment with this, however, just remove this - // `break`; the rest of the code will do the right - // thing. - break; } else { log::debug!(" -> conflict with fixed reservation"); // range from a direct use of the PReg (due to clobber). 
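As a rough illustration of why the full conflict list matters for eviction
(sketch only; the helper below is invented and is not part of this patch):
with every conflicting bundle known, the allocator can weigh the requesting
bundle against the strongest conflict on a physical register, rather than
against only the first conflict it happened to scan.

    // Hypothetical helper, not regalloc2 code: decide whether `candidate`
    // may evict the bundles currently occupying a preg. Seeing only the
    // first conflict could evict a cheap bundle while a heavier one still
    // blocks the register.
    fn may_evict(candidate_weight: u32, conflict_weights: &[u32]) -> bool {
        let strongest = conflict_weights.iter().copied().max().unwrap_or(0);
        candidate_weight > strongest
    }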
From f7551c68d1000fcab58662e2ee88b2da4c066915 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 10 May 2021 22:47:57 -0700 Subject: [PATCH 050/155] Integrate prog-moves with LR-moves; this should in theory reduce move traffic somewhat --- src/ion/mod.rs | 161 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 32 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 6101b20e..f4691d1c 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -643,7 +643,6 @@ enum InsertMovePrio { MultiFixedReg, ReusedInput, OutEdgeMoves, - ProgramMove, } #[derive(Clone, Copy, Debug, Default)] @@ -667,6 +666,7 @@ pub struct Stats { splits_clobbers: usize, splits_hot: usize, splits_conflicts: usize, + splits_defs: usize, splits_all: usize, final_liverange_count: usize, final_bundle_count: usize, @@ -1076,7 +1076,7 @@ impl<'a, F: Function> Env<'a, F> { let mut prev = UseIndex::invalid(); let mut iter = first; while iter.is_valid() { - if self.uses[iter.index()].pos > insert_pos { + if self.uses[iter.index()].pos >= insert_pos { break; } prev = iter; @@ -1304,6 +1304,9 @@ impl<'a, F: Function> Env<'a, F> { // If this is a move, handle specially. if let Some((src, dst)) = self.func.is_move(inst) { + // We can completely skip the move if it is + // trivial (vreg to same vreg) or its output is + // dead. if src.vreg() != dst.vreg() { log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); assert_eq!(src.class(), dst.class()); @@ -1312,25 +1315,70 @@ impl<'a, F: Function> Env<'a, F> { assert_eq!(dst.kind(), OperandKind::Def); assert_eq!(dst.pos(), OperandPos::After); + // Redefine src and dst operands to have + // positions of After and Before respectively + // (see note below), and to have Any + // constraints if they were originally Reg. + let src_policy = match src.policy() { + OperandPolicy::Reg => OperandPolicy::Any, + x => x, + }; + let dst_policy = match dst.policy() { + OperandPolicy::Reg => OperandPolicy::Any, + x => x, + }; + let src = Operand::new( + src.vreg(), + src_policy, + OperandKind::Use, + OperandPos::After, + ); + let dst = Operand::new( + dst.vreg(), + dst_policy, + OperandKind::Def, + OperandPos::Before, + ); + + // N.B.: in order to integrate with the move + // resolution that joins LRs in general, we + // conceptually treat the move as happening + // between the move inst's After and the next + // inst's Before. Thus the src LR goes up to + // (exclusive) next-inst-pre, and the dst LR + // starts at next-inst-pre. We have to take + // care in our move insertion to handle this + // like other inter-inst moves, i.e., at + // `Regular` priority, so it properly happens + // in parallel with other inter-LR moves. + // + // Why the progpoint between move and next + // inst, and not the progpoint between prev + // inst and move? Because a move can be the + // first inst in a block, but cannot be the + // last; so the following progpoint is always + // within the same block, while the previous + // one may be an inter-block point (and the + // After of the prev inst in a different + // block). + // Handle the def w.r.t. liveranges: trim the // start of the range and mark it dead at this // point in our backward scan. - let pos = ProgPoint::after(inst); + let pos = ProgPoint::before(inst.next()); let mut dst_lr = vreg_ranges[dst.vreg().vreg()]; - // If there was no liverange (dead def), create a trivial one. 
if !live.get(dst.vreg().vreg()) { + let from = pos; + let to = pos.next(); dst_lr = self.add_liverange_to_vreg( VRegIndex::new(dst.vreg().vreg()), - CodeRange { - from: pos, - to: pos.next(), - }, + CodeRange { from, to }, &mut num_ranges, ); - log::debug!(" -> invalid; created {:?}", dst_lr); - } else { - log::debug!(" -> has existing LR {:?}", dst_lr); + log::debug!(" -> invalid LR for def; created {:?}", dst_lr); } + log::debug!(" -> has existing LR {:?}", dst_lr); + // Trim the LR to start here. if self.ranges_hot[dst_lr.index()].range.from == self.cfginfo.block_entry[block.index()] { @@ -1349,7 +1397,7 @@ impl<'a, F: Function> Env<'a, F> { // Handle the use w.r.t. liveranges: make it live // and create an initial LR back to the start of // the block. - let pos = ProgPoint::before(inst); + let pos = ProgPoint::after(inst); let range = CodeRange { from: self.cfginfo.block_entry[block.index()], to: pos.next(), @@ -1378,15 +1426,15 @@ impl<'a, F: Function> Env<'a, F> { Allocation::none(), )); self.prog_move_dsts.push(( - (VRegIndex::new(dst.vreg().vreg()), inst), + (VRegIndex::new(dst.vreg().vreg()), inst.next()), Allocation::none(), )); if src_is_dead_after_move { self.prog_move_merges.push((src_lr, dst_lr)); } - - continue; } + + continue; } // Process defs and uses. @@ -1809,6 +1857,7 @@ impl<'a, F: Function> Env<'a, F> { // each bundle. let rc = self.vreg_regs[vreg_from.index()].class(); if rc != self.vreg_regs[vreg_to.index()].class() { + log::debug!(" -> mismatching reg classes"); return false; } @@ -1816,6 +1865,7 @@ impl<'a, F: Function> Env<'a, F> { if !self.bundles[from.index()].allocation.is_none() || !self.bundles[to.index()].allocation.is_none() { + log::debug!("one of the bundles is already assigned (pinned)"); return false; } @@ -1843,6 +1893,10 @@ impl<'a, F: Function> Env<'a, F> { while iter0.is_valid() && iter1.is_valid() { range_count += 1; if range_count > 200 { + log::debug!( + "reached merge complexity (range_count = {}); exiting", + range_count + ); // Limit merge complexity. return false; } @@ -1856,10 +1910,13 @@ impl<'a, F: Function> Env<'a, F> { iter0 = self.ranges_hot[iter0.index()].next_in_bundle; } else { // Overlap -- cannot merge. + log::debug!(" -> overlap between {:?} and {:?}, exiting", iter0, iter1); return false; } } + log::debug!(" -> committing to merge"); + // If we reach here, then the bundles do not overlap -- merge them! // We do this with a merge-sort-like scan over both chains, removing // from `to` (`iter1`) and inserting into `from` (`iter0`). @@ -2234,7 +2291,12 @@ impl<'a, F: Function> Env<'a, F> { while iter.is_valid() { let range_hot = &self.ranges_hot[iter.index()]; let range = &self.ranges[iter.index()]; - log::debug!(" -> range {:?}", range_hot.range); + log::debug!( + " -> range LR {} ({:?}): {:?}", + iter.index(), + iter, + range_hot.range + ); let mut use_iter = range.first_use; while use_iter.is_valid() { let usedata = &self.uses[use_iter.index()]; @@ -2489,6 +2551,8 @@ impl<'a, F: Function> Env<'a, F> { // // Then choose one of the above kinds of splits, in priority order. 
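        // (Roughly in priority order: splits at the hot/cold boundary,
        // splits at clobber points, a single split just before the first
        // conflict, splits at non-first defs (added here), and finally,
        // as a last resort, splitting at every use.)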
+ let mut def_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; + let mut seen_defs = 0; let mut cold_hot_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; let mut clobber_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; let mut last_before_conflict: Option = None; @@ -2621,6 +2685,12 @@ impl<'a, F: Function> Env<'a, F> { let use_data = &self.uses[use_idx.index()]; log::debug!(" -> range has use at {:?}", use_data.pos); update_with_pos(use_data.pos); + if use_data.operand.kind() == OperandKind::Def { + if seen_defs > 0 { + def_splits.push(use_data.pos); + } + seen_defs += 1; + } use_idx = use_data.next_use(); } @@ -2661,6 +2731,10 @@ impl<'a, F: Function> Env<'a, F> { self.stats.splits_conflicts += 1; log::debug!(" going with last before conflict"); smallvec![last_before_conflict.unwrap()] + } else if def_splits.len() > 0 { + log::debug!(" going with non-first def splits: {:?}", def_splits); + self.stats.splits_defs += 1; + def_splits } else { self.stats.splits_all += 1; log::debug!(" splitting at all uses"); @@ -3831,16 +3905,19 @@ impl<'a, F: Function> Env<'a, F> { } // Scan over program move srcs/dsts to fill in allocations. - let move_src_start = if range.from.pos() == InstPosition::Before { - (vreg, range.from.inst()) - } else { - (vreg, range.from.inst().next()) - }; - let move_src_end = if range.to.pos() == InstPosition::Before { - (vreg, range.to.inst()) - } else { - (vreg, range.to.inst().next()) - }; + + // Move srcs happen at `After` of a given + // inst. Compute [from, to) semi-inclusive range of + // inst indices for which we should fill in the source + // with this LR's allocation. + // + // range from inst-Before or inst-After covers cur + // inst's After; so includes move srcs from inst. + let move_src_start = (vreg, range.from.inst()); + // range to (exclusive) inst-Before or inst-After + // covers only prev inst's After; so includes move + // srcs to (exclusive) inst. + let move_src_end = (vreg, range.to.inst()); log::debug!( "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", vreg, @@ -3867,8 +3944,23 @@ impl<'a, F: Function> Env<'a, F> { prog_move_src_idx += 1; } - let move_dst_start = (vreg, range.from.inst()); - let move_dst_end = (vreg, range.to.inst()); + // move dsts happen at Before point. + // + // Range from inst-Before includes cur inst, while inst-After includes only next inst. + let move_dst_start = if range.from.pos() == InstPosition::Before { + (vreg, range.from.inst()) + } else { + (vreg, range.from.inst().next()) + }; + // Range to (exclusive) inst-Before includes prev + // inst, so to (exclusive) cur inst; range to + // (exclusive) inst-After includes cur inst, so to + // (exclusive) next inst. 
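+            // (Example: an LR covering [inst5-Before .. inst7-After)
+            // yields move_dst_start at inst5 and an exclusive
+            // move_dst_end at inst8, i.e. it owns the move destinations
+            // at insts 5, 6 and 7.)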
+ let move_dst_end = if range.to.pos() == InstPosition::Before { + (vreg, range.to.inst()) + } else { + (vreg, range.to.inst().next()) + }; log::debug!( "vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}", vreg, @@ -4105,7 +4197,7 @@ impl<'a, F: Function> Env<'a, F> { self.prog_move_srcs .sort_unstable_by_key(|((_, inst), _)| *inst); self.prog_move_dsts - .sort_unstable_by_key(|((_, inst), _)| *inst); + .sort_unstable_by_key(|((_, inst), _)| inst.prev()); let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]); let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]); assert_eq!(prog_move_srcs.len(), prog_move_dsts.len()); @@ -4120,10 +4212,15 @@ impl<'a, F: Function> Env<'a, F> { ); assert!(!from_alloc.is_none()); assert!(!to_alloc.is_none()); - assert_eq!(from_inst, to_inst); + assert_eq!(from_inst, to_inst.prev()); + // N.B.: these moves happen with the *same* priority as + // LR-to-LR moves, because they work just like them: they + // connect a use at one progpoint (move-After) with a def + // at an adjacent progpoint (move+1-Before), so they must + // happen in parallel with all other LR-to-LR moves. self.insert_move( - ProgPoint::before(from_inst), - InsertMovePrio::ProgramMove, + ProgPoint::before(to_inst), + InsertMovePrio::Regular, from_alloc, to_alloc, ); From e1a37cf0e0df63906fb24a2b383312678ff7cbc5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 10 May 2021 22:53:44 -0700 Subject: [PATCH 051/155] some more stats --- src/ion/mod.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index f4691d1c..18bb1bf8 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -651,6 +651,10 @@ pub struct Stats { livein_iterations: usize, initial_liverange_count: usize, merged_bundle_count: usize, + prog_moves: usize, + prog_moves_dead_src: usize, + prog_move_merge_attempt: usize, + prog_move_merge_success: usize, process_bundle_count: usize, process_bundle_reg_probes_fixed: usize, process_bundle_reg_success_fixed: usize, @@ -1429,7 +1433,9 @@ impl<'a, F: Function> Env<'a, F> { (VRegIndex::new(dst.vreg().vreg()), inst.next()), Allocation::none(), )); + self.stats.prog_moves += 1; if src_is_dead_after_move { + self.stats.prog_moves_dead_src += 1; self.prog_move_merges.push((src_lr, dst_lr)); } } @@ -2135,7 +2141,10 @@ impl<'a, F: Function> Env<'a, F> { assert!(src_bundle.is_valid()); let dest_bundle = self.ranges[dst.index()].bundle; assert!(dest_bundle.is_valid()); - self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); + self.stats.prog_move_merge_attempt += 1; + if self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle) { + self.stats.prog_move_merge_success += 1; + } } // Now create range summaries for all bundles. 
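To make the integration concrete, an illustrative walk-through (instruction
numbers invented) of a single program move `v1 -> v2` at inst 10 under this
scheme:

    // Illustrative only; not code from this patch.
    //   - v1's live range is extended so that it ends at
    //     ProgPoint::before(inst 11);
    //   - v2's live range begins at ProgPoint::before(inst 11);
    //   - prog_move_srcs records ((v1, inst 10), Allocation::none()) and
    //     prog_move_dsts records ((v2, inst 11), Allocation::none());
    //   - once both sides have allocations, a move edit is inserted at
    //     ProgPoint::before(inst 11) with InsertMovePrio::Regular, so it
    //     resolves in the same parallel-move set as ordinary LR-connector
    //     moves.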
From b069ae099d470f8d3411e4bfb77264c8685f814c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 11 May 2021 17:59:10 -0700 Subject: [PATCH 052/155] Use hot-code map to augment spill weights of each use --- src/ion/mod.rs | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 18bb1bf8..62648e41 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -561,10 +561,11 @@ impl LiveRangeSet { } #[inline(always)] -fn spill_weight_from_policy(policy: OperandPolicy) -> u32 { +fn spill_weight_from_policy(policy: OperandPolicy, is_hot: bool) -> u32 { + let bonus = if is_hot { 10000 } else { 0 }; match policy { - OperandPolicy::Any => 1000, - OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000, + OperandPolicy::Any => 1000 + bonus, + OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000 + bonus, _ => 0, } } @@ -1049,14 +1050,24 @@ impl<'a, F: Function> Env<'a, F> { debug_assert!(u.is_valid()); let usedata = &self.uses[u.index()]; let lrdata = &mut self.ranges[from.index()]; + let pos = usedata.pos; + let is_hot = self + .hot_code + .btree + .contains_key(&LiveRangeKey::from_range(&CodeRange { + from: pos, + to: pos.next(), + })); + let policy = usedata.operand.policy(); + let weight = spill_weight_from_policy(policy, is_hot); log::debug!( " -> subtract {} from uses_spill_weight {}; now {}", - spill_weight_from_policy(usedata.operand.policy()), + weight, lrdata.uses_spill_weight(), - lrdata.uses_spill_weight() - spill_weight_from_policy(usedata.operand.policy()), + lrdata.uses_spill_weight() - weight, ); - lrdata.uses_spill_weight_and_flags -= spill_weight_from_policy(usedata.operand.policy()); + lrdata.uses_spill_weight_and_flags -= weight; if usedata.operand.kind() != OperandKind::Use { lrdata.uses_spill_weight_and_flags -= 2000; } @@ -1099,13 +1110,22 @@ impl<'a, F: Function> Env<'a, F> { // Update stats. 
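        // (Concretely: an Any-constrained use contributes 1000 and a
        // Reg/FixedReg-constrained use 2000, plus a 10000 bonus when the
        // use falls inside hot code; other policies contribute 0, and
        // non-Use operands add a further 2000.)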
let policy = self.uses[u.index()].operand.policy(); + let is_hot = self + .hot_code + .btree + .contains_key(&LiveRangeKey::from_range(&CodeRange { + from: insert_pos, + to: insert_pos.next(), + })); + let weight = spill_weight_from_policy(policy, is_hot); log::debug!( "insert use {:?} into lr {:?} with weight {}", u, into, - spill_weight_from_policy(policy) + weight, ); - self.ranges[into.index()].uses_spill_weight_and_flags += spill_weight_from_policy(policy); + + self.ranges[into.index()].uses_spill_weight_and_flags += weight; if self.uses[u.index()].operand.kind() != OperandKind::Use { self.ranges[into.index()].uses_spill_weight_and_flags += 2000; } @@ -2991,7 +3011,16 @@ impl<'a, F: Function> Env<'a, F> { use_iter, policy ); - uses_spill_weight += spill_weight_from_policy(policy); + let pos = self.uses[use_iter.index()].pos; + let is_hot = + self.hot_code + .btree + .contains_key(&LiveRangeKey::from_range(&CodeRange { + from: pos, + to: pos.next(), + })); + let weight = spill_weight_from_policy(policy, is_hot); + uses_spill_weight += weight; log::debug!(" -> use {:?} remains in orig", use_iter); use_iter = self.uses[use_iter.index()].next_use(); } @@ -4434,8 +4463,8 @@ impl<'a, F: Function> Env<'a, F> { pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { self.create_pregs_and_vregs(); - self.compute_liveness(); self.compute_hot_code(); + self.compute_liveness(); self.merge_vreg_bundles(); self.queue_bundles(); if log::log_enabled!(log::Level::Debug) { From 6066d02f6f007da4cbf599dbc6dd6a84dba0bd5a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 11 May 2021 18:19:40 -0700 Subject: [PATCH 053/155] More annotations --- src/ion/mod.rs | 81 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 62648e41..84d31e33 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1333,6 +1333,19 @@ impl<'a, F: Function> Env<'a, F> { // dead. if src.vreg() != dst.vreg() { log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); + if log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::after(inst), + format!( + " prog-move v{} ({:?}) -> v{} ({:?})", + src.vreg().vreg(), + src.policy(), + dst.vreg().vreg(), + dst.policy(), + ), + ); + } + assert_eq!(src.class(), dst.class()); assert_eq!(src.kind(), OperandKind::Use); assert_eq!(src.pos(), OperandPos::Before); @@ -1963,6 +1976,19 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); while iter0.is_valid() { self.ranges[iter0.index()].bundle = from; + + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.ranges_hot[iter0.index()].range.from, + format!( + " MERGE range{} from bundle{} to bundle{}", + iter0.index(), + from.index(), + to.index(), + ), + ); + } + iter0 = self.ranges_hot[iter0.index()].next_in_bundle; } return true; @@ -1991,6 +2017,20 @@ impl<'a, F: Function> Env<'a, F> { let next = *next_range_iter; *next_range_iter = self.ranges_hot[next.index()].next_in_bundle; + if self.ranges[next.index()].bundle == from { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.ranges_hot[next.index()].range.from, + format!( + " MERGE range{} from bundle{} to bundle{}", + next.index(), + from.index(), + to.index(), + ), + ); + } + } + // link from prev. 
if prev.is_valid() { self.ranges_hot[prev.index()].next_in_bundle = next; @@ -2919,8 +2959,23 @@ impl<'a, F: Function> Env<'a, F> { ); self.bundles[cur_bundle.index()].spillset = self.bundles[bundle.index()].spillset; new_bundles.push(cur_bundle); - split_idx += 1; + self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx; + + if log::log_enabled!(log::Level::Debug) { + self.annotate( + range.from, + format!( + " SPLIT bundle{} / range{} -> bundle{} / range{}", + bundle.index(), + iter.index(), + cur_bundle.index(), + iter.index(), + ), + ); + } + + split_idx += 1; } while split_idx < split_points.len() && split_points[split_idx] <= range.from { split_idx += 1; @@ -3058,6 +3113,7 @@ impl<'a, F: Function> Env<'a, F> { // Create a new bundle to hold the rest-range. let rest_bundle = self.create_bundle(); self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1; + let old_bundle = cur_bundle; cur_bundle = rest_bundle; self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx; self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1; @@ -3068,6 +3124,19 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[rest_lr.index()].bundle = rest_bundle; log::debug!(" -> new bundle {:?} for LR {:?}", rest_bundle, rest_lr); + if log::log_enabled!(log::Level::Debug) { + self.annotate( + split_point, + format!( + " SPLIT bundle{} / range{} -> bundle{} / range{}", + old_bundle.index(), + iter.index(), + cur_bundle.index(), + rest_lr.index(), + ), + ); + } + iter = rest_lr; } @@ -3674,19 +3743,21 @@ impl<'a, F: Function> Env<'a, F> { self.annotate( range.from, format!( - " <<< start v{} in {} (LR {})", + " <<< start v{} in {} (range{}) (bundle{})", vreg.index(), alloc, - iter.index() + iter.index(), + self.ranges[iter.index()].bundle.index(), ), ); self.annotate( range.to, format!( - " end v{} in {} (LR {}) >>>", + " end v{} in {} (range{}) (bundle{}) >>>", vreg.index(), alloc, - iter.index() + iter.index(), + self.ranges[iter.index()].bundle.index(), ), ); } From 37fa3ec763ee4b0ef03775794644fe6b33add58c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 11 May 2021 23:59:12 -0700 Subject: [PATCH 054/155] Improve prog-move handling: no use/def records, just directly connect the LRs. Also requires some metadata in edit output to properly hook up the checker in regalloc.rs to track user-moves without seeing the original insts with operands. 
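For illustration, a client or checker consuming the new metadata might walk
the edit stream roughly like this (sketch only; the iteration source and the
helper functions are invented):

    // Sketch: track which vreg a moved value defines, even when the move
    // itself is a no-op because src and dst allocations coincide.
    for edit in edits {
        match edit {
            Edit::Move { from, to, to_vreg } => {
                if from != to {
                    emit_machine_move(from, to); // invented client hook
                }
                if let Some(vreg) = to_vreg {
                    // The moved value defines `vreg`; useful for checking
                    // and debug tracking even when no machine move is
                    // emitted.
                    record_vreg_location(vreg, to); // invented client hook
                }
            }
            _ => {}
        }
    }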
--- fuzz/fuzz_targets/moves.rs | 4 +- src/ion/mod.rs | 114 +++++++++++++++++++++---------------- src/lib.rs | 13 ++++- src/moves.rs | 36 ++++++------ 4 files changed, 96 insertions(+), 71 deletions(-) diff --git a/fuzz/fuzz_targets/moves.rs b/fuzz/fuzz_targets/moves.rs index 9f685b3c..040c3e14 100644 --- a/fuzz/fuzz_targets/moves.rs +++ b/fuzz/fuzz_targets/moves.rs @@ -41,7 +41,7 @@ fuzz_target!(|testcase: TestCase| { let scratch = Allocation::reg(PReg::new(31, RegClass::Int)); let mut par = ParallelMoves::new(scratch); for &(src, dst) in &testcase.moves { - par.add(src, dst); + par.add(src, dst, ()); } let moves = par.resolve(); @@ -59,7 +59,7 @@ fuzz_target!(|testcase: TestCase| { for i in 0..32 { regfile[i] = Some(i); } - for (src, dst) in moves { + for (src, dst, _) in moves { if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) { let data = regfile[preg_src.hw_enc()]; regfile[preg_dst.hw_enc()] = data; diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 84d31e33..459f069b 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -144,30 +144,31 @@ struct LiveRange { enum LiveRangeFlag { Minimal = 1, Fixed = 2, + StartsAtDef = 4, } impl LiveRange { #[inline(always)] pub fn set_flag(&mut self, flag: LiveRangeFlag) { - self.uses_spill_weight_and_flags |= (flag as u32) << 30; + self.uses_spill_weight_and_flags |= (flag as u32) << 29; } #[inline(always)] pub fn clear_flag(&mut self, flag: LiveRangeFlag) { - self.uses_spill_weight_and_flags &= !((flag as u32) << 30); + self.uses_spill_weight_and_flags &= !((flag as u32) << 29); } #[inline(always)] pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { - self.uses_spill_weight_and_flags & ((flag as u32) << 30) != 0 + self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0 } #[inline(always)] pub fn uses_spill_weight(&self) -> u32 { - self.uses_spill_weight_and_flags & 0x3fff_ffff + self.uses_spill_weight_and_flags & 0x1fff_ffff } #[inline(always)] pub fn set_uses_spill_weight(&mut self, weight: u32) { - assert!(weight < (1 << 30)); + assert!(weight < (1 << 29)); self.uses_spill_weight_and_flags = - (self.uses_spill_weight_and_flags & 0xc000_0000) | weight; + (self.uses_spill_weight_and_flags & 0xe000_0000) | weight; } } @@ -634,6 +635,7 @@ struct InsertedMove { prio: InsertMovePrio, from_alloc: Allocation, to_alloc: Allocation, + to_vreg: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -1333,18 +1335,6 @@ impl<'a, F: Function> Env<'a, F> { // dead. 
if src.vreg() != dst.vreg() { log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); - if log::log_enabled!(log::Level::Debug) { - self.annotate( - ProgPoint::after(inst), - format!( - " prog-move v{} ({:?}) -> v{} ({:?})", - src.vreg().vreg(), - src.policy(), - dst.vreg().vreg(), - dst.policy(), - ), - ); - } assert_eq!(src.class(), dst.class()); assert_eq!(src.kind(), OperandKind::Use); @@ -1377,6 +1367,19 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::Before, ); + if log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::after(inst), + format!( + " prog-move v{} ({:?}) -> v{} ({:?})", + src.vreg().vreg(), + src_policy, + dst.vreg().vreg(), + dst_policy, + ), + ); + } + // N.B.: in order to integrate with the move // resolution that joins LRs in general, we // conceptually treat the move as happening @@ -1422,15 +1425,11 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> started at block start; trimming to {:?}", pos); self.ranges_hot[dst_lr.index()].range.from = pos; } + self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef); live.set(dst.vreg().vreg(), false); vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid(); self.vreg_regs[dst.vreg().vreg()] = dst.vreg(); - let u = UseIndex::new(self.uses.len()); - self.uses - .push(Use::new(dst, pos, UseIndex::invalid(), SLOT_NONE as u8)); - self.insert_use_into_liverange_and_update_stats(dst_lr, u); - // Handle the use w.r.t. liveranges: make it live // and create an initial LR back to the start of // the block. @@ -1448,11 +1447,6 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> src LR {:?}", src_lr); - let u = UseIndex::new(self.uses.len()); - self.uses - .push(Use::new(src, pos, UseIndex::invalid(), SLOT_NONE as u8)); - self.insert_use_into_liverange_and_update_stats(src_lr, u); - // Add to live-set. let src_is_dead_after_move = !live.get(src.vreg().vreg()); live.set(src.vreg().vreg(), true); @@ -1572,6 +1566,8 @@ impl<'a, F: Function> Env<'a, F> { self.ranges_hot[lr.index()].range.from = pos; } + self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef); + // Remove from live-set. 
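+                        // (The StartsAtDef flag replaces the old check of
+                        // whether a range's first recorded use is a def:
+                        // prog-moves no longer push Use records, so the
+                        // move-insertion pass consults this flag instead.)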
live.set(operand.vreg().vreg(), false); vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); @@ -1981,8 +1977,9 @@ impl<'a, F: Function> Env<'a, F> { self.annotate( self.ranges_hot[iter0.index()].range.from, format!( - " MERGE range{} from bundle{} to bundle{}", + " MERGE range{} v{} from bundle{} to bundle{}", iter0.index(), + self.ranges[iter0.index()].vreg.index(), from.index(), to.index(), ), @@ -2022,8 +2019,9 @@ impl<'a, F: Function> Env<'a, F> { self.annotate( self.ranges_hot[next.index()].range.from, format!( - " MERGE range{} from bundle{} to bundle{}", + " MERGE range{} v{} from bundle{} to bundle{}", next.index(), + self.ranges[next.index()].vreg.index(), from.index(), to.index(), ), @@ -3611,6 +3609,7 @@ impl<'a, F: Function> Env<'a, F> { prio: InsertMovePrio, from_alloc: Allocation, to_alloc: Allocation, + to_vreg: Option, ) { debug!( "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}", @@ -3627,6 +3626,7 @@ impl<'a, F: Function> Env<'a, F> { prio, from_alloc, to_alloc, + to_vreg, }); } @@ -3785,12 +3785,8 @@ impl<'a, F: Function> Env<'a, F> { if prev.is_valid() { let prev_alloc = self.get_alloc_for_range(prev); let prev_range = self.ranges_hot[prev.index()].range; - let first_use = self.ranges[iter.index()].first_use; - let first_is_def = if first_use.is_valid() { - self.uses[first_use.index()].operand.kind() == OperandKind::Def - } else { - false - }; + let first_is_def = + self.ranges[iter.index()].has_flag(LiveRangeFlag::StartsAtDef); debug_assert!(prev_alloc != Allocation::none()); if prev_range.to == range.from && !self.is_start_of_block(range.from) @@ -3805,7 +3801,13 @@ impl<'a, F: Function> Env<'a, F> { vreg.index() ); assert_eq!(range.from.pos(), InstPosition::Before); - self.insert_move(range.from, InsertMovePrio::Regular, prev_alloc, alloc); + self.insert_move( + range.from, + InsertMovePrio::Regular, + prev_alloc, + alloc, + None, + ); } } @@ -4192,7 +4194,7 @@ impl<'a, F: Function> Env<'a, F> { if last == Some(dest.alloc) { continue; } - self.insert_move(insertion_point, prio, src.alloc, dest.alloc); + self.insert_move(insertion_point, prio, src.alloc, dest.alloc, None); last = Some(dest.alloc); } } @@ -4212,6 +4214,7 @@ impl<'a, F: Function> Env<'a, F> { InsertMovePrio::MultiFixedReg, Allocation::reg(self.pregs[from_preg.index()].reg), Allocation::reg(self.pregs[to_preg.index()].reg), + None, ); self.set_alloc( progpoint.inst(), @@ -4294,6 +4297,7 @@ impl<'a, F: Function> Env<'a, F> { InsertMovePrio::ReusedInput, input_alloc, output_alloc, + None, ); self.set_alloc(inst, input_idx, output_alloc); } @@ -4310,14 +4314,15 @@ impl<'a, F: Function> Env<'a, F> { let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]); let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]); assert_eq!(prog_move_srcs.len(), prog_move_dsts.len()); - for (&((_, from_inst), from_alloc), &((_, to_inst), to_alloc)) in + for (&((_, from_inst), from_alloc), &((to_vreg, to_inst), to_alloc)) in prog_move_srcs.iter().zip(prog_move_dsts.iter()) { log::debug!( - "program move at inst {:?}: alloc {:?} -> {:?}", + "program move at inst {:?}: alloc {:?} -> {:?} (v{})", from_inst, from_alloc, - to_alloc + to_alloc, + to_vreg.index(), ); assert!(!from_alloc.is_none()); assert!(!to_alloc.is_none()); @@ -4332,6 +4337,7 @@ impl<'a, F: Function> Env<'a, F> { InsertMovePrio::Regular, from_alloc, to_alloc, + Some(self.vreg_regs[to_vreg.index()]), ); } } @@ -4389,17 +4395,25 @@ impl<'a, F: Function> Env<'a, F> { )); log::debug!("parallel moves at pos 
{:?} prio {:?}", pos, prio); for m in moves { - if m.from_alloc != m.to_alloc { + if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() { log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); - parallel_moves.add(m.from_alloc, m.to_alloc); + parallel_moves.add(m.from_alloc, m.to_alloc, m.to_vreg); } } let resolved = parallel_moves.resolve(); - for (src, dst) in resolved { - log::debug!(" resolved: {} -> {}", src, dst); - self.add_edit(pos, prio, Edit::Move { from: src, to: dst }); + for (src, dst, to_vreg) in resolved { + log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: dst, + to_vreg, + }, + ); } } } @@ -4446,10 +4460,10 @@ impl<'a, F: Function> Env<'a, F> { for i in 0..self.edits.len() { let &(pos, _, ref edit) = &self.edits[i]; match edit { - &Edit::Move { from, to } => { + &Edit::Move { from, to, to_vreg } => { self.annotate( ProgPoint::from_index(pos), - format!("move {} -> {}", from, to), + format!("move {} -> {} ({:?})", from, to, to_vreg), ); } &Edit::BlockParams { @@ -4466,8 +4480,8 @@ impl<'a, F: Function> Env<'a, F> { fn add_edit(&mut self, pos: ProgPoint, prio: InsertMovePrio, edit: Edit) { match &edit { - &Edit::Move { from, to } if from == to => return, - &Edit::Move { from, to } if from.is_reg() && to.is_reg() => { + &Edit::Move { from, to, to_vreg } if from == to && to_vreg.is_none() => return, + &Edit::Move { from, to, .. } if from.is_reg() && to.is_reg() => { assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class()); } _ => {} diff --git a/src/lib.rs b/src/lib.rs index 16066a45..f6f01391 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -879,7 +879,18 @@ impl ProgPoint { pub enum Edit { /// Move one allocation to another. Each allocation may be a /// register or a stack slot (spillslot). - Move { from: Allocation, to: Allocation }, + /// + /// `to_vreg`, if defined, is useful as metadata: it indicates + /// that the moved value is a def of a new vreg. + /// + /// `Move` edits will be generated even if src and dst allocation + /// are the same if the vreg changes; this allows proper metadata + /// tracking even when moves are elided. + Move { + from: Allocation, + to: Allocation, + to_vreg: Option, + }, /// Define blockparams' locations. Note that this is not typically /// turned into machine code, but can be useful metadata (e.g. for /// the checker). diff --git a/src/moves.rs b/src/moves.rs index 8cdd59ab..25b1e819 100644 --- a/src/moves.rs +++ b/src/moves.rs @@ -6,7 +6,7 @@ use crate::Allocation; use smallvec::{smallvec, SmallVec}; -pub type MoveVec = SmallVec<[(Allocation, Allocation); 16]>; +pub type MoveVec = SmallVec<[(Allocation, Allocation, T); 16]>; /// A `ParallelMoves` represents a list of alloc-to-alloc moves that /// must happen in parallel -- i.e., all reads of sources semantically @@ -14,12 +14,12 @@ pub type MoveVec = SmallVec<[(Allocation, Allocation); 16]>; /// allowed to overwrite sources. It can compute a list of sequential /// moves that will produce the equivalent data movement, possibly /// using a scratch register if one is necessary. 
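Before the resolver internals below, it is worth seeing what the new `Edit::Move` shape means for a client. The following is a minimal, self-contained sketch, not this crate's API: `Alloc`, `Vreg`, and `apply_edit` are hypothetical stand-ins that only mirror the `from`/`to`/`to_vreg` structure documented above.

    // Hypothetical stand-ins mirroring the shape of `Edit::Move { from, to, to_vreg }`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct Alloc(u32); // stand-in for an `Allocation` (register or spillslot)
    #[derive(Clone, Copy, Debug)]
    struct Vreg(u32); // stand-in for a `VReg`

    #[derive(Clone, Copy, Debug)]
    enum EditSketch {
        Move {
            from: Alloc,
            to: Alloc,
            to_vreg: Option<Vreg>,
        },
    }

    fn apply_edit(edit: EditSketch) {
        match edit {
            EditSketch::Move { from, to, to_vreg } => {
                // Only emit a machine move when the allocations actually differ.
                if from != to {
                    println!("emit move: {:?} -> {:?}", from, to);
                }
                // A same-allocation move can still arrive when `to_vreg` is set:
                // it is metadata saying that `to` now holds the named vreg.
                if let Some(v) = to_vreg {
                    println!("record: {:?} now holds {:?}", to, v);
                }
            }
        }
    }

    fn main() {
        // Elided physical move, metadata only: same allocation, new vreg.
        apply_edit(EditSketch::Move {
            from: Alloc(3),
            to: Alloc(3),
            to_vreg: Some(Vreg(7)),
        });
    }

This is why the resolution code above keeps moves whose source and destination allocations are equal whenever `to_vreg` is present: a checker or debugger consuming the edit stream still learns where the new vreg's value lives.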
-pub struct ParallelMoves {
-    parallel_moves: MoveVec,
+pub struct ParallelMoves<T: Clone + Copy + Default> {
+    parallel_moves: MoveVec<T>,
     scratch: Allocation,
 }
 
-impl ParallelMoves {
+impl<T: Clone + Copy + Default> ParallelMoves<T> {
     pub fn new(scratch: Allocation) -> Self {
         Self {
             parallel_moves: smallvec![],
@@ -27,16 +27,16 @@ impl ParallelMoves {
         }
     }
 
-    pub fn add(&mut self, from: Allocation, to: Allocation) {
-        self.parallel_moves.push((from, to));
+    pub fn add(&mut self, from: Allocation, to: Allocation, t: T) {
+        self.parallel_moves.push((from, to, t));
     }
 
     fn sources_overlap_dests(&self) -> bool {
         // Assumes `parallel_moves` has already been sorted in `resolve()` below.
-        for &(_, dst) in &self.parallel_moves {
+        for &(_, dst, _) in &self.parallel_moves {
             if self
                 .parallel_moves
-                .binary_search_by_key(&dst, |&(src, _)| src)
+                .binary_search_by_key(&dst, |&(src, _, _)| src)
                 .is_ok()
             {
                 return true;
@@ -45,7 +45,7 @@ impl ParallelMoves {
         false
     }
 
-    pub fn resolve(mut self) -> MoveVec {
+    pub fn resolve(mut self) -> MoveVec<T> {
         // Easy case: zero or one move. Just return our vec.
         if self.parallel_moves.len() <= 1 {
             return self.parallel_moves;
@@ -53,7 +53,7 @@ impl ParallelMoves {
 
         // Sort moves by source so that we can efficiently test for
         // presence.
-        self.parallel_moves.sort();
+        self.parallel_moves.sort_by_key(|&(src, dst, _)| (src, dst));
 
         // Do any dests overlap sources? If not, we can also just
         // return the list.
@@ -77,10 +77,10 @@ impl ParallelMoves {
 
         // Sort moves by destination and check that each destination
         // has only one writer.
-        self.parallel_moves.sort_by_key(|&(_, dst)| dst);
+        self.parallel_moves.sort_by_key(|&(_, dst, _)| dst);
         if cfg!(debug) {
             let mut last_dst = None;
-            for &(_, dst) in &self.parallel_moves {
+            for &(_, dst, _) in &self.parallel_moves {
                 if last_dst.is_some() {
                     assert!(last_dst.unwrap() != dst);
                 }
@@ -94,10 +94,10 @@ impl ParallelMoves {
         // above so we can efficiently find such a move, if any.
         let mut must_come_before: SmallVec<[Option<usize>; 16]> =
             smallvec![None; self.parallel_moves.len()];
-        for (i, &(src, _)) in self.parallel_moves.iter().enumerate() {
+        for (i, &(src, _, _)) in self.parallel_moves.iter().enumerate() {
             if let Ok(move_to_dst_idx) = self
                 .parallel_moves
-                .binary_search_by_key(&src, |&(_, dst)| dst)
+                .binary_search_by_key(&src, |&(_, dst, _)| dst)
             {
                 must_come_before[i] = Some(move_to_dst_idx);
             }
@@ -107,7 +107,7 @@ impl ParallelMoves {
         // then reverse at the end for RPO. Unlike Tarjan's SCC
         // algorithm, we can emit a cycle as soon as we find one, as
        // noted above.
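As a companion to the comment above, here is a small, self-contained illustration of why a cyclic set of parallel moves needs the scratch allocation. It assumes a single simple cycle in which each move's destination is the next move's source; the `Loc` alias and `sequentialize_cycle` helper are hypothetical, not this crate's implementation.

    use std::collections::HashMap;

    type Loc = &'static str;

    // Sequentialize one simple cycle of parallel moves (src, dst), where
    // cycle[i].1 == cycle[(i + 1) % len].0, using a scratch location.
    fn sequentialize_cycle(cycle: &[(Loc, Loc)], scratch: Loc) -> Vec<(Loc, Loc)> {
        let mut out = Vec::new();
        let (first_src, first_dst) = cycle[0];
        // Save the value that would otherwise be clobbered last.
        out.push((first_src, scratch));
        // Walk the cycle backwards so every remaining source is still intact.
        for &(src, dst) in cycle.iter().skip(1).rev() {
            out.push((src, dst));
        }
        // Finish the cycle out of the scratch location.
        out.push((scratch, first_dst));
        out
    }

    fn main() {
        let cycle = [("rA", "rB"), ("rB", "rC"), ("rC", "rA")];
        let seq = sequentialize_cycle(&cycle, "scratch");

        // Execute the sequential moves and check the parallel-move semantics.
        let mut state: HashMap<Loc, i32> =
            [("rA", 1), ("rB", 2), ("rC", 3), ("scratch", 0)].into_iter().collect();
        for (src, dst) in seq {
            let v = state[src];
            state.insert(dst, v);
        }
        assert_eq!(state["rB"], 1); // rB got rA's old value
        assert_eq!(state["rC"], 2); // rC got rB's old value
        assert_eq!(state["rA"], 3); // rA got rC's old value
    }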
-        let mut ret: MoveVec = smallvec![];
+        let mut ret: MoveVec<T> = smallvec![];
         let mut stack: SmallVec<[usize; 16]> = smallvec![];
         let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
         let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
@@ -176,14 +176,14 @@ impl ParallelMoves {
                 let mut scratch_src = None;
                 while let Some(move_idx) = stack.pop() {
                     onstack[move_idx] = false;
-                    let (mut src, dst) = self.parallel_moves[move_idx];
+                    let (mut src, dst, dst_t) = self.parallel_moves[move_idx];
                     if last_dst.is_none() {
                         scratch_src = Some(src);
                         src = self.scratch;
                     } else {
                         assert_eq!(last_dst.unwrap(), src);
                     }
-                    ret.push((src, dst));
+                    ret.push((src, dst, dst_t));
 
                     last_dst = Some(dst);
 
@@ -192,7 +192,7 @@ impl ParallelMoves {
                 }
             }
             if let Some(src) = scratch_src {
-                ret.push((src, self.scratch));
+                ret.push((src, self.scratch, T::default()));
             }
         }
     }
 }

From 1f9258bea5ff10ed93bb28b11319af0ca4c557b3 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Wed, 12 May 2021 01:06:27 -0700
Subject: [PATCH 055/155] Detect undefined liveins.

---
 src/ion/mod.rs | 17 +++++++++++++++--
 src/lib.rs     |  2 ++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/ion/mod.rs b/src/ion/mod.rs
index 459f069b..7c3adaf8 100644
--- a/src/ion/mod.rs
+++ b/src/ion/mod.rs
@@ -1163,7 +1163,7 @@ impl<'a, F: Function> Env<'a, F> {
         self.liveins[block.index()].get(vreg.index())
     }
 
-    fn compute_liveness(&mut self) {
+    fn compute_liveness(&mut self) -> Result<(), RegAllocError> {
         // Create initial LiveIn and LiveOut bitsets.
         for _ in 0..self.func.blocks() {
             self.liveins.push(BitVec::new());
@@ -1226,6 +1226,17 @@ impl<'a, F: Function> Env<'a, F> {
             self.liveins[block.index()] = live;
         }
 
+        // Check that there are no liveins to the entry block. (The
+        // client should create a virtual instruction that defines any
+        // PReg liveins if necessary.)
+        if self.liveins[self.func.entry_block().index()]
+            .iter()
+            .next()
+            .is_some()
+        {
+            return Err(RegAllocError::EntryLivein);
+        }
+
         let mut num_ranges = 0;
 
         for &vreg in self.func.reftype_vregs() {
@@ -1827,6 +1838,8 @@ impl<'a, F: Function> Env<'a, F> {
         self.stats.initial_liverange_count = self.ranges.len();
         self.stats.blockparam_ins_count = self.blockparam_ins.len();
         self.stats.blockparam_outs_count = self.blockparam_outs.len();
+
+        Ok(())
     }
 
     fn compute_hot_code(&mut self) {
@@ -4549,7 +4562,7 @@ impl<'a, F: Function> Env<'a, F> {
     pub(crate) fn init(&mut self) -> Result<(), RegAllocError> {
         self.create_pregs_and_vregs();
         self.compute_hot_code();
-        self.compute_liveness();
+        self.compute_liveness()?;
         self.merge_vreg_bundles();
         self.queue_bundles();
         if log::log_enabled!(log::Level::Debug) {
diff --git a/src/lib.rs b/src/lib.rs
index f6f01391..961be75f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -981,6 +981,8 @@ pub enum RegAllocError {
     /// Invalid branch: operand count does not match sum of block
     /// params of successor blocks.
     Branch(Inst),
+    /// A VReg is live-in on entry; this is not allowed.
+    EntryLivein,
 }
 
 impl std::fmt::Display for RegAllocError {

From 5b55948feb1b2ae1fe4dc52e80ea15f9f3de0244 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Thu, 13 May 2021 17:25:11 -0700
Subject: [PATCH 056/155] Check branch-args for conflicts with edge-move
 placement.

--- src/cfg.rs | 23 ++++++++++++++++++++++- src/lib.rs | 6 ++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/cfg.rs b/src/cfg.rs index cf4428cd..0ad6c07f 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -72,10 +72,12 @@ impl CFGInfo { block_entry[block.index()] = ProgPoint::before(f.block_insns(block).first()); block_exit[block.index()] = ProgPoint::after(f.block_insns(block).last()); + // Check critical edge condition: if there is more than + // one predecessor, each must have only one successor + // (this block). let preds = f.block_preds(block).len() + if block == f.entry_block() { 1 } else { 0 }; if preds > 1 { for (i, &pred) in f.block_preds(block).iter().enumerate() { - // Check critical edge condition. let succs = f.block_succs(pred).len(); if succs > 1 { return Err(RegAllocError::CritEdge(pred, block)); @@ -83,6 +85,25 @@ impl CFGInfo { pred_pos[pred.index()] = i; } } + + // Check branch-arg condition: if any successors have more + // than one predecessor (given above, there will only be + // one such successor), then the last instruction of this + // block (the branch) cannot have any args other than the + // blockparams. + let mut require_no_branch_args = false; + for &succ in f.block_succs(block) { + let preds = f.block_preds(succ).len() + if succ == f.entry_block() { 1 } else { 0 }; + if preds > 1 { + require_no_branch_args = true; + } + } + if require_no_branch_args { + let last = f.block_insns(block).last(); + if f.branch_blockparam_arg_offset(block, last) > 0 { + return Err(RegAllocError::DisallowedBranchArg(last)); + } + } } Ok(CFGInfo { diff --git a/src/lib.rs b/src/lib.rs index 961be75f..a4d81858 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -983,6 +983,12 @@ pub enum RegAllocError { Branch(Inst), /// A VReg is live-in on entry; this is not allowed. EntryLivein, + /// A branch has non-blockparam arg(s) and at least one of the + /// successor blocks has more than one predecessor, forcing + /// edge-moves before this branch. This is disallowed because it + /// places a use after the edge moves occur; insert an edge block + /// to avoid the situation. + DisallowedBranchArg(Inst), } impl std::fmt::Display for RegAllocError { From f0fbf3aa0c99b5454f8d0c149f2b3c11506ef526 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 17 May 2021 22:19:47 -0700 Subject: [PATCH 057/155] Rework data structures: bundles have a SmallVec of ranges, and ranges a SmallVec of uses. Appears to be a small speed improvement on the highly-artificial fuzz-generator test inputs; Cranelift tests TBD. --- src/ion/mod.rs | 1680 +++++++++++++++++------------------------------- src/lib.rs | 9 - 2 files changed, 579 insertions(+), 1110 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 7c3adaf8..e3aae1a4 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -58,6 +58,7 @@ use log::debug; use smallvec::{smallvec, SmallVec}; use std::cmp::Ordering; use std::collections::{BTreeMap, BinaryHeap, HashMap, HashSet, VecDeque}; +use std::convert::TryFrom; use std::fmt::Debug; /// A range from `from` (inclusive) to `to` (exclusive). @@ -117,24 +118,27 @@ define_index!(VRegIndex); define_index!(PRegIndex); define_index!(SpillSlotIndex); +/// Used to carry small sets of bundles, e.g. for conflict sets. 
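The two CFG conditions that patch 056 enforces in `CFGInfo::new` can be restated without the allocator's data structures. Below is a small sketch, assuming the CFG is given as plain successor and predecessor adjacency lists and ignoring the implicit extra predecessor the allocator counts for the entry block; `find_critical_edges` is a hypothetical helper, not the crate's code.

    // An edge pred -> succ is critical when the predecessor has several
    // successors and the successor has several predecessors; edge moves then
    // have no block they can safely live in, so the client must split the edge.
    fn find_critical_edges(succs: &[Vec<usize>], preds: &[Vec<usize>]) -> Vec<(usize, usize)> {
        let mut out = Vec::new();
        for (block, bsuccs) in succs.iter().enumerate() {
            for &succ in bsuccs {
                if bsuccs.len() > 1 && preds[succ].len() > 1 {
                    out.push((block, succ));
                }
            }
        }
        out
    }

    fn main() {
        // Diamond plus a back edge: 0 -> {1, 2}, 1 -> 3, 2 -> 3, 3 -> 1.
        let succs = vec![vec![1, 2], vec![3], vec![3], vec![1]];
        let preds = vec![vec![], vec![0, 3], vec![0], vec![1, 2]];
        // Only 0 -> 1 is critical: block 0 has two successors and block 1 has
        // two predecessors.
        assert_eq!(find_critical_edges(&succs, &preds), vec![(0, 1)]);
    }

The `DisallowedBranchArg` rule above is the companion condition: once a successor has more than one predecessor, edge moves must be inserted just before the branch, so the branch itself may only carry blockparam args, not ordinary uses.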
type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>; -#[derive(Clone, Debug)] -struct LiveRangeHot { +#[derive(Clone, Copy, Debug)] +struct LiveRangeListEntry { range: CodeRange, - next_in_bundle: LiveRangeIndex, + index: LiveRangeIndex, } +type LiveRangeList = SmallVec<[LiveRangeListEntry; 4]>; +type UseList = SmallVec<[Use; 4]>; + #[derive(Clone, Debug)] struct LiveRange { + range: CodeRange, + vreg: VRegIndex, bundle: LiveBundleIndex, uses_spill_weight_and_flags: u32, - first_use: UseIndex, - last_use: UseIndex, - - next_in_reg: LiveRangeIndex, + uses: UseList, merged_into: LiveRangeIndex, } @@ -142,9 +146,7 @@ struct LiveRange { #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(u32)] enum LiveRangeFlag { - Minimal = 1, - Fixed = 2, - StartsAtDef = 4, + StartsAtDef = 1, } impl LiveRange { @@ -172,54 +174,36 @@ impl LiveRange { } } -#[derive(Clone, Debug)] +#[derive(Clone, Copy, Debug)] struct Use { operand: Operand, pos: ProgPoint, - next_use_and_slot: u32, + slot: u8, + weight: u16, } impl Use { #[inline(always)] - fn new(operand: Operand, pos: ProgPoint, next_use: UseIndex, slot: u8) -> Self { - debug_assert!(next_use.is_invalid() || next_use.index() < ((1 << 24) - 1)); - let next_use = (next_use.0 as usize) & 0x00ff_ffff; + fn new(operand: Operand, pos: ProgPoint, slot: u8) -> Self { Self { operand, pos, - next_use_and_slot: (next_use as u32) | ((slot as u32) << 24), + slot, + // Weight is updated on insertion into LR. + weight: 0, } } - #[inline(always)] - fn next_use(&self) -> UseIndex { - let val = self.next_use_and_slot & 0x00ff_ffff; - // Sign-extend 0x00ff_ffff to INVALID (0xffff_ffff). - let val = ((val as i32) << 8) >> 8; - UseIndex::new(val as usize) - } - #[inline(always)] - fn slot(&self) -> u8 { - (self.next_use_and_slot >> 24) as u8 - } - #[inline(always)] - fn set_next_use(&mut self, u: UseIndex) { - debug_assert!(u.is_invalid() || u.index() < ((1 << 24) - 1)); - let u = (u.0 as usize) & 0x00ff_ffff; - self.next_use_and_slot = (self.next_use_and_slot & 0xff00_0000) | (u as u32); - } } const SLOT_NONE: u8 = u8::MAX; #[derive(Clone, Debug)] struct LiveBundle { - first_range: LiveRangeIndex, - last_range: LiveRangeIndex, + ranges: LiveRangeList, spillset: SpillSetIndex, allocation: Allocation, prio: u32, // recomputed after every bulk update spill_weight_and_props: u32, - range_summary: RangeSummary, } impl LiveBundle { @@ -246,76 +230,6 @@ impl LiveBundle { } } -#[derive(Clone, Debug)] -struct RangeSummary { - /// Indices in `range_ranges` dense array of packed CodeRange structs. 
- from: u32, - to: u32, - bound: CodeRange, -} - -impl RangeSummary { - fn new() -> Self { - Self { - from: 0, - to: 0, - bound: CodeRange { - from: ProgPoint::from_index(0), - to: ProgPoint::from_index(0), - }, - } - } - - fn iter<'a>(&'a self, range_array: &'a [CodeRange]) -> RangeSummaryIter<'a> { - RangeSummaryIter { - idx: self.from as usize, - start: self.from as usize, - limit: self.to as usize, - bound: self.bound, - arr: range_array, - } - } -} - -#[derive(Clone, Copy, Debug)] -struct RangeSummaryIter<'a> { - idx: usize, - start: usize, - limit: usize, - bound: CodeRange, - arr: &'a [CodeRange], -} - -impl<'a> std::iter::Iterator for RangeSummaryIter<'a> { - type Item = CodeRange; - fn next(&mut self) -> Option { - if self.idx == self.limit { - return None; - } - while self.idx < self.limit && self.arr[self.idx].to <= self.bound.from { - self.idx += 1; - } - if self.idx == self.limit { - return None; - } - let mut cur = self.arr[self.idx]; - if cur.from >= self.bound.to { - self.idx = self.limit; - return None; - } - - if cur.from < self.bound.from { - cur.from = self.bound.from; - } - if cur.to > self.bound.to { - cur.to = self.bound.to; - } - - self.idx += 1; - Some(cur) - } -} - #[derive(Clone, Debug)] struct SpillSet { bundles: SmallVec<[LiveBundleIndex; 2]>, @@ -327,8 +241,8 @@ struct SpillSet { #[derive(Clone, Debug)] struct VRegData { + ranges: LiveRangeList, blockparam: Block, - first_range: LiveRangeIndex, is_ref: bool, } @@ -386,11 +300,8 @@ struct Env<'a, F: Function> { blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>, ranges: Vec, - ranges_hot: Vec, - range_ranges: Vec, bundles: Vec, spillsets: Vec, - uses: Vec, vregs: Vec, vreg_regs: Vec, pregs: Vec, @@ -562,13 +473,15 @@ impl LiveRangeSet { } #[inline(always)] -fn spill_weight_from_policy(policy: OperandPolicy, is_hot: bool) -> u32 { - let bonus = if is_hot { 10000 } else { 0 }; - match policy { - OperandPolicy::Any => 1000 + bonus, - OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000 + bonus, +fn spill_weight_from_policy(policy: OperandPolicy, is_hot: bool, is_def: bool) -> u32 { + let hot_bonus = if is_hot { 10000 } else { 0 }; + let def_bonus = if is_def { 2000 } else { 0 }; + let policy_bonus = match policy { + OperandPolicy::Any => 1000, + OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000, _ => 0, - } + }; + hot_bonus + def_bonus + policy_bonus } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -792,10 +705,7 @@ impl<'a, F: Function> Env<'a, F> { blockparam_allocs: vec![], bundles: Vec::with_capacity(n), ranges: Vec::with_capacity(4 * n), - ranges_hot: Vec::with_capacity(4 * n), - range_ranges: Vec::with_capacity(4 * n), spillsets: Vec::with_capacity(n), - uses: Vec::with_capacity(4 * n), vregs: Vec::with_capacity(n), vreg_regs: Vec::with_capacity(n), pregs: vec![], @@ -845,7 +755,7 @@ impl<'a, F: Function> Env<'a, F> { self.add_vreg( reg, VRegData { - first_range: LiveRangeIndex::invalid(), + ranges: smallvec![], blockparam: Block::invalid(), is_ref: false, }, @@ -873,20 +783,14 @@ impl<'a, F: Function> Env<'a, F> { fn create_liverange(&mut self, range: CodeRange) -> LiveRangeIndex { let idx = self.ranges.len(); - self.ranges_hot.push(LiveRangeHot { - range, - next_in_bundle: LiveRangeIndex::invalid(), - }); self.ranges.push(LiveRange { + range, vreg: VRegIndex::invalid(), bundle: LiveBundleIndex::invalid(), uses_spill_weight_and_flags: 0, - first_use: UseIndex::invalid(), - last_use: UseIndex::invalid(), - - next_in_reg: LiveRangeIndex::invalid(), + uses: smallvec![], merged_into: 
LiveRangeIndex::invalid(), }); @@ -894,224 +798,43 @@ impl<'a, F: Function> Env<'a, F> { LiveRangeIndex::new(idx) } - /// Mark `range` as live for the given `vreg`. `num_ranges` is used to prevent - /// excessive coalescing on pathological inputs. + /// Mark `range` as live for the given `vreg`. /// /// Returns the liverange that contains the given range. - fn add_liverange_to_vreg( - &mut self, - vreg: VRegIndex, - range: CodeRange, - num_ranges: &mut usize, - ) -> LiveRangeIndex { + fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, range: CodeRange) -> LiveRangeIndex { log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); - const COALESCE_LIMIT: usize = 100_000; - - // Look for a single or contiguous sequence of existing live ranges that overlap with the - // given range. - - let mut insert_after = LiveRangeIndex::invalid(); - let mut merged = LiveRangeIndex::invalid(); - let mut iter = self.vregs[vreg.index()].first_range; - let mut prev = LiveRangeIndex::invalid(); - while iter.is_valid() { - log::debug!(" -> existing range: {:?}", self.ranges[iter.index()]); - if range.from >= self.ranges_hot[iter.index()].range.to && *num_ranges < COALESCE_LIMIT - { - // New range comes fully after this one -- record it as a lower bound. - insert_after = iter; - prev = iter; - iter = self.ranges[iter.index()].next_in_reg; - log::debug!(" -> lower bound"); - continue; - } - if range.to <= self.ranges_hot[iter.index()].range.from { - // New range comes fully before this one -- we're found our spot. - log::debug!(" -> upper bound (break search loop)"); - break; - } - // If we're here, then we overlap with at least one endpoint of the range. - log::debug!(" -> must overlap"); - debug_assert!(range.overlaps(&self.ranges_hot[iter.index()].range)); - if merged.is_invalid() { - // This is the first overlapping range. Extend to simply cover the new range. - merged = iter; - if range.from < self.ranges_hot[iter.index()].range.from { - self.ranges_hot[iter.index()].range.from = range.from; - } - if range.to > self.ranges_hot[iter.index()].range.to { - self.ranges_hot[iter.index()].range.to = range.to; - } - log::debug!( - " -> extended range of existing range to {:?}", - self.ranges_hot[iter.index()].range - ); - // Continue; there may be more ranges to merge with. - prev = iter; - iter = self.ranges[iter.index()].next_in_reg; - continue; - } - // We overlap but we've already extended the first overlapping existing liverange, so - // we need to do a true merge instead. - log::debug!(" -> merging {:?} into {:?}", iter, merged); - log::debug!( - " -> before: merged {:?}: {:?}", - merged, - self.ranges[merged.index()] - ); - debug_assert!( - self.ranges_hot[iter.index()].range.from - >= self.ranges_hot[merged.index()].range.from - ); // Because we see LRs in order. - if self.ranges_hot[iter.index()].range.to > self.ranges_hot[merged.index()].range.to { - self.ranges_hot[merged.index()].range.to = self.ranges_hot[iter.index()].range.to; - } - self.distribute_liverange_uses(iter, merged); - log::debug!( - " -> after: merged {:?}: {:?}", - merged, - self.ranges[merged.index()] - ); - // Remove from list of liveranges for this vreg. - let next = self.ranges[iter.index()].next_in_reg; - if prev.is_valid() { - self.ranges[prev.index()].next_in_reg = next; - } else { - self.vregs[vreg.index()].first_range = next; + // When we use this function, the LR lists in VRegs are not + // yet sorted. 
We can extend an existing LR if we happen to + // see that one abuts the new range -- we check the end, + // because this one should be the earliest given how we build + // liveness (but we don't claim or uphold this as an + // invariant) -- or we can just append to the end. After we + // add all ranges, we will sort the lists. + + if let Some(last) = self.vregs[vreg.index()].ranges.last_mut() { + if last.range.from == range.to { + log::debug!(" -> abuts existing range {:?}, extending", last.index); + last.range.from = range.from; + self.ranges[last.index.index()].range.from = range.from; + return last.index; } - // `prev` remains the same (we deleted current range). - iter = next; } // If we get here and did not merge into an existing liverange or liveranges, then we need // to create a new one. - if merged.is_invalid() { - let lr = self.create_liverange(range); - self.ranges[lr.index()].vreg = vreg; - if insert_after.is_valid() { - let next = self.ranges[insert_after.index()].next_in_reg; - self.ranges[lr.index()].next_in_reg = next; - self.ranges[insert_after.index()].next_in_reg = lr; - } else { - self.ranges[lr.index()].next_in_reg = self.vregs[vreg.index()].first_range; - self.vregs[vreg.index()].first_range = lr; - } - *num_ranges += 1; - lr - } else { - merged - } - } - - fn distribute_liverange_uses(&mut self, from: LiveRangeIndex, into: LiveRangeIndex) { - log::debug!("distribute from {:?} to {:?}", from, into); - assert_eq!( - self.ranges[from.index()].vreg, - self.ranges[into.index()].vreg - ); - let into_range = self.ranges_hot[into.index()].range; - // For every use in `from`... - let mut prev = UseIndex::invalid(); - let mut iter = self.ranges[from.index()].first_use; - while iter.is_valid() { - let usedata = &mut self.uses[iter.index()]; - // If we have already passed `into`, we're done. - if usedata.pos >= into_range.to { - break; - } - // If this use is within the range of `into`, move it over. - if into_range.contains_point(usedata.pos) { - log::debug!(" -> moving {:?}", iter); - let next = usedata.next_use(); - if prev.is_valid() { - self.uses[prev.index()].set_next_use(next); - } else { - self.ranges[from.index()].first_use = next; - } - if iter == self.ranges[from.index()].last_use { - self.ranges[from.index()].last_use = prev; - } - // `prev` remains the same. - self.update_liverange_stats_on_remove_use(from, iter); - // This may look inefficient but because we are always merging - // non-overlapping LiveRanges, all uses will be at the beginning - // or end of the existing use-list; both cases are optimized. 
- self.insert_use_into_liverange_and_update_stats(into, iter); - iter = next; - } else { - prev = iter; - iter = usedata.next_use(); - } - } - self.ranges[from.index()].merged_into = into; - } - - fn update_liverange_stats_on_remove_use(&mut self, from: LiveRangeIndex, u: UseIndex) { - log::debug!("remove use {:?} from lr {:?}", u, from); - debug_assert!(u.is_valid()); - let usedata = &self.uses[u.index()]; - let lrdata = &mut self.ranges[from.index()]; - let pos = usedata.pos; - let is_hot = self - .hot_code - .btree - .contains_key(&LiveRangeKey::from_range(&CodeRange { - from: pos, - to: pos.next(), - })); - let policy = usedata.operand.policy(); - let weight = spill_weight_from_policy(policy, is_hot); - log::debug!( - " -> subtract {} from uses_spill_weight {}; now {}", - weight, - lrdata.uses_spill_weight(), - lrdata.uses_spill_weight() - weight, - ); - - lrdata.uses_spill_weight_and_flags -= weight; - if usedata.operand.kind() != OperandKind::Use { - lrdata.uses_spill_weight_and_flags -= 2000; - } + let lr = self.create_liverange(range); + self.ranges[lr.index()].vreg = vreg; + self.vregs[vreg.index()] + .ranges + .push(LiveRangeListEntry { range, index: lr }); + lr } - fn insert_use_into_liverange_and_update_stats(&mut self, into: LiveRangeIndex, u: UseIndex) { - let insert_pos = self.uses[u.index()].pos; - let first = self.ranges[into.index()].first_use; - self.uses[u.index()].set_next_use(UseIndex::invalid()); - if first.is_invalid() { - // Empty list. - self.ranges[into.index()].first_use = u; - self.ranges[into.index()].last_use = u; - } else if insert_pos > self.uses[self.ranges[into.index()].last_use.index()].pos { - // After tail. - let tail = self.ranges[into.index()].last_use; - self.uses[tail.index()].set_next_use(u); - self.ranges[into.index()].last_use = u; - } else { - // Otherwise, scan linearly to find insertion position. - let mut prev = UseIndex::invalid(); - let mut iter = first; - while iter.is_valid() { - if self.uses[iter.index()].pos >= insert_pos { - break; - } - prev = iter; - iter = self.uses[iter.index()].next_use(); - } - self.uses[u.index()].set_next_use(iter); - if prev.is_valid() { - self.uses[prev.index()].set_next_use(u); - } else { - self.ranges[into.index()].first_use = u; - } - if iter.is_invalid() { - self.ranges[into.index()].last_use = u; - } - } - - // Update stats. - let policy = self.uses[u.index()].operand.policy(); + fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { + let insert_pos = u.pos; + let operand = u.operand; + let policy = operand.policy(); let is_hot = self .hot_code .btree @@ -1119,7 +842,9 @@ impl<'a, F: Function> Env<'a, F> { from: insert_pos, to: insert_pos.next(), })); - let weight = spill_weight_from_policy(policy, is_hot); + let weight = spill_weight_from_policy(policy, is_hot, operand.kind() != OperandKind::Use); + u.weight = u16::try_from(weight).expect("weight too large for u16 field"); + log::debug!( "insert use {:?} into lr {:?} with weight {}", u, @@ -1127,10 +852,10 @@ impl<'a, F: Function> Env<'a, F> { weight, ); + self.ranges[into.index()].uses.push(u); + + // Update stats. 
self.ranges[into.index()].uses_spill_weight_and_flags += weight; - if self.uses[u.index()].operand.kind() != OperandKind::Use { - self.ranges[into.index()].uses_spill_weight_and_flags += 2000; - } log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight()); } @@ -1139,12 +864,10 @@ impl<'a, F: Function> Env<'a, F> { vreg: VRegIndex, pos: ProgPoint, ) -> Option { - let mut range = self.vregs[vreg.index()].first_range; - while range.is_valid() { - if self.ranges_hot[range.index()].range.contains_point(pos) { - return Some(range); + for entry in &self.vregs[vreg.index()].ranges { + if entry.range.contains_point(pos) { + return Some(entry.index); } - range = self.ranges[range.index()].next_in_reg; } None } @@ -1237,8 +960,6 @@ impl<'a, F: Function> Env<'a, F> { return Err(RegAllocError::EntryLivein); } - let mut num_ranges = 0; - for &vreg in self.func.reftype_vregs() { self.safepoints_per_vreg.insert(vreg.vreg(), HashSet::new()); } @@ -1277,7 +998,7 @@ impl<'a, F: Function> Env<'a, F> { VRegIndex::new(vreg), range ); - let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range, &mut num_ranges); + let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range); vreg_ranges[vreg] = lr; } @@ -1424,17 +1145,16 @@ impl<'a, F: Function> Env<'a, F> { dst_lr = self.add_liverange_to_vreg( VRegIndex::new(dst.vreg().vreg()), CodeRange { from, to }, - &mut num_ranges, ); log::debug!(" -> invalid LR for def; created {:?}", dst_lr); } log::debug!(" -> has existing LR {:?}", dst_lr); // Trim the LR to start here. - if self.ranges_hot[dst_lr.index()].range.from + if self.ranges[dst_lr.index()].range.from == self.cfginfo.block_entry[block.index()] { log::debug!(" -> started at block start; trimming to {:?}", pos); - self.ranges_hot[dst_lr.index()].range.from = pos; + self.ranges[dst_lr.index()].range.from = pos; } self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef); live.set(dst.vreg().vreg(), false); @@ -1449,11 +1169,8 @@ impl<'a, F: Function> Env<'a, F> { from: self.cfginfo.block_entry[block.index()], to: pos.next(), }; - let src_lr = self.add_liverange_to_vreg( - VRegIndex::new(src.vreg().vreg()), - range, - &mut num_ranges, - ); + let src_lr = + self.add_liverange_to_vreg(VRegIndex::new(src.vreg().vreg()), range); vreg_ranges[src.vreg().vreg()] = src_lr; log::debug!(" -> src LR {:?}", src_lr); @@ -1524,20 +1241,12 @@ impl<'a, F: Function> Env<'a, F> { match operand.kind() { OperandKind::Def | OperandKind::Mod => { - // Create the use object. - let u = UseIndex::new(self.uses.len()); - self.uses.push(Use::new( - operand, - pos, - UseIndex::invalid(), - i as u8, - )); - log::debug!("Def of {} at {:?}", operand.vreg(), pos); // Fill in vreg's actual data. self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); + // Get or create the LiveRange. let mut lr = vreg_ranges[operand.vreg().vreg()]; log::debug!(" -> has existing LR {:?}", lr); // If there was no liverange (dead def), create a trivial one. @@ -1555,26 +1264,27 @@ impl<'a, F: Function> Env<'a, F> { lr = self.add_liverange_to_vreg( VRegIndex::new(operand.vreg().vreg()), CodeRange { from, to }, - &mut num_ranges, ); log::debug!(" -> invalid; created {:?}", lr); } - self.insert_use_into_liverange_and_update_stats(lr, u); - + // Create the use in the LiveRange. + self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); + // If def (not mod), this reg is now dead, + // scanning backward; make it so. 
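The per-use weight stored in the new `weight: u16` field comes from a simple additive formula: a hot-code bonus, a def/mod bonus, and a policy bonus, as in `spill_weight_from_policy` earlier in this diff. A worked sketch with those constants follows; the `PolicySketch` enum and `use_weight` function are illustrative stand-ins, not the crate's types.

    #[derive(Clone, Copy)]
    enum PolicySketch {
        Any,
        Reg,
        FixedReg,
        Stack,
    }

    fn use_weight(policy: PolicySketch, is_hot: bool, is_def_or_mod: bool) -> u32 {
        let hot_bonus = if is_hot { 10_000 } else { 0 };
        let def_bonus = if is_def_or_mod { 2_000 } else { 0 };
        let policy_bonus = match policy {
            PolicySketch::Any => 1_000,
            PolicySketch::Reg | PolicySketch::FixedReg => 2_000,
            PolicySketch::Stack => 0,
        };
        hot_bonus + def_bonus + policy_bonus
    }

    fn main() {
        // A register-constrained def in hot code outweighs a cold, unconstrained use.
        assert_eq!(use_weight(PolicySketch::Reg, true, true), 14_000);
        assert_eq!(use_weight(PolicySketch::FixedReg, false, true), 4_000);
        assert_eq!(use_weight(PolicySketch::Any, false, false), 1_000);
        // The largest possible value still fits the u16 weight field on each use.
        assert!(use_weight(PolicySketch::Reg, true, true) <= u16::MAX as u32);
    }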
if operand.kind() == OperandKind::Def { // Trim the range for this vreg to start // at `pos` if it previously ended at the // start of this block (i.e. was not // merged into some larger LiveRange due // to out-of-order blocks). - if self.ranges_hot[lr.index()].range.from + if self.ranges[lr.index()].range.from == self.cfginfo.block_entry[block.index()] { log::debug!( " -> started at block start; trimming to {:?}", pos ); - self.ranges_hot[lr.index()].range.from = pos; + self.ranges[lr.index()].range.from = pos; } self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef); @@ -1585,15 +1295,6 @@ impl<'a, F: Function> Env<'a, F> { } } OperandKind::Use => { - // Create the use object. - let u = UseIndex::new(self.uses.len()); - self.uses.push(Use::new( - operand, - pos, - UseIndex::invalid(), - i as u8, - )); - // Create/extend the LiveRange if it // doesn't already exist, and add the use // to the range. @@ -1606,21 +1307,14 @@ impl<'a, F: Function> Env<'a, F> { lr = self.add_liverange_to_vreg( VRegIndex::new(operand.vreg().vreg()), range, - &mut num_ranges, ); vreg_ranges[operand.vreg().vreg()] = lr; } assert!(lr.is_valid()); - log::debug!( - "Use of {:?} at {:?} -> {:?} -> {:?}", - operand, - pos, - u, - lr - ); + log::debug!("Use of {:?} at {:?} -> {:?}", operand, pos, lr,); - self.insert_use_into_liverange_and_update_stats(lr, u); + self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); // Add to live-set. live.set(operand.vreg().vreg(), true); @@ -1654,7 +1348,6 @@ impl<'a, F: Function> Env<'a, F> { from: start, to: start.next(), }, - &mut num_ranges, ); } // add `blockparam_ins` entries. @@ -1667,13 +1360,33 @@ impl<'a, F: Function> Env<'a, F> { self.safepoints.sort_unstable(); + // Sort ranges in each vreg, and uses in each range, so we can + // iterate over them in order below. The ordering invariant is + // always maintained for uses and always for ranges in bundles + // (which are initialized later), but not always for ranges in + // vregs; those are sorted only when needed, here and then + // again at the end of allocation when resolving moves. + for vreg in &mut self.vregs { + for entry in &mut vreg.ranges { + // Ranges may have been truncated above at defs. We + // need to update with the final range here. + entry.range = self.ranges[entry.index.index()].range; + } + vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); + } + + for range in 0..self.ranges.len() { + self.ranges[range].uses.sort_unstable_by_key(|u| u.pos); + } + // Insert safepoint virtual stack uses, if needed. for vreg in self.func.reftype_vregs() { let vreg = VRegIndex::new(vreg.vreg()); - let mut iter = self.vregs[vreg.index()].first_range; + let mut inserted = false; let mut safepoint_idx = 0; - while iter.is_valid() { - let range = self.ranges_hot[iter.index()].range; + for range_idx in 0..self.vregs[vreg.index()].ranges.len() { + let LiveRangeListEntry { range, index } = + self.vregs[vreg.index()].ranges[range_idx]; while safepoint_idx < self.safepoints.len() && ProgPoint::before(self.safepoints[safepoint_idx]) < range.from { @@ -1691,38 +1404,28 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::Before, ); - // Create the actual use object. - let u = UseIndex::new(self.uses.len()); - self.uses - .push(Use::new(operand, pos, UseIndex::invalid(), SLOT_NONE)); - - // Create/extend the LiveRange and add the use to the range. 
- let range = CodeRange { - from: pos, - to: pos.next(), - }; - let lr = self.add_liverange_to_vreg( - VRegIndex::new(operand.vreg().vreg()), - range, - &mut num_ranges, - ); - vreg_ranges[operand.vreg().vreg()] = lr; - log::debug!( - "Safepoint-induced stack use of {:?} at {:?} -> {:?} -> {:?}", + "Safepoint-induced stack use of {:?} at {:?} -> {:?}", operand, pos, - u, - lr + index, ); - self.insert_use_into_liverange_and_update_stats(lr, u); + self.insert_use_into_liverange(index, Use::new(operand, pos, SLOT_NONE)); safepoint_idx += 1; + + inserted = true; } + + if inserted { + self.ranges[index.index()] + .uses + .sort_unstable_by_key(|u| u.pos); + } + if safepoint_idx >= self.safepoints.len() { break; } - iter = self.ranges[iter.index()].next_in_reg; } } @@ -1739,12 +1442,13 @@ impl<'a, F: Function> Env<'a, F> { let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; for vreg in 0..self.vregs.len() { - let mut iter = self.vregs[vreg].first_range; - while iter.is_valid() { + for range_idx in 0..self.vregs[vreg].ranges.len() { + let entry = self.vregs[vreg].ranges[range_idx]; + let range = entry.index; log::debug!( "multi-fixed-reg cleanup: vreg {:?} range {:?}", VRegIndex::new(vreg), - iter + range, ); let mut last_point = None; let mut fixup_multi_fixed_vregs = |pos: ProgPoint, @@ -1797,17 +1501,15 @@ impl<'a, F: Function> Env<'a, F> { } }; - let mut use_iter = self.ranges[iter.index()].first_use; - while use_iter.is_valid() { - let pos = self.uses[use_iter.index()].pos; - let slot = self.uses[use_iter.index()].slot() as usize; + for u in &mut self.ranges[range.index()].uses { + let pos = u.pos; + let slot = u.slot as usize; fixup_multi_fixed_vregs( pos, slot, - &mut self.uses[use_iter.index()].operand, + &mut u.operand, &mut self.multi_fixed_reg_fixups, ); - use_iter = self.uses[use_iter.index()].next_use(); } for &(clobber, inst) in &extra_clobbers { @@ -1821,8 +1523,6 @@ impl<'a, F: Function> Env<'a, F> { extra_clobbers.clear(); first_preg.clear(); seen_fixed_for_vreg.clear(); - - iter = self.ranges[iter.index()].next_in_reg; } } @@ -1876,12 +1576,10 @@ impl<'a, F: Function> Env<'a, F> { let bundle = self.bundles.len(); self.bundles.push(LiveBundle { allocation: Allocation::none(), - first_range: LiveRangeIndex::invalid(), - last_range: LiveRangeIndex::invalid(), + ranges: smallvec![], spillset: SpillSetIndex::invalid(), prio: 0, spill_weight_and_props: 0, - range_summary: RangeSummary::new(), }); LiveBundleIndex::new(bundle) } @@ -1897,14 +1595,10 @@ impl<'a, F: Function> Env<'a, F> { to.index() ); - let vreg_from = self.ranges[self.bundles[from.index()].first_range.index()].vreg; - let vreg_to = self.ranges[self.bundles[to.index()].first_range.index()].vreg; - // Both bundles must deal with the same RegClass. All vregs in a bundle - // have to have the same regclass (because bundles start with one vreg - // and all merging happens here) so we can just sample the first vreg of - // each bundle. - let rc = self.vreg_regs[vreg_from.index()].class(); - if rc != self.vreg_regs[vreg_to.index()].class() { + // Both bundles must deal with the same RegClass. 
+ let from_rc = self.spillsets[self.bundles[from.index()].spillset.index()].class; + let to_rc = self.spillsets[self.bundles[to.index()].spillset.index()].class; + if from_rc != to_rc { log::debug!(" -> mismatching reg classes"); return false; } @@ -1920,25 +1614,23 @@ impl<'a, F: Function> Env<'a, F> { #[cfg(debug)] { // Sanity check: both bundles should contain only ranges with appropriate VReg classes. - let mut iter = self.bundles[from.index()].first_range; - while iter.is_valid() { - let vreg = self.ranges[iter.index()].vreg; + for entry in &self.bundles[from.index()].ranges { + let vreg = self.ranges[entry.index.index()].vreg; assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - iter = self.ranges_hot[iter.index()].next_in_bundle; } - let mut iter = self.bundles[to.index()].first_range; - while iter.is_valid() { - let vreg = self.ranges[iter.index()].vreg; + for entry in &self.bundles[to.index()].ranges { + let vreg = self.ranges[entry.index.index()].vreg; assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - iter = self.ranges_hot[iter.index()].next_in_bundle; } } // Check for overlap in LiveRanges. - let mut iter0 = self.bundles[from.index()].first_range; - let mut iter1 = self.bundles[to.index()].first_range; + let ranges_from = &self.bundles[from.index()].ranges[..]; + let ranges_to = &self.bundles[to.index()].ranges[..]; + let mut idx_from = 0; + let mut idx_to = 0; let mut range_count = 0; - while iter0.is_valid() && iter1.is_valid() { + while idx_from < ranges_from.len() && idx_to < ranges_to.len() { range_count += 1; if range_count > 200 { log::debug!( @@ -1949,92 +1641,90 @@ impl<'a, F: Function> Env<'a, F> { return false; } - if self.ranges_hot[iter0.index()].range.from >= self.ranges_hot[iter1.index()].range.to - { - iter1 = self.ranges_hot[iter1.index()].next_in_bundle; - } else if self.ranges_hot[iter1.index()].range.from - >= self.ranges_hot[iter0.index()].range.to - { - iter0 = self.ranges_hot[iter0.index()].next_in_bundle; + if ranges_from[idx_from].range.from >= ranges_to[idx_to].range.to { + idx_to += 1; + } else if ranges_to[idx_to].range.from >= ranges_from[idx_from].range.to { + idx_from += 1; } else { // Overlap -- cannot merge. - log::debug!(" -> overlap between {:?} and {:?}, exiting", iter0, iter1); + log::debug!( + " -> overlap between {:?} and {:?}, exiting", + ranges_from[idx_from].index, + ranges_to[idx_to].index + ); return false; } } log::debug!(" -> committing to merge"); - // If we reach here, then the bundles do not overlap -- merge them! - // We do this with a merge-sort-like scan over both chains, removing - // from `to` (`iter1`) and inserting into `from` (`iter0`). - let mut iter0 = self.bundles[from.index()].first_range; - let mut iter1 = self.bundles[to.index()].first_range; - if iter0.is_invalid() { + // If we reach here, then the bundles do not overlap -- merge + // them! We do this with a merge-sort-like scan over both + // lists, building a new range list and replacing the list on + // `to` when we're done. + let mut idx_from = 0; + let mut idx_to = 0; + if ranges_from.is_empty() { // `from` bundle is empty -- trivial merge. + log::debug!(" -> from bundle{} is empty; trivial merge", from.index()); return true; } - if iter1.is_invalid() { - // `to` bundle is empty -- just move head/tail pointers over from + if ranges_to.is_empty() { + // `to` bundle is empty -- just move the list over from // `from` and set `bundle` up-link on all ranges. 
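Because each bundle keeps its ranges sorted by start and non-overlapping, the conflict test between two bundles is a single two-cursor scan rather than a nested loop. A minimal sketch over plain half-open `(from, to)` ranges; the function name is hypothetical.

    fn sorted_ranges_overlap(a: &[(u32, u32)], b: &[(u32, u32)]) -> bool {
        let (mut i, mut j) = (0, 0);
        while i < a.len() && j < b.len() {
            if a[i].0 >= b[j].1 {
                j += 1; // current b-range ends before a[i] starts
            } else if b[j].0 >= a[i].1 {
                i += 1; // current a-range ends before b[j] starts
            } else {
                return true; // the ranges intersect
            }
        }
        false
    }

    fn main() {
        let a = [(0, 4), (10, 12)];
        let b = [(4, 10), (12, 20)];
        assert!(!sorted_ranges_overlap(&a, &b)); // the ranges only abut
        assert!(sorted_ranges_overlap(&a, &[(11, 13)]));
    }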
- let head = self.bundles[from.index()].first_range; - let tail = self.bundles[from.index()].last_range; - self.bundles[to.index()].first_range = head; - self.bundles[to.index()].last_range = tail; - self.bundles[from.index()].first_range = LiveRangeIndex::invalid(); - self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); - while iter0.is_valid() { - self.ranges[iter0.index()].bundle = from; + log::debug!(" -> to bundle{} is empty; trivial merge", to.index()); + let list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]); + for entry in &list { + self.ranges[entry.index.index()].bundle = to; if log::log_enabled!(log::Level::Debug) { self.annotate( - self.ranges_hot[iter0.index()].range.from, + entry.range.from, format!( " MERGE range{} v{} from bundle{} to bundle{}", - iter0.index(), - self.ranges[iter0.index()].vreg.index(), + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), from.index(), to.index(), ), ); } - - iter0 = self.ranges_hot[iter0.index()].next_in_bundle; } + self.bundles[to.index()].ranges = list; + return true; } - // Two non-empty chains of LiveRanges: traverse both simultaneously and - // merge links into `from`. - let mut prev = LiveRangeIndex::invalid(); - while iter0.is_valid() || iter1.is_valid() { - // Pick the next range. - let next_range_iter = if iter0.is_valid() { - if iter1.is_valid() { - if self.ranges_hot[iter0.index()].range.from - <= self.ranges_hot[iter1.index()].range.from - { - &mut iter0 - } else { - &mut iter1 - } + // Two non-empty lists of LiveRanges: traverse both simultaneously and + // merge ranges into `merged`. + let mut merged: LiveRangeList = smallvec![]; + while idx_from < ranges_from.len() || idx_to < ranges_to.len() { + if idx_from < ranges_from.len() && idx_to < ranges_to.len() { + if ranges_from[idx_from].range.from <= ranges_to[idx_to].range.from { + merged.push(ranges_from[idx_from]); + idx_from += 1; } else { - &mut iter0 + merged.push(ranges_to[idx_to]); + idx_to += 1; } + } else if idx_from < ranges_from.len() { + merged.extend_from_slice(&ranges_from[idx_from..]); + break; } else { - &mut iter1 - }; - let next = *next_range_iter; - *next_range_iter = self.ranges_hot[next.index()].next_in_bundle; - - if self.ranges[next.index()].bundle == from { + assert!(idx_to < ranges_to.len()); + merged.extend_from_slice(&ranges_to[idx_to..]); + break; + } + } + for entry in &merged { + if self.ranges[entry.index.index()].bundle == from { if log::log_enabled!(log::Level::Debug) { self.annotate( - self.ranges_hot[next.index()].range.from, + entry.range.from, format!( " MERGE range{} v{} from bundle{} to bundle{}", - next.index(), - self.ranges[next.index()].vreg.index(), + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), from.index(), to.index(), ), @@ -2042,67 +1732,18 @@ impl<'a, F: Function> Env<'a, F> { } } - // link from prev. 
- if prev.is_valid() { - self.ranges_hot[prev.index()].next_in_bundle = next; - } else { - self.bundles[to.index()].first_range = next; - } - self.bundles[to.index()].last_range = next; - self.ranges[next.index()].bundle = to; - prev = next; + log::debug!( + " -> merged result for bundle{}: range{}", + to.index(), + entry.index.index(), + ); + self.ranges[entry.index.index()].bundle = to; } - self.bundles[from.index()].first_range = LiveRangeIndex::invalid(); - self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); - true - } + self.bundles[to.index()].ranges = merged; + self.bundles[from.index()].ranges.clear(); - fn insert_liverange_into_bundle(&mut self, bundle: LiveBundleIndex, lr: LiveRangeIndex) { - log::debug!( - "insert_liverange_into_bundle: lr {:?} bundle {:?}", - lr, - bundle - ); - self.ranges_hot[lr.index()].next_in_bundle = LiveRangeIndex::invalid(); - self.ranges[lr.index()].bundle = bundle; - if self.bundles[bundle.index()].first_range.is_invalid() { - // Empty bundle. - self.bundles[bundle.index()].first_range = lr; - self.bundles[bundle.index()].last_range = lr; - } else if self.ranges_hot[self.bundles[bundle.index()].first_range.index()] - .range - .to - <= self.ranges_hot[lr.index()].range.from - { - // After last range in bundle. - let last = self.bundles[bundle.index()].last_range; - self.ranges_hot[last.index()].next_in_bundle = lr; - self.bundles[bundle.index()].last_range = lr; - } else { - // Find location to insert. - let mut iter = self.bundles[bundle.index()].first_range; - let mut insert_after = LiveRangeIndex::invalid(); - let insert_range = self.ranges_hot[lr.index()].range; - while iter.is_valid() { - debug_assert!(!self.ranges_hot[iter.index()].range.overlaps(&insert_range)); - if self.ranges_hot[iter.index()].range.to <= insert_range.from { - break; - } - insert_after = iter; - iter = self.ranges_hot[iter.index()].next_in_bundle; - } - if insert_after.is_valid() { - self.ranges_hot[insert_after.index()].next_in_bundle = lr; - if self.bundles[bundle.index()].last_range == insert_after { - self.bundles[bundle.index()].last_range = lr; - } - } else { - let next = self.bundles[bundle.index()].first_range; - self.ranges_hot[lr.index()].next_in_bundle = next; - self.bundles[bundle.index()].first_range = lr; - } - } + true } fn merge_vreg_bundles(&mut self) { @@ -2110,35 +1751,18 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("merge_vreg_bundles: creating vreg bundles"); for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); - if self.vregs[vreg.index()].first_range.is_invalid() { + if self.vregs[vreg.index()].ranges.is_empty() { continue; } let bundle = self.create_bundle(); - let mut range = self.vregs[vreg.index()].first_range; - while range.is_valid() { - self.insert_liverange_into_bundle(bundle, range); - range = self.ranges[range.index()].next_in_reg; - } + self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); - - // If this vreg is pinned, assign the allocation and block the PRegs. - if let Some(preg) = self.func.is_pinned_vreg(self.vreg_regs[vreg.index()]) { - self.bundles[bundle.index()].allocation = Allocation::reg(preg); - - let mut iter = self.bundles[bundle.index()].first_range; - while iter.is_valid() { - let range = self.ranges_hot[iter.index()].range; - // Create a new LiveRange for the PReg - // reservation, unaffiliated with the VReg, to - // reserve it (like a clobber) without the - // possibility of eviction. 
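Once the overlap test passes, combining the two bundles is an ordinary merge of two sorted lists, as in merge sort. A small sketch under the same simplified range representation as above; `merge_sorted` is a hypothetical helper.

    fn merge_sorted(a: &[(u32, u32)], b: &[(u32, u32)]) -> Vec<(u32, u32)> {
        let mut out = Vec::with_capacity(a.len() + b.len());
        let (mut i, mut j) = (0, 0);
        while i < a.len() && j < b.len() {
            if a[i].0 <= b[j].0 {
                out.push(a[i]);
                i += 1;
            } else {
                out.push(b[j]);
                j += 1;
            }
        }
        // One of the two lists is exhausted; append whatever remains of the other.
        out.extend_from_slice(&a[i..]);
        out.extend_from_slice(&b[j..]);
        out
    }

    fn main() {
        let from = [(0, 4), (10, 12)];
        let to = [(4, 10), (12, 20)];
        assert_eq!(
            merge_sorted(&from, &to),
            vec![(0, 4), (4, 10), (10, 12), (12, 20)]
        );
    }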
- self.add_liverange_to_preg(range, preg); - iter = self.ranges_hot[iter.index()].next_in_bundle; - } - continue; + for entry in &self.bundles[bundle.index()].ranges { + log::debug!(" -> with LR range{}", entry.index.index()); + self.ranges[entry.index.index()].bundle = bundle; } - // Otherwise, create a spillslot for it. + // Create a spillslot for this bundle. let ssidx = SpillSetIndex::new(self.spillsets.len()); let reg = self.vreg_regs[vreg.index()]; let size = self.func.spillslot_size(reg.class(), reg) as u8; @@ -2167,10 +1791,10 @@ impl<'a, F: Function> Env<'a, F> { dst_vreg ); let src_bundle = - self.ranges[self.vregs[src_vreg.vreg()].first_range.index()].bundle; + self.ranges[self.vregs[src_vreg.vreg()].ranges[0].index.index()].bundle; assert!(src_bundle.is_valid()); let dest_bundle = - self.ranges[self.vregs[dst_vreg.vreg()].first_range.index()].bundle; + self.ranges[self.vregs[dst_vreg.vreg()].ranges[0].index.index()].bundle; assert!(dest_bundle.is_valid()); self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); } @@ -2185,9 +1809,10 @@ impl<'a, F: Function> Env<'a, F> { to_vreg.index(), from_vreg.index() ); - let to_bundle = self.ranges[self.vregs[to_vreg.index()].first_range.index()].bundle; + let to_bundle = self.ranges[self.vregs[to_vreg.index()].ranges[0].index.index()].bundle; assert!(to_bundle.is_valid()); - let from_bundle = self.ranges[self.vregs[from_vreg.index()].first_range.index()].bundle; + let from_bundle = + self.ranges[self.vregs[from_vreg.index()].ranges[0].index.index()].bundle; assert!(from_bundle.is_valid()); log::debug!( " -> from bundle{} to bundle{}", @@ -2218,35 +1843,6 @@ impl<'a, F: Function> Env<'a, F> { } } - // Now create range summaries for all bundles. - for bundle in 0..self.bundles.len() { - let bundle = LiveBundleIndex::new(bundle); - let mut iter = self.bundles[bundle.index()].first_range; - let start_idx = self.range_ranges.len(); - let start_pos = if iter.is_valid() { - self.ranges_hot[iter.index()].range.from - } else { - ProgPoint::from_index(0) - }; - let mut end_pos = start_pos; - while iter.is_valid() { - let range = self.ranges_hot[iter.index()].range; - end_pos = range.to; - self.range_ranges.push(range); - iter = self.ranges_hot[iter.index()].next_in_bundle; - } - let end_idx = self.range_ranges.len(); - let bound = CodeRange { - from: start_pos, - to: end_pos, - }; - self.bundles[bundle.index()].range_summary = RangeSummary { - from: start_idx as u32, - to: end_idx as u32, - bound, - }; - } - log::debug!("done merging bundles"); } @@ -2262,25 +1858,23 @@ impl<'a, F: Function> Env<'a, F> { fn compute_bundle_prio(&self, bundle: LiveBundleIndex) -> u32 { // The priority is simply the total "length" -- the number of // instructions covered by all LiveRanges. 
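A tiny sketch of the priority metric described in the comment above, with ranges again written as half-open program-point pairs; `bundle_priority` is a hypothetical stand-in.

    fn bundle_priority(ranges: &[(u32, u32)]) -> u32 {
        ranges.iter().map(|&(from, to)| to - from).sum()
    }

    fn main() {
        // Larger bundles get a higher priority, so they are placed while the
        // register space is least fragmented; small bundles fit in later.
        assert_eq!(bundle_priority(&[(0, 4), (10, 12)]), 6);
    }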
- let mut iter = self.bundles[bundle.index()].first_range; let mut total = 0; - while iter.is_valid() { - total += self.ranges_hot[iter.index()].range.len() as u32; - iter = self.ranges_hot[iter.index()].next_in_bundle; + for entry in &self.bundles[bundle.index()].ranges { + total += entry.range.len() as u32; } total } fn queue_bundles(&mut self) { for bundle in 0..self.bundles.len() { - if self.bundles[bundle].first_range.is_invalid() { - continue; - } - if !self.bundles[bundle].allocation.is_none() { + log::debug!("enqueueing bundle{}", bundle); + if self.bundles[bundle].ranges.is_empty() { + log::debug!(" -> no ranges; skipping"); continue; } let bundle = LiveBundleIndex::new(bundle); let prio = self.compute_bundle_prio(bundle); + log::debug!(" -> prio {}", prio); self.bundles[bundle.index()].prio = prio; self.recompute_bundle_properties(bundle); self.allocation_queue.insert(bundle, prio as usize); @@ -2308,112 +1902,81 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("Bundles:"); for (i, b) in self.bundles.iter().enumerate() { log::debug!( - "bundle{}: first_range={:?} last_range={:?} spillset={:?} alloc={:?}", + "bundle{}: spillset={:?} alloc={:?}", i, - b.first_range, - b.last_range, b.spillset, b.allocation ); + for entry in &b.ranges { + log::debug!( + " * range {:?} -- {:?}: range{}", + entry.range.from, + entry.range.to, + entry.index.index() + ); + } } log::debug!("VRegs:"); for (i, v) in self.vregs.iter().enumerate() { - log::debug!("vreg{}: first_range={:?}", i, v.first_range,); + log::debug!("vreg{}:", i); + for entry in &v.ranges { + log::debug!( + " * range {:?} -- {:?}: range{}", + entry.range.from, + entry.range.to, + entry.index.index() + ); + } } log::debug!("Ranges:"); - for (i, (r, rh)) in self.ranges.iter().zip(self.ranges_hot.iter()).enumerate() { + for (i, r) in self.ranges.iter().enumerate() { log::debug!( - concat!( - "range{}: range={:?} vreg={:?} bundle={:?} ", - "weight={} first_use={:?} last_use={:?} ", - "next_in_bundle={:?} next_in_reg={:?}" - ), + concat!("range{}: range={:?} vreg={:?} bundle={:?} ", "weight={}"), i, - rh.range, + r.range, r.vreg, r.bundle, r.uses_spill_weight(), - r.first_use, - r.last_use, - rh.next_in_bundle, - r.next_in_reg - ); - } - log::debug!("Uses:"); - for (i, u) in self.uses.iter().enumerate() { - log::debug!( - "use{}: op={:?} pos={:?} slot={} next_use={:?}", - i, - u.operand, - u.pos, - u.slot(), - u.next_use(), ); + for u in &r.uses { + log::debug!(" * use at {:?} (slot {}): {:?}", u.pos, u.slot, u.operand); + } } } fn compute_requirement(&self, bundle: LiveBundleIndex) -> Option { - let init_vreg = self.vreg_regs[self.ranges - [self.bundles[bundle.index()].first_range.index()] - .vreg - .index()]; - let class = init_vreg.class(); - let mut needed = Requirement::Any(class); + log::debug!("compute_requirement: bundle {:?}", bundle); - log::debug!( - "compute_requirement: bundle {:?} class {:?} (from vreg {:?})", - bundle, - class, - init_vreg - ); + let class = self.spillsets[self.bundles[bundle.index()].spillset.index()].class; + log::debug!(" -> class = {:?}", class); + + let mut needed = Requirement::Any(class); - let mut iter = self.bundles[bundle.index()].first_range; - while iter.is_valid() { - let range_hot = &self.ranges_hot[iter.index()]; - let range = &self.ranges[iter.index()]; + for entry in &self.bundles[bundle.index()].ranges { + let range = &self.ranges[entry.index.index()]; log::debug!( " -> range LR {} ({:?}): {:?}", - iter.index(), - iter, - range_hot.range + entry.index.index(), + entry.index, + entry.range 
); - let mut use_iter = range.first_use; - while use_iter.is_valid() { - let usedata = &self.uses[use_iter.index()]; - let use_op = usedata.operand; - let use_req = Requirement::from_operand(use_op); - log::debug!(" -> use {:?} op {:?} req {:?}", use_iter, use_op, use_req); + for u in &range.uses { + let use_req = Requirement::from_operand(u.operand); + log::debug!( + " -> use at {:?} op {:?} req {:?}", + u.pos, + u.operand, + use_req + ); needed = needed.merge(use_req)?; log::debug!(" -> needed {:?}", needed); - use_iter = usedata.next_use(); } - iter = range_hot.next_in_bundle; } log::debug!(" -> final needed: {:?}", needed); Some(needed) } - fn bundle_bounding_range_if_multiple(&self, bundle: LiveBundleIndex) -> Option { - let first_range = self.bundles[bundle.index()].first_range; - let last_range = self.bundles[bundle.index()].last_range; - if first_range.is_invalid() || first_range == last_range { - return None; - } - Some(CodeRange { - from: self.ranges_hot[first_range.index()].range.from, - to: self.ranges_hot[last_range.index()].range.to, - }) - } - - fn range_definitely_fits_in_reg(&self, range: CodeRange, reg: PRegIndex) -> bool { - self.pregs[reg.index()] - .allocations - .btree - .get(&LiveRangeKey::from_range(&range)) - .is_none() - } - fn try_to_allocate_bundle_to_reg( &mut self, bundle: LiveBundleIndex, @@ -2421,37 +1984,81 @@ impl<'a, F: Function> Env<'a, F> { ) -> AllocRegResult { log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); let mut conflicts = smallvec![]; - // Use the range-summary array; this allows fast streaming - // access to CodeRanges (which are just two u32s packed - // together) which is important for this hot loop. - let iter = self.bundles[bundle.index()] - .range_summary - .iter(&self.range_ranges[..]); - for range in iter { - log::debug!(" -> range {:?}", range); - // Note that the comparator function here tests for *overlap*, so we - // are checking whether the BTree contains any preg range that - // *overlaps* with range `range`, not literally the range `range`. - if let Some(preg_range) = self.pregs[reg.index()] - .allocations - .btree - .get(&LiveRangeKey::from_range(&range)) - { - log::debug!(" -> btree contains range {:?} that overlaps", preg_range); - if self.ranges[preg_range.index()].vreg.is_valid() { - log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); - // range from an allocated bundle: find the bundle and add to - // conflicts list. - let conflict_bundle = self.ranges[preg_range.index()].bundle; - log::debug!(" -> conflict bundle {:?}", conflict_bundle); - if !conflicts.iter().any(|b| *b == conflict_bundle) { - conflicts.push(conflict_bundle); - } - } else { - log::debug!(" -> conflict with fixed reservation"); - // range from a direct use of the PReg (due to clobber). - return AllocRegResult::ConflictWithFixed; + // Traverse the BTreeMap in order by requesting the whole + // range spanned by the bundle and iterating over that + // concurrently with our ranges. Because our ranges are in + // order, and the BTreeMap is as well, this allows us to have + // an overall O(n log n) + O(b) complexity, where the PReg has + // n current ranges and the bundle has b ranges, rather than + // O(b * n log n) with the simple probe-for-each-bundle-range + // approach. + // + // Note that the comparator function on a CodeRange tests for *overlap*, so we + // are checking whether the BTree contains any preg range that + // *overlaps* with range `range`, not literally the range `range`. 
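The BTreeMap probe above relies on a key whose ordering treats any two overlapping ranges as equal; this is only coherent because a PReg's allocation map never holds two overlapping ranges at once. A standalone sketch of such a key follows; `RangeKey` is a simplified stand-in, not the crate's type.

    use std::cmp::Ordering;
    use std::collections::BTreeMap;

    #[derive(Clone, Copy, Debug)]
    struct RangeKey {
        from: u32,
        to: u32, // exclusive
    }

    impl Ord for RangeKey {
        fn cmp(&self, other: &Self) -> Ordering {
            if self.to <= other.from {
                Ordering::Less
            } else if self.from >= other.to {
                Ordering::Greater
            } else {
                Ordering::Equal // the ranges overlap
            }
        }
    }

    impl PartialOrd for RangeKey {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    impl PartialEq for RangeKey {
        fn eq(&self, other: &Self) -> bool {
            self.cmp(other) == Ordering::Equal
        }
    }

    impl Eq for RangeKey {}

    fn main() {
        let mut allocs: BTreeMap<RangeKey, &str> = BTreeMap::new();
        allocs.insert(RangeKey { from: 0, to: 4 }, "range0");
        allocs.insert(RangeKey { from: 10, to: 12 }, "range1");
        // A probe overlapping [10, 12) compares Equal to it, so `get` finds it.
        assert_eq!(allocs.get(&RangeKey { from: 11, to: 20 }), Some(&"range1"));
        // A probe that only abuts the stored ranges finds nothing.
        assert!(allocs.get(&RangeKey { from: 4, to: 10 }).is_none());
    }

Querying a range of such keys, from the bundle's first range to its last, is what lets the loop above walk a PReg's existing reservations and the bundle's ranges in lockstep rather than probing the tree once per bundle range.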
+ let bundle_ranges = &self.bundles[bundle.index()].ranges; + let from_key = LiveRangeKey::from_range(&bundle_ranges.first().unwrap().range); + let to_key = LiveRangeKey::from_range(&bundle_ranges.last().unwrap().range); + assert!(from_key <= to_key); + let mut preg_range_iter = self.pregs[reg.index()] + .allocations + .btree + .range(from_key..=to_key) + .peekable(); + log::debug!( + "alloc map for {:?}: {:?}", + reg, + self.pregs[reg.index()].allocations.btree + ); + for entry in bundle_ranges { + log::debug!(" -> range LR {:?}: {:?}", entry.index, entry.range); + let key = LiveRangeKey::from_range(&entry.range); + + // Advance our BTree traversal until it is >= this bundle + // range (i.e., skip PReg allocations in the BTree that + // are completely before this bundle range). + + while preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { + log::debug!( + "Skipping PReg range {:?}", + preg_range_iter.peek().unwrap().0 + ); + preg_range_iter.next(); + } + + // If there are no more PReg allocations, we're done! + if preg_range_iter.peek().is_none() { + log::debug!(" -> no more PReg allocations; so no conflict possible!"); + break; + } + + // If the current PReg range is beyond this range, there is no conflict; continue. + if *preg_range_iter.peek().unwrap().0 > key { + log::debug!( + " -> next PReg allocation is at {:?}; moving to next VReg range", + preg_range_iter.peek().unwrap().0 + ); + continue; + } + + // Otherwise, there is a conflict. + assert_eq!(*preg_range_iter.peek().unwrap().0, key); + let preg_range = preg_range_iter.next().unwrap().1; + + log::debug!(" -> btree contains range {:?} that overlaps", preg_range); + if self.ranges[preg_range.index()].vreg.is_valid() { + log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); + // range from an allocated bundle: find the bundle and add to + // conflicts list. + let conflict_bundle = self.ranges[preg_range.index()].bundle; + log::debug!(" -> conflict bundle {:?}", conflict_bundle); + if !conflicts.iter().any(|b| *b == conflict_bundle) { + conflicts.push(conflict_bundle); } + } else { + log::debug!(" -> conflict with fixed reservation"); + // range from a direct use of the PReg (due to clobber). 
+ return AllocRegResult::ConflictWithFixed; } } @@ -2463,14 +2070,11 @@ impl<'a, F: Function> Env<'a, F> { let preg = self.pregs[reg.index()].reg; log::debug!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); self.bundles[bundle.index()].allocation = Allocation::reg(preg); - let mut iter = self.bundles[bundle.index()].first_range; - while iter.is_valid() { - let range = &self.ranges_hot[iter.index()]; + for entry in &self.bundles[bundle.index()].ranges { self.pregs[reg.index()] .allocations .btree - .insert(LiveRangeKey::from_range(&range.range), iter); - iter = range.next_in_bundle; + .insert(LiveRangeKey::from_range(&entry.range), entry.index); } AllocRegResult::Allocated(Allocation::reg(preg)) @@ -2494,16 +2098,12 @@ impl<'a, F: Function> Env<'a, F> { }; let preg_idx = PRegIndex::new(preg.index()); self.bundles[bundle.index()].allocation = Allocation::none(); - let mut iter = self.bundles[bundle.index()].first_range; - while iter.is_valid() { - log::debug!(" -> removing LR {:?} from reg {:?}", iter, preg_idx); + for entry in &self.bundles[bundle.index()].ranges { + log::debug!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); self.pregs[preg_idx.index()] .allocations .btree - .remove(&LiveRangeKey::from_range( - &self.ranges_hot[iter.index()].range, - )); - iter = self.ranges_hot[iter.index()].next_in_bundle; + .remove(&LiveRangeKey::from_range(&entry.range)); } let prio = self.bundles[bundle.index()].prio; log::debug!(" -> prio {}; back into queue", prio); @@ -2530,40 +2130,33 @@ impl<'a, F: Function> Env<'a, F> { } fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { + log::debug!("recompute bundle properties: bundle {:?}", bundle); + let minimal; let mut fixed = false; let bundledata = &self.bundles[bundle.index()]; - let first_range = &self.ranges[bundledata.first_range.index()]; - let first_range_hot = &self.ranges_hot[bundledata.first_range.index()]; + let first_range = bundledata.ranges[0].index; + let first_range_data = &self.ranges[first_range.index()]; - log::debug!("recompute bundle properties: bundle {:?}", bundle); - - if first_range.vreg.is_invalid() { + if first_range_data.vreg.is_invalid() { log::debug!(" -> no vreg; minimal and fixed"); minimal = true; fixed = true; } else { - let mut use_iter = first_range.first_use; - while use_iter.is_valid() { - let use_data = &self.uses[use_iter.index()]; - if let OperandPolicy::FixedReg(_) = use_data.operand.policy() { - log::debug!(" -> fixed use {:?}", use_iter); + for u in &first_range_data.uses { + if let OperandPolicy::FixedReg(_) = u.operand.policy() { + log::debug!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); fixed = true; break; } - use_iter = use_data.next_use(); } // Minimal if this is the only range in the bundle, and if // the range covers only one instruction. Note that it // could cover just one ProgPoint, i.e. X.Before..X.After, // or two ProgPoints, i.e. X.Before..X+1.Before. 
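            // Worked example (editor's addition, not part of this patch): each
            // instruction i occupies two program points, i.Before and i.After,
            // ordered i.Before < i.After < (i+1).Before. A range confined to a
            // single instruction is therefore either i.Before..i.After or
            // i.Before..(i+1).Before; stepping the exclusive end back by one
            // ProgPoint lands on instruction i in both cases, so the
            // `from.inst() == to.prev().inst()` test below holds exactly for
            // such ranges (e.g. 5.Before..6.Before qualifies, while
            // 5.Before..7.Before does not, since 7.Before.prev() is 6.After).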
- log::debug!(" -> first range has range {:?}", first_range_hot.range); - log::debug!( - " -> first range has next in bundle {:?}", - first_range_hot.next_in_bundle - ); - minimal = first_range_hot.next_in_bundle.is_invalid() - && first_range_hot.range.from.inst() == first_range_hot.range.to.prev().inst(); + log::debug!(" -> first range has range {:?}", first_range_data.range); + minimal = self.bundles[bundle.index()].ranges.len() == 1 + && first_range_data.range.from.inst() == first_range_data.range.to.prev().inst(); log::debug!(" -> minimal: {}", minimal); } @@ -2577,15 +2170,13 @@ impl<'a, F: Function> Env<'a, F> { } } else { let mut total = 0; - let mut range = self.bundles[bundle.index()].first_range; - while range.is_valid() { - let range_data = &self.ranges[range.index()]; + for entry in &self.bundles[bundle.index()].ranges { + let range_data = &self.ranges[entry.index.index()]; log::debug!( " -> uses spill weight: +{}", range_data.uses_spill_weight() ); total += range_data.uses_spill_weight(); - range = self.ranges_hot[range.index()].next_in_bundle; } if self.bundles[bundle.index()].prio > 0 { @@ -2651,16 +2242,22 @@ impl<'a, F: Function> Env<'a, F> { // first use after it. Each loop iteration handles one range in our // bundle. Calls are scanned up until they advance past the current // range. - let mut our_iter = self.bundles[bundle.index()].first_range; + let our_ranges = &self.bundles[bundle.index()].ranges[..]; let (conflict_from, conflict_to) = if conflicting.is_valid() { ( Some( - self.ranges_hot[self.bundles[conflicting.index()].first_range.index()] + self.bundles[conflicting.index()] + .ranges + .first() + .unwrap() .range .from, ), Some( - self.ranges_hot[self.bundles[conflicting.index()].last_range.index()] + self.bundles[conflicting.index()] + .ranges + .last() + .unwrap() .range .to, ), @@ -2669,40 +2266,35 @@ impl<'a, F: Function> Env<'a, F> { (None, None) }; - let bundle_start = if self.bundles[bundle.index()].first_range.is_valid() { - self.ranges_hot[self.bundles[bundle.index()].first_range.index()] - .range - .from - } else { + let bundle_start = if our_ranges.is_empty() { ProgPoint::before(Inst::new(0)) - }; - let bundle_end = if self.bundles[bundle.index()].last_range.is_valid() { - self.ranges_hot[self.bundles[bundle.index()].last_range.index()] - .range - .to } else { + our_ranges.first().unwrap().range.from + }; + let bundle_end = if our_ranges.is_empty() { ProgPoint::before(Inst::new(self.func.insts())) + } else { + our_ranges.last().unwrap().range.to }; log::debug!(" -> conflict from {:?} to {:?}", conflict_from, conflict_to); let mut clobberidx = 0; - while our_iter.is_valid() { + for entry in our_ranges { // Probe the hot-code tree. - let our_range = self.ranges_hot[our_iter.index()].range; - log::debug!(" -> range {:?}", our_range); + log::debug!(" -> range {:?}", entry.range); if let Some(hot_range_idx) = self .hot_code .btree - .get(&LiveRangeKey::from_range(&our_range)) + .get(&LiveRangeKey::from_range(&entry.range)) { // `hot_range_idx` is a range that *overlaps* with our range. // There may be cold code in our range on either side of the hot // range. Record the transition points if so. 
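            // Illustration (editor's addition, not part of this patch): if our
            // range spans [A .. D) and the overlapping hot-code range spans
            // [B .. C):
            //
            //     A ........ B ======== C ........ D
            //       (cold)       (hot)      (cold)
            //
            // then A < B means our start is cold and the hot range's start is a
            // candidate cold->hot split point, while D > C means our end is cold
            // and the hot range's end is a candidate hot->cold split point, so
            // the hot portion can be split off and allocated separately from the
            // cold tails.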
- let hot_range = self.ranges_hot[hot_range_idx.index()].range; + let hot_range = self.ranges[hot_range_idx.index()].range; log::debug!(" -> overlaps with hot-code range {:?}", hot_range); - let start_cold = our_range.from < hot_range.from; - let end_cold = our_range.to > hot_range.to; + let start_cold = entry.range.from < hot_range.from; + let end_cold = entry.range.to > hot_range.to; if start_cold { log::debug!( " -> our start is cold; potential split at cold->hot transition {:?}", @@ -2727,11 +2319,11 @@ impl<'a, F: Function> Env<'a, F> { while clobberidx < self.clobbers.len() { let cur_clobber = self.clobbers[clobberidx]; let pos = ProgPoint::before(cur_clobber); - if pos >= our_range.to { + if pos >= entry.range.to { break; } clobberidx += 1; - if pos < our_range.from { + if pos < entry.range.from { continue; } if pos > bundle_start { @@ -2760,21 +2352,16 @@ impl<'a, F: Function> Env<'a, F> { } }; - let mut use_idx = self.ranges[our_iter.index()].first_use; - while use_idx.is_valid() { - let use_data = &self.uses[use_idx.index()]; - log::debug!(" -> range has use at {:?}", use_data.pos); - update_with_pos(use_data.pos); - if use_data.operand.kind() == OperandKind::Def { + for u in &self.ranges[entry.index.index()].uses { + log::debug!(" -> range has use at {:?}", u.pos); + update_with_pos(u.pos); + if u.operand.kind() == OperandKind::Def { if seen_defs > 0 { - def_splits.push(use_data.pos); + def_splits.push(u.pos); } seen_defs += 1; } - use_idx = use_data.next_use(); } - - our_iter = self.ranges_hot[our_iter.index()].next_in_bundle; } log::debug!( " -> first use/def after conflict range: {:?}", @@ -2824,40 +2411,34 @@ impl<'a, F: Function> Env<'a, F> { fn find_all_use_split_points(&self, bundle: LiveBundleIndex) -> SmallVec<[ProgPoint; 4]> { let mut splits = smallvec![]; - let mut iter = self.bundles[bundle.index()].first_range; + let ranges = &self.bundles[bundle.index()].ranges[..]; log::debug!("finding all use/def splits for {:?}", bundle); - let bundle_start = if iter.is_valid() { - self.ranges_hot[iter.index()].range.from - } else { + let bundle_start = if ranges.is_empty() { ProgPoint::before(Inst::new(0)) + } else { + self.ranges[ranges[0].index.index()].range.from }; // N.B.: a minimal bundle must include only ProgPoints in a // single instruction, but can include both (can include two // ProgPoints). We split here, taking care to never split *in // the middle* of an instruction, because we would not be able // to insert moves to reify such an assignment. - while iter.is_valid() { - log::debug!( - " -> range {:?}: {:?}", - iter, - self.ranges_hot[iter.index()].range - ); - let mut use_idx = self.ranges[iter.index()].first_use; - while use_idx.is_valid() { - let use_data = &self.uses[use_idx.index()]; - log::debug!(" -> use: {:?}", use_data); - let before_use_inst = if use_data.operand.kind() == OperandKind::Def { + for entry in ranges { + log::debug!(" -> range {:?}: {:?}", entry.index, entry.range); + for u in &self.ranges[entry.index.index()].uses { + log::debug!(" -> use: {:?}", u); + let before_use_inst = if u.operand.kind() == OperandKind::Def { // For a def, split *at* the def -- this may be an // After point, but the value cannot be live into // the def so we don't need to insert a move. - use_data.pos + u.pos } else { // For an use or mod, split before the instruction // -- this allows us to insert a move if // necessary. 
- ProgPoint::before(use_data.pos.inst()) + ProgPoint::before(u.pos.inst()) }; - let after_use_inst = ProgPoint::before(use_data.pos.inst().next()); + let after_use_inst = ProgPoint::before(u.pos.inst().next()); log::debug!( " -> splitting before and after use: {:?} and {:?}", before_use_inst, @@ -2867,10 +2448,7 @@ impl<'a, F: Function> Env<'a, F> { splits.push(before_use_inst); } splits.push(after_use_inst); - use_idx = use_data.next_use(); } - - iter = self.ranges_hot[iter.index()].next_in_bundle; } splits.sort_unstable(); log::debug!(" -> final splits: {:?}", splits); @@ -2931,15 +2509,16 @@ impl<'a, F: Function> Env<'a, F> { // bit more complex and has some subtle invariants. We stick // to the above invariants to keep this code maintainable. + let spillset = self.bundles[bundle.index()].spillset; + let mut split_idx = 0; // Fast-forward past any splits that occur before or exactly // at the start of the first range in the bundle. - let first_range = self.bundles[bundle.index()].first_range; - let bundle_start = if first_range.is_valid() { - self.ranges_hot[first_range.index()].range.from - } else { + let bundle_start = if self.bundles[bundle.index()].ranges.is_empty() { ProgPoint::before(Inst::new(0)) + } else { + self.bundles[bundle.index()].ranges[0].range.from }; while split_idx < split_points.len() && split_points[split_idx] <= bundle_start { split_idx += 1; @@ -2947,221 +2526,161 @@ impl<'a, F: Function> Env<'a, F> { let mut new_bundles: LiveBundleVec = smallvec![]; let mut cur_bundle = bundle; - let mut iter = self.bundles[bundle.index()].first_range; - self.bundles[bundle.index()].first_range = LiveRangeIndex::invalid(); - self.bundles[bundle.index()].last_range = LiveRangeIndex::invalid(); - let mut range_summary_idx = self.bundles[bundle.index()].range_summary.from; - while iter.is_valid() { - // Read `next` link now and then clear it -- we rebuild the list below. - let next = self.ranges_hot[iter.index()].next_in_bundle; - self.ranges_hot[iter.index()].next_in_bundle = LiveRangeIndex::invalid(); - - let mut range = self.ranges_hot[iter.index()].range; - log::debug!(" -> has range {:?} (LR {:?})", range, iter); - - // If any splits occur before this range, create a new - // bundle, then advance to the first split within the - // range. - if split_idx < split_points.len() && split_points[split_idx] <= range.from { - cur_bundle = self.create_bundle(); - log::debug!( - " -> split before a range; creating new bundle {:?}", - cur_bundle - ); - self.bundles[cur_bundle.index()].spillset = self.bundles[bundle.index()].spillset; - new_bundles.push(cur_bundle); - - self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx; + let ranges = std::mem::replace(&mut self.bundles[bundle.index()].ranges, smallvec![]); + // - Invariant: current LR `cur_lr` is being built; it has not + // yet been added to `cur_bundle`. + // - Invariant: uses in `cur_uses` have not yet been added to + // `cur_lr`. + for entry in &ranges { + log::debug!(" -> has range {:?} (LR {:?})", entry.range, entry.index); + + // Until we reach a split point, copy or create the current range. 
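            // Worked example (editor's addition, not part of this patch): with
            // split points at i10.Before and i15.Before and an entry range
            // covering i5.Before .. i20.Before, the loop below cuts the range
            // into i5..i10, i10..i15 and i15..i20. The first piece keeps the
            // existing LR (shortened) in the current bundle; each later piece
            // gets a fresh LR and a fresh bundle pushed onto `new_bundles`; and
            // uses are drained from `cur_uses` in position order into whichever
            // piece contains them, with the spill weight and StartsAtDef flag
            // rebuilt per piece by `update_lr_stats`.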
+ let mut cur_range = entry.range; + let mut cur_lr = entry.index; + let mut cur_uses = + std::mem::replace(&mut self.ranges[cur_lr.index()].uses, smallvec![]); + let mut cur_uses = cur_uses.drain(..).peekable(); + + self.ranges[cur_lr.index()].uses_spill_weight_and_flags = 0; + + let update_lr_stats = |lr: &mut LiveRange, u: &Use| { + if lr.uses.is_empty() && u.operand.kind() == OperandKind::Def { + lr.set_flag(LiveRangeFlag::StartsAtDef); + } + lr.uses_spill_weight_and_flags += u.weight as u32; + }; - if log::log_enabled!(log::Level::Debug) { - self.annotate( - range.from, - format!( - " SPLIT bundle{} / range{} -> bundle{} / range{}", - bundle.index(), - iter.index(), - cur_bundle.index(), - iter.index(), - ), + while cur_range.to > cur_range.from { + if (split_idx >= split_points.len()) || (split_points[split_idx] >= cur_range.to) { + log::debug!( + " -> no more split points; placing all remaining uses into cur range{}", + cur_lr.index() ); + // No more split points left, or next split point + // is beyond the range: just copy the current + // range into the current bundle, and drop all the + // remaining uses into it. + for u in cur_uses { + update_lr_stats(&mut self.ranges[cur_lr.index()], &u); + log::debug!(" -> use at {:?}", u.pos); + self.ranges[cur_lr.index()].uses.push(u); + } + self.ranges[cur_lr.index()].bundle = cur_bundle; + self.bundles[cur_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: cur_range, + index: cur_lr, + }); + break; } - split_idx += 1; - } - while split_idx < split_points.len() && split_points[split_idx] <= range.from { - split_idx += 1; - } - - // Link into current bundle. - self.ranges[iter.index()].bundle = cur_bundle; - if self.bundles[cur_bundle.index()].first_range.is_valid() { - self.ranges_hot[self.bundles[cur_bundle.index()].last_range.index()] - .next_in_bundle = iter; - } else { - self.bundles[cur_bundle.index()].first_range = iter; - } - self.bundles[cur_bundle.index()].last_range = iter; - - // While the next split point is beyond the start of the - // range and before the end, shorten the current LiveRange - // (this is always legal) and create a new Bundle and - // LiveRange for the remainder. Truncate the old bundle - // (set last_range). Insert the LiveRange into the vreg - // and into the new bundle. Then move the use-chain over, - // splitting at the appropriate point. - // - // We accumulate the use stats (fixed-use count and spill - // weight) as we scan through uses, recomputing the values - // for the truncated initial LiveRange and taking the - // remainders for the split "rest" LiveRange. - - while split_idx < split_points.len() && split_points[split_idx] < range.to { - let split_point = split_points[split_idx]; - split_idx += 1; - - // Skip forward to the current range. - if split_point <= range.from { + // If there is a split point prior to or exactly at + // the start of this LR, then create a new bundle but + // keep the existing LR, and go around again. Skip all + // such split-points (lump them into one), while we're + // at it. 
+ if split_points[split_idx] <= cur_range.from { + log::debug!( + " -> split point at {:?} before start of range (range {:?} LR {:?})", + split_points[split_idx], + cur_range, + cur_lr, + ); + cur_bundle = self.create_bundle(); + log::debug!(" -> new bundle {:?}", cur_bundle); + self.ranges[cur_lr.index()].bundle = cur_bundle; + new_bundles.push(cur_bundle); + self.bundles[cur_bundle.index()].spillset = spillset; + while split_idx < split_points.len() + && split_points[split_idx] <= cur_range.from + { + split_idx += 1; + } continue; } - log::debug!( - " -> processing split point {:?} with iter {:?}", - split_point, - iter - ); + // If we reach here, there is at least one split point + // that lands in the current range, so we need to + // actually split. Let's create a new LR and bundle + // for the rest (post-split-point), drop uses up to + // the split point into current LR and drop current LR + // into current bundle, then advance current LR and + // bundle to new LR and bundle. + let split = split_points[split_idx]; + while split_idx < split_points.len() && split_points[split_idx] == split { + // Skip past all duplicate split-points. + split_idx += 1; + } + log::debug!(" -> split at {:?}", split); - // We split into `first` and `rest`. `rest` may be - // further subdivided in subsequent iterations; we - // only do one split per iteration. - debug_assert!(range.from < split_point && split_point < range.to); - let rest_range = CodeRange { - from: split_point, - to: self.ranges_hot[iter.index()].range.to, + let existing_range = CodeRange { + from: cur_range.from, + to: split, }; - self.ranges_hot[iter.index()].range.to = split_point; - range = rest_range; - log::debug!( - " -> range of {:?} now {:?}", - iter, - self.ranges_hot[iter.index()].range - ); - - // Create the rest-range and insert it into the vreg's - // range list. (Note that the vreg does not keep a - // tail-pointer so we do not need to update that.) - let rest_lr = self.create_liverange(rest_range); - self.ranges[rest_lr.index()].vreg = self.ranges[iter.index()].vreg; - self.ranges[rest_lr.index()].next_in_reg = self.ranges[iter.index()].next_in_reg; - self.ranges[iter.index()].next_in_reg = rest_lr; - - log::debug!( - " -> split tail to new LR {:?} with range {:?}", - rest_lr, - rest_range - ); - - // Scan over uses, accumulating stats for those that - // stay in the first range, finding the first use that - // moves to the rest range. 
- let mut last_use_in_first_range = UseIndex::invalid(); - let mut use_iter = self.ranges[iter.index()].first_use; - let mut uses_spill_weight = 0; - while use_iter.is_valid() { - if self.uses[use_iter.index()].pos >= split_point { + let new_range = CodeRange { + from: split, + to: cur_range.to, + }; + let new_lr = self.create_liverange(new_range); + let new_bundle = self.create_bundle(); + log::debug!(" -> new LR {:?}, new bundle {:?}", new_lr, new_bundle); + new_bundles.push(new_bundle); + self.bundles[new_bundle.index()].spillset = spillset; + + self.ranges[cur_lr.index()].range = existing_range; + self.ranges[new_lr.index()].vreg = self.ranges[cur_lr.index()].vreg; + self.ranges[new_lr.index()].bundle = new_bundle; + self.ranges[cur_lr.index()].bundle = cur_bundle; + self.bundles[cur_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: existing_range, + index: cur_lr, + }); + while let Some(u) = cur_uses.peek() { + if u.pos >= split { break; } - last_use_in_first_range = use_iter; - let policy = self.uses[use_iter.index()].operand.policy(); - log::debug!( - " -> use {:?} before split point; policy {:?}", - use_iter, - policy - ); - let pos = self.uses[use_iter.index()].pos; - let is_hot = - self.hot_code - .btree - .contains_key(&LiveRangeKey::from_range(&CodeRange { - from: pos, - to: pos.next(), - })); - let weight = spill_weight_from_policy(policy, is_hot); - uses_spill_weight += weight; - log::debug!(" -> use {:?} remains in orig", use_iter); - use_iter = self.uses[use_iter.index()].next_use(); + update_lr_stats(&mut self.ranges[cur_lr.index()], &u); + log::debug!(" -> use at {:?} in current LR {:?}", u.pos, cur_lr); + self.ranges[cur_lr.index()].uses.push(*u); + cur_uses.next(); } - // Move over `rest`'s uses and update stats on first - // and rest LRs. - if use_iter.is_valid() { - log::debug!( - " -> moving uses over the split starting at {:?}", - use_iter - ); - self.ranges[rest_lr.index()].first_use = use_iter; - self.ranges[rest_lr.index()].last_use = self.ranges[iter.index()].last_use; - - self.ranges[iter.index()].last_use = last_use_in_first_range; - if last_use_in_first_range.is_valid() { - self.uses[last_use_in_first_range.index()] - .set_next_use(UseIndex::invalid()); - } else { - self.ranges[iter.index()].first_use = UseIndex::invalid(); - } - - let new_spill_weight = - self.ranges[iter.index()].uses_spill_weight() - uses_spill_weight; - self.ranges[rest_lr.index()].set_uses_spill_weight(new_spill_weight); - self.ranges[iter.index()].set_uses_spill_weight(uses_spill_weight); - } - - log::debug!( - " -> range {:?} next-in-bundle is {:?}", - iter, - self.ranges_hot[iter.index()].next_in_bundle + self.annotate( + existing_range.to, + format!( + " SPLIT range{} v{} bundle{} to range{} bundle{}", + cur_lr.index(), + self.ranges[cur_lr.index()].vreg.index(), + cur_bundle.index(), + new_lr.index(), + new_bundle.index(), + ), ); - // Create a new bundle to hold the rest-range. 
- let rest_bundle = self.create_bundle(); - self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1; - let old_bundle = cur_bundle; - cur_bundle = rest_bundle; - self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx; - self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1; - new_bundles.push(rest_bundle); - self.bundles[rest_bundle.index()].first_range = rest_lr; - self.bundles[rest_bundle.index()].last_range = rest_lr; - self.bundles[rest_bundle.index()].spillset = self.bundles[bundle.index()].spillset; - self.ranges[rest_lr.index()].bundle = rest_bundle; - log::debug!(" -> new bundle {:?} for LR {:?}", rest_bundle, rest_lr); - - if log::log_enabled!(log::Level::Debug) { - self.annotate( - split_point, - format!( - " SPLIT bundle{} / range{} -> bundle{} / range{}", - old_bundle.index(), - iter.index(), - cur_bundle.index(), - rest_lr.index(), - ), - ); - } - - iter = rest_lr; + cur_range = new_range; + cur_bundle = new_bundle; + cur_lr = new_lr; + + // Perform a lazy split in the VReg data. We just + // append the new LR and its range; we will sort by + // start of range, and fix up range ends, once when we + // iterate over the VReg's ranges after allocation + // completes (this is the only time when order + // matters). + self.vregs[self.ranges[new_lr.index()].vreg.index()] + .ranges + .push(LiveRangeListEntry { + range: new_range, + index: new_lr, + }); } - - iter = next; - range_summary_idx += 1; - self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx; } - self.fixup_range_summary_bound(bundle); - for &b in &new_bundles { - self.fixup_range_summary_bound(b); - } - - // Enqueue all split-bundles on the allocation queue. + // Recompute weights and priorities of all bundles, and + // enqueue all split-bundles on the allocation queue. let prio = self.compute_bundle_prio(bundle); self.bundles[bundle.index()].prio = prio; self.recompute_bundle_properties(bundle); @@ -3174,21 +2693,6 @@ impl<'a, F: Function> Env<'a, F> { } } - fn fixup_range_summary_bound(&mut self, bundle: LiveBundleIndex) { - let bundledata = &mut self.bundles[bundle.index()]; - let from = if bundledata.first_range.is_valid() { - self.ranges_hot[bundledata.first_range.index()].range.from - } else { - ProgPoint::from_index(0) - }; - let to = if bundledata.last_range.is_valid() { - self.ranges_hot[bundledata.last_range.index()].range.to - } else { - ProgPoint::from_index(0) - }; - bundledata.range_summary.bound = CodeRange { from, to }; - } - fn process_bundle(&mut self, bundle: LiveBundleIndex) { // Find any requirements: for every LR, for every def/use, gather // requirements (fixed-reg, any-reg, any) and merge them. @@ -3251,48 +2755,14 @@ impl<'a, F: Function> Env<'a, F> { // location in the code and by the bundle we're // considering. This has the effect of spreading // demand more evenly across registers. - let scan_offset = self.ranges_hot - [self.bundles[bundle.index()].first_range.index()] + let scan_offset = self.ranges + [self.bundles[bundle.index()].ranges[0].index.index()] .range .from .inst() .index() + bundle.index(); - // If the bundle is more than one range, see if we - // can find a reg that the bounding range fits - // completely in first. Use that if so. Otherwise, - // do a detailed (liverange-by-liverange) probe of - // each reg in preference order. 
- let bounding_range = self.bundle_bounding_range_if_multiple(bundle); - if let Some(bounding_range) = bounding_range { - log::debug!("initial scan with bounding range {:?}", bounding_range); - self.stats.process_bundle_bounding_range_probe_start_any += 1; - for preg in RegTraversalIter::new( - self.env, - class, - hint_reg, - PReg::invalid(), - scan_offset, - ) { - let preg_idx = PRegIndex::new(preg.index()); - log::debug!("trying preg {:?}", preg_idx); - self.stats.process_bundle_bounding_range_probes_any += 1; - if self.range_definitely_fits_in_reg(bounding_range, preg_idx) { - let result = self.try_to_allocate_bundle_to_reg(bundle, preg_idx); - self.stats.process_bundle_bounding_range_success_any += 1; - let alloc = match result { - AllocRegResult::Allocated(alloc) => alloc, - _ => panic!("Impossible result: {:?}", result), - }; - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = alloc.as_reg().unwrap(); - log::debug!(" -> definitely fits; assigning"); - return; - } - } - } - self.stats.process_bundle_reg_probe_start_any += 1; for preg in RegTraversalIter::new( self.env, @@ -3413,7 +2883,7 @@ impl<'a, F: Function> Env<'a, F> { for i in 0..self.spilled_bundles.len() { let bundle = self.spilled_bundles[i]; // don't borrow self let any_vreg = self.vreg_regs[self.ranges - [self.bundles[bundle.index()].first_range.index()] + [self.bundles[bundle.index()].ranges[0].index.index()] .vreg .index()]; let class = any_vreg.class(); @@ -3455,17 +2925,14 @@ impl<'a, F: Function> Env<'a, F> { spillset: SpillSetIndex, ) -> bool { for &bundle in &self.spillsets[spillset.index()].bundles { - let mut iter = self.bundles[bundle.index()].first_range; - while iter.is_valid() { - let range = self.ranges_hot[iter.index()].range; + for entry in &self.bundles[bundle.index()].ranges { if self.spillslots[spillslot.index()] .ranges .btree - .contains_key(&LiveRangeKey::from_range(&range)) + .contains_key(&LiveRangeKey::from_range(&entry.range)) { return false; } - iter = self.ranges_hot[iter.index()].next_in_bundle; } } true @@ -3486,21 +2953,18 @@ impl<'a, F: Function> Env<'a, F> { spillset, bundle ); - let mut iter = self.bundles[bundle.index()].first_range; - while iter.is_valid() { + for entry in &self.bundles[bundle.index()].ranges { log::debug!( "spillslot {:?} getting range {:?} from bundle {:?}: {:?}", spillslot, - iter, + entry.range, + entry.index, bundle, - self.ranges_hot[iter.index()].range ); - let range = self.ranges_hot[iter.index()].range; self.spillslots[spillslot.index()] .ranges .btree - .insert(LiveRangeKey::from_range(&range), iter); - iter = self.ranges_hot[iter.index()].next_in_bundle; + .insert(LiveRangeKey::from_range(&entry.range), entry.index); } } } @@ -3654,18 +3118,37 @@ impl<'a, F: Function> Env<'a, F> { } fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { - let bundledata = &self.bundles[self.ranges[range.index()].bundle.index()]; + log::debug!("get_alloc_for_range: {:?}", range); + let bundle = self.ranges[range.index()].bundle; + log::debug!(" -> bundle: {:?}", bundle); + let bundledata = &self.bundles[bundle.index()]; + log::debug!(" -> allocation {:?}", bundledata.allocation); if bundledata.allocation != Allocation::none() { bundledata.allocation } else { + log::debug!(" -> spillset {:?}", bundledata.spillset); + log::debug!( + " -> spill slot {:?}", + self.spillsets[bundledata.spillset.index()].slot + ); self.spillslots[self.spillsets[bundledata.spillset.index()].slot.index()].alloc } } fn apply_allocations_and_insert_moves(&mut 
self) { + log::debug!("apply_allocations_and_insert_moves"); log::debug!("blockparam_ins: {:?}", self.blockparam_ins); log::debug!("blockparam_outs: {:?}", self.blockparam_outs); + // Now that all splits are done, we can pay the cost once to + // sort VReg range lists and update with the final ranges. + for vreg in &mut self.vregs { + for entry in &mut vreg.ranges { + entry.range = self.ranges[entry.index.index()].range; + } + vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); + } + /// We create "half-moves" in order to allow a single-scan /// strategy with a subsequent sort. Basically, the key idea /// is that as our single scan through a range for a vreg hits @@ -3738,15 +3221,15 @@ impl<'a, F: Function> Env<'a, F> { // half-moves. We also scan over `blockparam_ins` and // `blockparam_outs`, which are sorted by (block, vreg), // and over program-move srcs/dsts to fill in allocations. - let mut iter = self.vregs[vreg.index()].first_range; let mut prev = LiveRangeIndex::invalid(); - while iter.is_valid() { - let alloc = self.get_alloc_for_range(iter); - let range = self.ranges_hot[iter.index()].range; + for range_idx in 0..self.vregs[vreg.index()].ranges.len() { + let entry = self.vregs[vreg.index()].ranges[range_idx]; + let alloc = self.get_alloc_for_range(entry.index); + let range = entry.range; log::debug!( "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", vreg, - iter, + entry.index, range, alloc ); @@ -3759,8 +3242,8 @@ impl<'a, F: Function> Env<'a, F> { " <<< start v{} in {} (range{}) (bundle{})", vreg.index(), alloc, - iter.index(), - self.ranges[iter.index()].bundle.index(), + entry.index.index(), + self.ranges[entry.index.index()].bundle.index(), ), ); self.annotate( @@ -3769,8 +3252,8 @@ impl<'a, F: Function> Env<'a, F> { " end v{} in {} (range{}) (bundle{}) >>>", vreg.index(), alloc, - iter.index(), - self.ranges[iter.index()].bundle.index(), + entry.index.index(), + self.ranges[entry.index.index()].bundle.index(), ), ); } @@ -3797,9 +3280,9 @@ impl<'a, F: Function> Env<'a, F> { // instruction). if prev.is_valid() { let prev_alloc = self.get_alloc_for_range(prev); - let prev_range = self.ranges_hot[prev.index()].range; + let prev_range = self.ranges[prev.index()].range; let first_is_def = - self.ranges[iter.index()].has_flag(LiveRangeFlag::StartsAtDef); + self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef); debug_assert!(prev_alloc != Allocation::none()); if prev_range.to == range.from && !self.is_start_of_block(range.from) @@ -3808,7 +3291,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!( "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", prev.index(), - iter.index(), + entry.index.index(), prev_alloc, alloc, vreg.index() @@ -4009,13 +3492,12 @@ impl<'a, F: Function> Env<'a, F> { } // Scan over def/uses and apply allocations. - let mut use_iter = self.ranges[iter.index()].first_use; - while use_iter.is_valid() { - let usedata = &self.uses[use_iter.index()]; + for use_idx in 0..self.ranges[entry.index.index()].uses.len() { + let usedata = self.ranges[entry.index.index()].uses[use_idx]; log::debug!("applying to use: {:?}", usedata); debug_assert!(range.contains_point(usedata.pos)); let inst = usedata.pos.inst(); - let slot = usedata.slot(); + let slot = usedata.slot; let operand = usedata.operand; // Safepoints add virtual uses with no slots; // avoid these. 
@@ -4025,7 +3507,6 @@ impl<'a, F: Function> Env<'a, F> { if let OperandPolicy::Reuse(_) = operand.policy() { reuse_input_insts.push(inst); } - use_iter = self.uses[use_iter.index()].next_use(); } // Scan over program move srcs/dsts to fill in allocations. @@ -4111,8 +3592,7 @@ impl<'a, F: Function> Env<'a, F> { prog_move_dst_idx += 1; } - prev = iter; - iter = self.ranges[iter.index()].next_in_reg; + prev = entry.index; } } @@ -4533,10 +4013,9 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> live over safepoints: {:?}", safepoints); let mut safepoint_idx = 0; - let mut iter = self.vregs[vreg.index()].first_range; - while iter.is_valid() { - let range = self.ranges_hot[iter.index()].range; - let alloc = self.get_alloc_for_range(iter); + for entry in &self.vregs[vreg.index()].ranges { + let range = entry.range; + let alloc = self.get_alloc_for_range(entry.index); log::debug!(" -> range {:?}: alloc {}", range, alloc); while safepoint_idx < safepoints.len() && safepoints[safepoint_idx] < range.to { if safepoints[safepoint_idx] < range.from { @@ -4551,7 +4030,6 @@ impl<'a, F: Function> Env<'a, F> { self.safepoint_slots.push((safepoints[safepoint_idx], slot)); safepoint_idx += 1; } - iter = self.ranges[iter.index()].next_in_reg; } } diff --git a/src/lib.rs b/src/lib.rs index a4d81858..df5f2841 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -758,15 +758,6 @@ pub trait Function { &[] } - /// Is the given VReg pinned permanently to a PReg? Note that the - /// input program must not contain constraints that contradict - /// this (e.g., using another VReg with a fixed-reg policy to a - /// given preg at the same time as using a VReg pinned to that - /// preg) or else allocation may be impossible. - fn is_pinned_vreg(&self, _: VReg) -> Option { - None - } - // -------------- // Spills/reloads // -------------- From 328c9004e5a35453ae1b599d8ad80fe096dbc3b0 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 17 May 2021 23:59:13 -0700 Subject: [PATCH 058/155] fuzzbug fixes --- src/ion/mod.rs | 77 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 16 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index e3aae1a4..444138d6 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -803,15 +803,6 @@ impl<'a, F: Function> Env<'a, F> { /// Returns the liverange that contains the given range. fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, range: CodeRange) -> LiveRangeIndex { log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); - - // When we use this function, the LR lists in VRegs are not - // yet sorted. We can extend an existing LR if we happen to - // see that one abuts the new range -- we check the end, - // because this one should be the earliest given how we build - // liveness (but we don't claim or uphold this as an - // invariant) -- or we can just append to the end. After we - // add all ranges, we will sort the lists. - if let Some(last) = self.vregs[vreg.index()].ranges.last_mut() { if last.range.from == range.to { log::debug!(" -> abuts existing range {:?}, extending", last.index); @@ -1360,12 +1351,13 @@ impl<'a, F: Function> Env<'a, F> { self.safepoints.sort_unstable(); - // Sort ranges in each vreg, and uses in each range, so we can - // iterate over them in order below. 
The ordering invariant is - // always maintained for uses and always for ranges in bundles - // (which are initialized later), but not always for ranges in - // vregs; those are sorted only when needed, here and then - // again at the end of allocation when resolving moves. + // Sort ranges in each vreg, and uses in each range, and merge + // overlapping ranges, so we can iterate over them in order + // below. The ordering invariant is always maintained for uses + // and always for ranges in bundles (which are initialized + // later), but not always for ranges in vregs; those are + // sorted only when needed, here and then again at the end of + // allocation when resolving moves. for vreg in &mut self.vregs { for entry in &mut vreg.ranges { // Ranges may have been truncated above at defs. We @@ -1373,6 +1365,43 @@ impl<'a, F: Function> Env<'a, F> { entry.range = self.ranges[entry.index.index()].range; } vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); + + // Merge overlapping ranges. + let mut last: Option = None; + let mut i = 0; + while i < vreg.ranges.len() { + if last.is_some() { + if vreg.ranges[i].range.from <= vreg.ranges[last.unwrap()].range.to { + // Move uses over. + let mut uses = std::mem::replace( + &mut self.ranges[vreg.ranges[i].index.index()].uses, + smallvec![], + ); + self.ranges[vreg.ranges[last.unwrap()].index.index()] + .uses + .extend(uses.drain(..)); + self.ranges[vreg.ranges[i].index.index()].merged_into = + vreg.ranges[last.unwrap()].index; + // Extend end of range to subsume this one. + vreg.ranges[last.unwrap()].range.to = std::cmp::max( + vreg.ranges[last.unwrap()].range.to, + vreg.ranges[i].range.to, + ); + // Remove the merged-from range. + vreg.ranges.remove(i); + } else { + // This range is beyond the last one. + last = Some(i); + i += 1; + } + } else { + last = Some(i); + i += 1; + } + } + for entry in &vreg.ranges { + self.ranges[entry.index.index()].range = entry.range; + } } for range in 0..self.ranges.len() { @@ -1698,6 +1727,11 @@ impl<'a, F: Function> Env<'a, F> { // Two non-empty lists of LiveRanges: traverse both simultaneously and // merge ranges into `merged`. 
let mut merged: LiveRangeList = smallvec![]; + log::debug!( + "merging: ranges_from = {:?} ranges_to = {:?}", + ranges_from, + ranges_to + ); while idx_from < ranges_from.len() || idx_to < ranges_to.len() { if idx_from < ranges_from.len() && idx_to < ranges_to.len() { if ranges_from[idx_from].range.from <= ranges_to[idx_to].range.from { @@ -1716,7 +1750,14 @@ impl<'a, F: Function> Env<'a, F> { break; } } + log::debug!("merging: merged = {:?}", merged); + let mut last_range = None; for entry in &merged { + if last_range.is_some() { + assert!(last_range.unwrap() < entry.range); + } + last_range = Some(entry.range); + if self.ranges[entry.index.index()].bundle == from { if log::log_enabled!(log::Level::Debug) { self.annotate( @@ -1758,7 +1799,11 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); for entry in &self.bundles[bundle.index()].ranges { - log::debug!(" -> with LR range{}", entry.index.index()); + log::debug!( + " -> with LR range{}: {:?}", + entry.index.index(), + entry.range + ); self.ranges[entry.index.index()].bundle = bundle; } From 8e0d0f1de0054ab0a67c0fe410c798bbe6eaad8e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 May 2021 00:26:38 -0700 Subject: [PATCH 059/155] fuzzbug fix --- src/ion/mod.rs | 115 +++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 444138d6..aabb5eeb 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -163,6 +163,14 @@ impl LiveRange { self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0 } #[inline(always)] + pub fn flag_word(&self) -> u32 { + self.uses_spill_weight_and_flags & 0xe000_0000 + } + #[inline(always)] + pub fn merge_flags(&mut self, flag_word: u32) { + self.uses_spill_weight_and_flags |= flag_word; + } + #[inline(always)] pub fn uses_spill_weight(&self) -> u32 { self.uses_spill_weight_and_flags & 0x1fff_ffff } @@ -801,25 +809,56 @@ impl<'a, F: Function> Env<'a, F> { /// Mark `range` as live for the given `vreg`. /// /// Returns the liverange that contains the given range. - fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, range: CodeRange) -> LiveRangeIndex { + fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, mut range: CodeRange) -> LiveRangeIndex { log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); - if let Some(last) = self.vregs[vreg.index()].ranges.last_mut() { - if last.range.from == range.to { - log::debug!(" -> abuts existing range {:?}, extending", last.index); - last.range.from = range.from; - self.ranges[last.index.index()].range.from = range.from; - return last.index; + + // Check for abutting or overlapping ranges. 
+ let mut merged = None; + let mut i = 0; + while i < self.vregs[vreg.index()].ranges.len() { + let entry = self.vregs[vreg.index()].ranges[i]; + if entry.range.overlaps(&range) { + if entry.range.from < range.from { + range.from = entry.range.from; + } + if entry.range.to > range.to { + range.to = entry.range.to; + } + if merged.is_none() { + merged = Some(i); + self.ranges[entry.index.index()].range = range; + self.vregs[vreg.index()].ranges[i].range = range; + i += 1; + } else { + let merge_from = entry.index; + let merge_into = self.vregs[vreg.index()].ranges[merged.unwrap()].index; + self.ranges[merge_from.index()].merged_into = merge_into; + let mut uses = + std::mem::replace(&mut self.ranges[merge_from.index()].uses, smallvec![]); + self.ranges[merge_into.index()].uses.extend(uses.drain(..)); + let f = self.ranges[merge_from.index()].flag_word(); + self.ranges[merge_into.index()].merge_flags(f); + self.ranges[merge_into.index()].range = range; + self.vregs[vreg.index()].ranges[merged.unwrap()].range = range; + self.vregs[vreg.index()].ranges.remove(i); + } + } else { + i += 1; } } // If we get here and did not merge into an existing liverange or liveranges, then we need // to create a new one. - let lr = self.create_liverange(range); - self.ranges[lr.index()].vreg = vreg; - self.vregs[vreg.index()] - .ranges - .push(LiveRangeListEntry { range, index: lr }); - lr + if merged.is_none() { + let lr = self.create_liverange(range); + self.ranges[lr.index()].vreg = vreg; + self.vregs[vreg.index()] + .ranges + .push(LiveRangeListEntry { range, index: lr }); + lr + } else { + self.vregs[vreg.index()].ranges[merged.unwrap()].index + } } fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { @@ -1351,13 +1390,12 @@ impl<'a, F: Function> Env<'a, F> { self.safepoints.sort_unstable(); - // Sort ranges in each vreg, and uses in each range, and merge - // overlapping ranges, so we can iterate over them in order - // below. The ordering invariant is always maintained for uses - // and always for ranges in bundles (which are initialized - // later), but not always for ranges in vregs; those are - // sorted only when needed, here and then again at the end of - // allocation when resolving moves. + // Sort ranges in each vreg, and uses in each range, so we can + // iterate over them in order below. The ordering invariant is + // always maintained for uses and always for ranges in bundles + // (which are initialized later), but not always for ranges in + // vregs; those are sorted only when needed, here and then + // again at the end of allocation when resolving moves. for vreg in &mut self.vregs { for entry in &mut vreg.ranges { // Ranges may have been truncated above at defs. We @@ -1365,43 +1403,6 @@ impl<'a, F: Function> Env<'a, F> { entry.range = self.ranges[entry.index.index()].range; } vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); - - // Merge overlapping ranges. - let mut last: Option = None; - let mut i = 0; - while i < vreg.ranges.len() { - if last.is_some() { - if vreg.ranges[i].range.from <= vreg.ranges[last.unwrap()].range.to { - // Move uses over. - let mut uses = std::mem::replace( - &mut self.ranges[vreg.ranges[i].index.index()].uses, - smallvec![], - ); - self.ranges[vreg.ranges[last.unwrap()].index.index()] - .uses - .extend(uses.drain(..)); - self.ranges[vreg.ranges[i].index.index()].merged_into = - vreg.ranges[last.unwrap()].index; - // Extend end of range to subsume this one. 
- vreg.ranges[last.unwrap()].range.to = std::cmp::max( - vreg.ranges[last.unwrap()].range.to, - vreg.ranges[i].range.to, - ); - // Remove the merged-from range. - vreg.ranges.remove(i); - } else { - // This range is beyond the last one. - last = Some(i); - i += 1; - } - } else { - last = Some(i); - i += 1; - } - } - for entry in &vreg.ranges { - self.ranges[entry.index.index()].range = entry.range; - } } for range in 0..self.ranges.len() { From 4389f16156925582e28317790fb24b79d4bd2a81 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 May 2021 12:14:59 -0700 Subject: [PATCH 060/155] debugging log message for liveins --- src/ion/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index aabb5eeb..96541e56 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -987,6 +987,10 @@ impl<'a, F: Function> Env<'a, F> { .next() .is_some() { + log::debug!( + "non-empty liveins to entry block: {:?}", + self.liveins[self.func.entry_block().index()] + ); return Err(RegAllocError::EntryLivein); } From c3513b94b0304870db99a34101d2833c39a9ea47 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 May 2021 15:16:19 -0700 Subject: [PATCH 061/155] Bugfix: don't do a split-at-intermediate-defs split if the first such point is the start of the bundle. --- src/ion/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 96541e56..e3e5e6c2 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2448,7 +2448,7 @@ impl<'a, F: Function> Env<'a, F> { self.stats.splits_conflicts += 1; log::debug!(" going with last before conflict"); smallvec![last_before_conflict.unwrap()] - } else if def_splits.len() > 0 { + } else if def_splits.len() > 0 && def_splits[0] > bundle_start { log::debug!(" going with non-first def splits: {:?}", def_splits); self.stats.splits_defs += 1; def_splits From 04c8e46787797d902274e4c146684b5164744778 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 May 2021 18:52:34 -0700 Subject: [PATCH 062/155] Only do annotations in debug builds --- src/ion/mod.rs | 250 +++++++++++++++++++++++++++---------------------- 1 file changed, 140 insertions(+), 110 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index e3e5e6c2..676a4e67 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1133,17 +1133,20 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::Before, ); - if log::log_enabled!(log::Level::Debug) { - self.annotate( - ProgPoint::after(inst), - format!( - " prog-move v{} ({:?}) -> v{} ({:?})", - src.vreg().vreg(), - src_policy, - dst.vreg().vreg(), - dst_policy, - ), - ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::after(inst), + format!( + " prog-move v{} ({:?}) -> v{} ({:?})", + src.vreg().vreg(), + src_policy, + dst.vreg().vreg(), + dst_policy, + ), + ); + } } // N.B.: in order to integrate with the move @@ -1711,17 +1714,20 @@ impl<'a, F: Function> Env<'a, F> { for entry in &list { self.ranges[entry.index.index()].bundle = to; - if log::log_enabled!(log::Level::Debug) { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), 
+ ); + } } } self.bundles[to.index()].ranges = list; @@ -1763,18 +1769,21 @@ impl<'a, F: Function> Env<'a, F> { } last_range = Some(entry.range); - if self.ranges[entry.index.index()].bundle == from { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); + #[cfg(debug)] + { + if self.ranges[entry.index.index()].bundle == from { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), + ); + } } } @@ -2698,17 +2707,20 @@ impl<'a, F: Function> Env<'a, F> { cur_uses.next(); } - self.annotate( - existing_range.to, - format!( - " SPLIT range{} v{} bundle{} to range{} bundle{}", - cur_lr.index(), - self.ranges[cur_lr.index()].vreg.index(), - cur_bundle.index(), - new_lr.index(), - new_bundle.index(), - ), - ); + #[cfg(debug)] + { + self.annotate( + existing_range.to, + format!( + " SPLIT range{} v{} bundle{} to range{} bundle{}", + cur_lr.index(), + self.ranges[cur_lr.index()].vreg.index(), + cur_bundle.index(), + new_lr.index(), + new_bundle.index(), + ), + ); + } cur_range = new_range; cur_bundle = new_bundle; @@ -3285,27 +3297,30 @@ impl<'a, F: Function> Env<'a, F> { ); debug_assert!(alloc != Allocation::none()); - if log::log_enabled!(log::Level::Debug) { - self.annotate( - range.from, - format!( - " <<< start v{} in {} (range{}) (bundle{})", - vreg.index(), - alloc, - entry.index.index(), - self.ranges[entry.index.index()].bundle.index(), - ), - ); - self.annotate( - range.to, - format!( - " end v{} in {} (range{}) (bundle{}) >>>", - vreg.index(), - alloc, - entry.index.index(), - self.ranges[entry.index.index()].bundle.index(), - ), - ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + range.from, + format!( + " <<< start v{} in {} (range{}) (bundle{})", + vreg.index(), + alloc, + entry.index.index(), + self.ranges[entry.index.index()].bundle.index(), + ), + ); + self.annotate( + range.to, + format!( + " end v{} in {} (range{}) (bundle{}) >>>", + vreg.index(), + alloc, + entry.index.index(), + self.ranges[entry.index.index()].bundle.index(), + ), + ); + } } // Does this range follow immediately after a prior @@ -3419,18 +3434,21 @@ impl<'a, F: Function> Env<'a, F> { ), alloc, }); - if log::log_enabled!(log::Level::Debug) { - self.annotate( - self.cfginfo.block_exit[block.index()], - format!( - "blockparam-out: block{} to block{}: v{} to v{} in {}", - from_block.index(), - to_block.index(), - from_vreg.index(), - to_vreg.index(), - alloc - ), - ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_exit[block.index()], + format!( + "blockparam-out: block{} to block{}: v{} to v{} in {}", + from_block.index(), + to_block.index(), + from_vreg.index(), + to_vreg.index(), + alloc + ), + ); + } } } blockparam_out_idx += 1; @@ -3482,17 +3500,20 @@ impl<'a, F: Function> Env<'a, F> { from_block.index(), alloc, ); - if log::log_enabled!(log::Level::Debug) { - self.annotate( - self.cfginfo.block_entry[block.index()], - format!( - "blockparam-in: block{} to block{}:into v{} in {}", - from_block.index(), - to_block.index(), - to_vreg.index(), - alloc - ), - ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + 
self.annotate( + self.cfginfo.block_entry[block.index()], + format!( + "blockparam-in: block{} to block{}:into v{} in {}", + from_block.index(), + to_block.index(), + to_vreg.index(), + alloc + ), + ); + } } } blockparam_in_idx += 1; @@ -3829,11 +3850,17 @@ impl<'a, F: Function> Env<'a, F> { input_alloc ); if input_alloc != output_alloc { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - ProgPoint::before(inst), - format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc), - ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::before(inst), + format!( + " reuse-input-copy: {} -> {}", + input_alloc, output_alloc + ), + ); + } } self.insert_move( ProgPoint::before(inst), @@ -3999,22 +4026,25 @@ impl<'a, F: Function> Env<'a, F> { self.stats.edits_count = self.edits.len(); // Add debug annotations. - if log::log_enabled!(log::Level::Debug) { - for i in 0..self.edits.len() { - let &(pos, _, ref edit) = &self.edits[i]; - match edit { - &Edit::Move { from, to, to_vreg } => { - self.annotate( - ProgPoint::from_index(pos), - format!("move {} -> {} ({:?})", from, to, to_vreg), - ); - } - &Edit::BlockParams { - ref vregs, - ref allocs, - } => { - let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); - self.annotate(ProgPoint::from_index(pos), s); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + for i in 0..self.edits.len() { + let &(pos, _, ref edit) = &self.edits[i]; + match edit { + &Edit::Move { from, to, to_vreg } => { + self.annotate( + ProgPoint::from_index(pos), + format!("move {} -> {} ({:?})", from, to, to_vreg), + ); + } + &Edit::BlockParams { + ref vregs, + ref allocs, + } => { + let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); + self.annotate(ProgPoint::from_index(pos), s); + } } } } From e1f67e860feec76f6338eb2b801f407d710a646c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 May 2021 22:40:43 -0700 Subject: [PATCH 063/155] Pinned VRegs for use with regalloc.rs shim to support RealRegs. --- src/ion/mod.rs | 462 ++++++++++++++++++++++++++++++------------------- src/lib.rs | 20 +++ 2 files changed, 307 insertions(+), 175 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 676a4e67..ccef123d 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -53,7 +53,7 @@ use crate::{ MachineEnv, Operand, OperandKind, OperandPolicy, OperandPos, Output, PReg, ProgPoint, RegAllocError, RegClass, SpillSlot, VReg, }; -use fxhash::FxHashSet; +use fxhash::{FxHashMap, FxHashSet}; use log::debug; use smallvec::{smallvec, SmallVec}; use std::cmp::Ordering; @@ -252,6 +252,7 @@ struct VRegData { ranges: LiveRangeList, blockparam: Block, is_ref: bool, + is_pinned: bool, } #[derive(Clone, Debug)] @@ -766,12 +767,16 @@ impl<'a, F: Function> Env<'a, F> { ranges: smallvec![], blockparam: Block::invalid(), is_ref: false, + is_pinned: false, }, ); } for v in self.func.reftype_vregs() { self.vregs[v.vreg()].is_ref = true; } + for v in self.func.pinned_vregs() { + self.vregs[v.vreg()].is_pinned = true; + } // Create allocations too. 
for inst in 0..self.func.insts() { let start = self.allocs.len() as u32; @@ -905,11 +910,10 @@ impl<'a, F: Function> Env<'a, F> { fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { log::debug!("adding liverange to preg: {:?} to {}", range, reg); let preg_idx = PRegIndex::new(reg.index()); - let lr = self.create_liverange(range); self.pregs[preg_idx.index()] .allocations .btree - .insert(LiveRangeKey::from_range(&range), lr); + .insert(LiveRangeKey::from_range(&range), LiveRangeIndex::invalid()); } fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool { @@ -942,14 +946,19 @@ impl<'a, F: Function> Env<'a, F> { self.stats.livein_iterations += 1; let mut live = self.liveouts[block.index()].clone(); + log::debug!(" -> initial liveout set: {:?}", live); + for inst in self.func.block_insns(block).rev().iter() { if let Some((src, dst)) = self.func.is_move(inst) { live.set(dst.vreg().vreg(), false); live.set(src.vreg().vreg(), true); } + for pos in &[OperandPos::After, OperandPos::Before] { for op in self.func.inst_operands(inst) { if op.pos() == *pos { + let was_live = live.get(op.vreg().vreg()); + log::debug!("op {:?} was_live = {}", op, was_live); match op.kind() { OperandKind::Use | OperandKind::Mod => { live.set(op.vreg().vreg(), true); @@ -1097,8 +1106,7 @@ impl<'a, F: Function> Env<'a, F> { // If this is a move, handle specially. if let Some((src, dst)) = self.func.is_move(inst) { // We can completely skip the move if it is - // trivial (vreg to same vreg) or its output is - // dead. + // trivial (vreg to same vreg). if src.vreg() != dst.vreg() { log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); @@ -1418,6 +1426,9 @@ impl<'a, F: Function> Env<'a, F> { // Insert safepoint virtual stack uses, if needed. for vreg in self.func.reftype_vregs() { + if self.vregs[vreg.vreg()].is_pinned { + continue; + } let vreg = VRegIndex::new(vreg.vreg()); let mut inserted = false; let mut safepoint_idx = 0; @@ -1809,6 +1820,24 @@ impl<'a, F: Function> Env<'a, F> { if self.vregs[vreg.index()].ranges.is_empty() { continue; } + + // If this is a pinned vreg, go ahead and add it to the + // commitment map, and avoid creating a bundle entirely. 
+ if self.vregs[vreg.index()].is_pinned { + for entry in &self.vregs[vreg.index()].ranges { + let preg = self + .func + .is_pinned_vreg(self.vreg_regs[vreg.index()]) + .unwrap(); + let key = LiveRangeKey::from_range(&entry.range); + self.pregs[preg.index()] + .allocations + .btree + .insert(key, LiveRangeIndex::invalid()); + } + continue; + } + let bundle = self.create_bundle(); self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); @@ -1844,6 +1873,12 @@ impl<'a, F: Function> Env<'a, F> { if let OperandPolicy::Reuse(reuse_idx) = op.policy() { let src_vreg = op.vreg(); let dst_vreg = self.func.inst_operands(inst)[reuse_idx].vreg(); + if self.vregs[src_vreg.vreg()].is_pinned + || self.vregs[dst_vreg.vreg()].is_pinned + { + continue; + } + log::debug!( "trying to merge reused-input def: src {} to dst {}", src_vreg, @@ -1892,6 +1927,27 @@ impl<'a, F: Function> Env<'a, F> { src, dst ); + + let dst_vreg = self.vreg_regs[self.ranges[dst.index()].vreg.index()]; + let src_vreg = self.vreg_regs[self.ranges[src.index()].vreg.index()]; + if self.vregs[src_vreg.vreg()].is_pinned && self.vregs[dst_vreg.vreg()].is_pinned { + continue; + } + if self.vregs[src_vreg.vreg()].is_pinned { + let dest_bundle = self.ranges[dst.index()].bundle; + let spillset = self.bundles[dest_bundle.index()].spillset; + self.spillsets[spillset.index()].reg_hint = + self.func.is_pinned_vreg(src_vreg).unwrap(); + continue; + } + if self.vregs[dst_vreg.vreg()].is_pinned { + let src_bundle = self.ranges[src.index()].bundle; + let spillset = self.bundles[src_bundle.index()].spillset; + self.spillsets[spillset.index()].reg_hint = + self.func.is_pinned_vreg(dst_vreg).unwrap(); + continue; + } + let src_bundle = self.ranges[src.index()].bundle; assert!(src_bundle.is_valid()); let dest_bundle = self.ranges[dst.index()].bundle; @@ -1941,11 +1997,11 @@ impl<'a, F: Function> Env<'a, F> { self.stats.merged_bundle_count = self.allocation_queue.heap.len(); } - fn process_bundles(&mut self) { + fn process_bundles(&mut self) -> Result<(), RegAllocError> { let mut count = 0; while let Some(bundle) = self.allocation_queue.pop() { self.stats.process_bundle_count += 1; - self.process_bundle(bundle); + self.process_bundle(bundle)?; count += 1; if count > self.func.insts() * 50 { self.dump_state(); @@ -1955,6 +2011,8 @@ impl<'a, F: Function> Env<'a, F> { self.stats.final_liverange_count = self.ranges.len(); self.stats.final_bundle_count = self.bundles.len(); self.stats.spill_bundle_count = self.spilled_bundles.len(); + + Ok(()) } fn dump_state(&self) { @@ -2105,7 +2163,7 @@ impl<'a, F: Function> Env<'a, F> { let preg_range = preg_range_iter.next().unwrap().1; log::debug!(" -> btree contains range {:?} that overlaps", preg_range); - if self.ranges[preg_range.index()].vreg.is_valid() { + if preg_range.is_valid() { log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); // range from an allocated bundle: find the bundle and add to // conflicts list. @@ -2755,7 +2813,7 @@ impl<'a, F: Function> Env<'a, F> { } } - fn process_bundle(&mut self, bundle: LiveBundleIndex) { + fn process_bundle(&mut self, bundle: LiveBundleIndex) -> Result<(), RegAllocError> { // Find any requirements: for every LR, for every def/use, gather // requirements (fixed-reg, any-reg, any) and merge them. 
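// A small sketch of the requirement-merging step described above, with
// illustrative names rather than the allocator's real `Requirement` type:
// each def/use contributes a constraint, and merging either tightens the
// result or fails on a genuine conflict (two different fixed registers),
// which the allocator then has to resolve by splitting.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Req {
    Fixed(u8), // must live in this particular physical register
    Register,  // any register of the class
    Any,       // register or stack slot
}

fn merge(a: Req, b: Req) -> Option<Req> {
    match (a, b) {
        (Req::Any, x) | (x, Req::Any) => Some(x),
        (Req::Register, x) | (x, Req::Register) => Some(x),
        (Req::Fixed(p), Req::Fixed(q)) if p == q => Some(Req::Fixed(p)),
        _ => None, // conflicting fixed-register constraints
    }
}

fn main() {
    let uses = [Req::Any, Req::Register, Req::Fixed(3)];
    let merged = uses.iter().try_fold(Req::Any, |acc, &r| merge(acc, r));
    assert_eq!(merged, Some(Req::Fixed(3)));
    // Two different fixed registers cannot be merged:
    assert_eq!(merge(Req::Fixed(3), Req::Fixed(4)), None);
    println!("merged requirement: {:?}", merged);
}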
let req = self.compute_requirement(bundle); @@ -2794,7 +2852,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> allocated to fixed {:?}", preg_idx); self.spillsets[self.bundles[bundle.index()].spillset.index()] .reg_hint = alloc.as_reg().unwrap(); - return; + return Ok(()); } AllocRegResult::Conflict(bundles) => { log::debug!(" -> conflict with bundles {:?}", bundles); @@ -2842,7 +2900,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> allocated to any {:?}", preg_idx); self.spillsets[self.bundles[bundle.index()].spillset.index()] .reg_hint = alloc.as_reg().unwrap(); - return; + return Ok(()); } AllocRegResult::Conflict(bundles) => { log::debug!(" -> conflict with bundles {:?}", bundles); @@ -2878,7 +2936,7 @@ impl<'a, F: Function> Env<'a, F> { self.spillsets[self.bundles[bundle.index()].spillset.index()] .bundles .push(bundle); - return; + return Ok(()); } Requirement::Any(_) => { @@ -2886,7 +2944,7 @@ impl<'a, F: Function> Env<'a, F> { // allocation on spilled bundles later). log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); self.spilled_bundles.push(bundle); - return; + return Ok(()); } }; @@ -2929,6 +2987,37 @@ impl<'a, F: Function> Env<'a, F> { } // A minimal bundle cannot be split. + if self.minimal_bundle(bundle) { + if let Some(Requirement::Register(class)) = req { + // Check if this is a too-many-live-registers situation. + let range = self.bundles[bundle.index()].ranges[0].range; + let mut min_bundles_assigned = 0; + let mut fixed_assigned = 0; + let mut total_regs = 0; + for preg in self.env.preferred_regs_by_class[class as u8 as usize] + .iter() + .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) + { + if let Some(&lr) = self.pregs[preg.index()] + .allocations + .btree + .get(&LiveRangeKey::from_range(&range)) + { + if lr.is_valid() { + if self.minimal_bundle(self.ranges[lr.index()].bundle) { + min_bundles_assigned += 1; + } + } else { + fixed_assigned += 1; + } + } + total_regs += 1; + } + if min_bundles_assigned + fixed_assigned == total_regs { + return Err(RegAllocError::TooManyLiveRegs); + } + } + } if self.minimal_bundle(bundle) { self.dump_state(); } @@ -2938,6 +3027,8 @@ impl<'a, F: Function> Env<'a, F> { bundle, first_conflicting_bundle.unwrap_or(LiveBundleIndex::invalid()), ); + + Ok(()) } fn try_allocating_regs_for_spilled_bundles(&mut self) { @@ -3279,6 +3370,12 @@ impl<'a, F: Function> Env<'a, F> { for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); + let pinned_alloc = if self.vregs[vreg.index()].is_pinned { + self.func.is_pinned_vreg(self.vreg_regs[vreg.index()]) + } else { + None + }; + // For each range in each vreg, insert moves or // half-moves. We also scan over `blockparam_ins` and // `blockparam_outs`, which are sorted by (block, vreg), @@ -3286,7 +3383,9 @@ impl<'a, F: Function> Env<'a, F> { let mut prev = LiveRangeIndex::invalid(); for range_idx in 0..self.vregs[vreg.index()].ranges.len() { let entry = self.vregs[vreg.index()].ranges[range_idx]; - let alloc = self.get_alloc_for_range(entry.index); + let alloc = pinned_alloc + .map(|preg| Allocation::reg(preg)) + .unwrap_or_else(|| self.get_alloc_for_range(entry.index)); let range = entry.range; log::debug!( "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", @@ -3343,7 +3442,10 @@ impl<'a, F: Function> Env<'a, F> { // can't insert a move that logically happens just // before After (i.e. in the middle of a single // instruction). 
- if prev.is_valid() { + // + // Also note that this case is not applicable to + // pinned vregs (because they are always in one PReg). + if pinned_alloc.is_none() && prev.is_valid() { let prev_alloc = self.get_alloc_for_range(prev); let prev_range = self.ranges[prev.index()].range; let first_is_def = @@ -3372,74 +3474,79 @@ impl<'a, F: Function> Env<'a, F> { } } - // Scan over blocks whose ends are covered by this - // range. For each, for each successor that is not - // already in this range (hence guaranteed to have the - // same allocation) and if the vreg is live, add a - // Source half-move. - let mut block = self.cfginfo.insn_block[range.from.inst().index()]; - while block.is_valid() && block.index() < self.func.blocks() { - if range.to < self.cfginfo.block_exit[block.index()].next() { - break; - } - log::debug!("examining block with end in range: block{}", block.index()); - for &succ in self.func.block_succs(block) { - log::debug!( - " -> has succ block {} with entry {:?}", - succ.index(), - self.cfginfo.block_entry[succ.index()] - ); - if range.contains_point(self.cfginfo.block_entry[succ.index()]) { - continue; - } - log::debug!(" -> out of this range, requires half-move if live"); - if self.is_live_in(succ, vreg) { - log::debug!(" -> live at input to succ, adding halfmove"); - half_moves.push(HalfMove { - key: half_move_key(block, succ, vreg, HalfMoveKind::Source), - alloc, - }); - } - } - - // Scan forward in `blockparam_outs`, adding all - // half-moves for outgoing values to blockparams - // in succs. - log::debug!( - "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", - vreg.index(), - block.index(), - blockparam_out_idx, - ); - while blockparam_out_idx < self.blockparam_outs.len() { - let (from_vreg, from_block, to_block, to_vreg) = - self.blockparam_outs[blockparam_out_idx]; - if (from_vreg, from_block) > (vreg, block) { + // The block-to-block edge-move logic is not + // applicable to pinned vregs, which are always in one + // PReg (so never need moves within their own vreg + // ranges). + if pinned_alloc.is_none() { + // Scan over blocks whose ends are covered by this + // range. For each, for each successor that is not + // already in this range (hence guaranteed to have the + // same allocation) and if the vreg is live, add a + // Source half-move. 
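// Sketch of the half-move key used in this loop; the bit widths are
// illustrative, not the exact layout of `half_move_key`. Packing
// (from-block, to-block, vreg, kind) into one u64 means a single sort
// brings each edge's Source half-move (the value's location at the end of
// the predecessor) right next to its Dest half-moves, so edge moves can be
// emitted in one linear pass.
#[derive(Clone, Copy, Debug)]
enum HalfMoveKind {
    Source = 0,
    Dest = 1,
}

fn half_move_key(from_block: u32, to_block: u32, vreg: u32, kind: HalfMoveKind) -> u64 {
    debug_assert!(from_block < (1 << 21) && to_block < (1 << 21) && vreg < (1 << 21));
    ((from_block as u64) << 43) | ((to_block as u64) << 22) | ((vreg as u64) << 1) | (kind as u64)
}

fn main() {
    let mut half_moves = vec![
        (half_move_key(2, 5, 7, HalfMoveKind::Dest), "stack0"),
        (half_move_key(2, 5, 7, HalfMoveKind::Source), "r3"),
    ];
    half_moves.sort_unstable_by_key(|&(key, _)| key);
    // After sorting, the Source directly precedes its Dest, giving the
    // edge move r3 -> stack0.
    assert_eq!(half_moves[0].1, "r3");
    assert_eq!(half_moves[1].1, "stack0");
    println!("edge move: {} -> {}", half_moves[0].1, half_moves[1].1);
}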
+ let mut block = self.cfginfo.insn_block[range.from.inst().index()]; + while block.is_valid() && block.index() < self.func.blocks() { + if range.to < self.cfginfo.block_exit[block.index()].next() { break; } - if (from_vreg, from_block) == (vreg, block) { + log::debug!("examining block with end in range: block{}", block.index()); + for &succ in self.func.block_succs(block) { log::debug!( - " -> found: from v{} block{} to v{} block{}", - from_vreg.index(), - from_block.index(), - to_vreg.index(), - to_vreg.index() + " -> has succ block {} with entry {:?}", + succ.index(), + self.cfginfo.block_entry[succ.index()] ); - half_moves.push(HalfMove { - key: half_move_key( - from_block, - to_block, - to_vreg, - HalfMoveKind::Source, - ), - alloc, - }); - #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - self.cfginfo.block_exit[block.index()], - format!( + if range.contains_point(self.cfginfo.block_entry[succ.index()]) { + continue; + } + log::debug!(" -> out of this range, requires half-move if live"); + if self.is_live_in(succ, vreg) { + log::debug!(" -> live at input to succ, adding halfmove"); + half_moves.push(HalfMove { + key: half_move_key(block, succ, vreg, HalfMoveKind::Source), + alloc, + }); + } + } + + // Scan forward in `blockparam_outs`, adding all + // half-moves for outgoing values to blockparams + // in succs. + log::debug!( + "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", + vreg.index(), + block.index(), + blockparam_out_idx, + ); + while blockparam_out_idx < self.blockparam_outs.len() { + let (from_vreg, from_block, to_block, to_vreg) = + self.blockparam_outs[blockparam_out_idx]; + if (from_vreg, from_block) > (vreg, block) { + break; + } + if (from_vreg, from_block) == (vreg, block) { + log::debug!( + " -> found: from v{} block{} to v{} block{}", + from_vreg.index(), + from_block.index(), + to_vreg.index(), + to_vreg.index() + ); + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Source, + ), + alloc, + }); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_exit[block.index()], + format!( "blockparam-out: block{} to block{}: v{} to v{} in {}", from_block.index(), to_block.index(), @@ -3447,119 +3554,124 @@ impl<'a, F: Function> Env<'a, F> { to_vreg.index(), alloc ), - ); + ); + } } } + blockparam_out_idx += 1; } - blockparam_out_idx += 1; - } - block = block.next(); - } - - // Scan over blocks whose beginnings are covered by - // this range and for which the vreg is live at the - // start of the block. For each, for each predecessor, - // add a Dest half-move. - let mut block = self.cfginfo.insn_block[range.from.inst().index()]; - if self.cfginfo.block_entry[block.index()] < range.from { - block = block.next(); - } - while block.is_valid() && block.index() < self.func.blocks() { - if self.cfginfo.block_entry[block.index()] >= range.to { - break; + block = block.next(); } - // Add half-moves for blockparam inputs. - log::debug!( - "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", - vreg.index(), - block.index(), - blockparam_in_idx - ); - while blockparam_in_idx < self.blockparam_ins.len() { - let (to_vreg, to_block, from_block) = - self.blockparam_ins[blockparam_in_idx]; - if (to_vreg, to_block) > (vreg, block) { + // Scan over blocks whose beginnings are covered by + // this range and for which the vreg is live at the + // start of the block. For each, for each predecessor, + // add a Dest half-move. 
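// Companion sketch to the half-move key above: once half-moves are sorted,
// each run sharing the same (from-block, to-block, vreg) prefix is resolved
// by copying the single Source allocation into every Dest allocation. The
// types below are simplified stand-ins, not the allocator's real ones.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Half {
    Source,
    Dest,
}

#[derive(Clone, Copy, Debug)]
struct HalfMove {
    key: u64, // packed (from_block, to_block, vreg); kind kept separate here
    kind: Half,
    alloc: &'static str,
}

fn resolve(half_moves: &mut [HalfMove]) -> Vec<(&'static str, &'static str)> {
    // Sort so that, within one key, the Source comes before its Dests.
    half_moves.sort_by_key(|hm| (hm.key, hm.kind == Half::Dest));
    let mut moves = vec![];
    let mut i = 0;
    while i < half_moves.len() {
        let mut j = i + 1;
        while j < half_moves.len() && half_moves[j].key == half_moves[i].key {
            j += 1;
        }
        if half_moves[i].kind == Half::Source {
            for dest in &half_moves[i + 1..j] {
                moves.push((half_moves[i].alloc, dest.alloc));
            }
        }
        i = j;
    }
    moves
}

fn main() {
    let mut hms = vec![
        HalfMove { key: 7, kind: Half::Dest, alloc: "stack0" },
        HalfMove { key: 7, kind: Half::Source, alloc: "r3" },
        HalfMove { key: 9, kind: Half::Dest, alloc: "r1" },
        HalfMove { key: 9, kind: Half::Source, alloc: "r2" },
    ];
    assert_eq!(resolve(&mut hms), vec![("r3", "stack0"), ("r2", "r1")]);
}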
+ let mut block = self.cfginfo.insn_block[range.from.inst().index()]; + if self.cfginfo.block_entry[block.index()] < range.from { + block = block.next(); + } + while block.is_valid() && block.index() < self.func.blocks() { + if self.cfginfo.block_entry[block.index()] >= range.to { break; } - if (to_vreg, to_block) == (vreg, block) { - half_moves.push(HalfMove { - key: half_move_key( - from_block, - to_block, - to_vreg, - HalfMoveKind::Dest, - ), - alloc, - }); - log::debug!( - "match: blockparam_in: v{} in block{} from block{} into {}", - to_vreg.index(), - to_block.index(), - from_block.index(), - alloc, - ); - #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - self.cfginfo.block_entry[block.index()], - format!( - "blockparam-in: block{} to block{}:into v{} in {}", - from_block.index(), - to_block.index(), - to_vreg.index(), - alloc - ), - ); + + // Add half-moves for blockparam inputs. + log::debug!( + "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", + vreg.index(), + block.index(), + blockparam_in_idx + ); + while blockparam_in_idx < self.blockparam_ins.len() { + let (to_vreg, to_block, from_block) = + self.blockparam_ins[blockparam_in_idx]; + if (to_vreg, to_block) > (vreg, block) { + break; + } + if (to_vreg, to_block) == (vreg, block) { + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Dest, + ), + alloc, + }); + log::debug!( + "match: blockparam_in: v{} in block{} from block{} into {}", + to_vreg.index(), + to_block.index(), + from_block.index(), + alloc, + ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_entry[block.index()], + format!( + "blockparam-in: block{} to block{}:into v{} in {}", + from_block.index(), + to_block.index(), + to_vreg.index(), + alloc + ), + ); + } } } + blockparam_in_idx += 1; } - blockparam_in_idx += 1; - } - if !self.is_live_in(block, vreg) { - block = block.next(); - continue; - } - - log::debug!( - "scanning preds at vreg {} block {} for ends outside the range", - vreg.index(), - block.index() - ); + if !self.is_live_in(block, vreg) { + block = block.next(); + continue; + } - // Now find any preds whose ends are not in the - // same range, and insert appropriate moves. - for &pred in self.func.block_preds(block) { log::debug!( - "pred block {} has exit {:?}", - pred.index(), - self.cfginfo.block_exit[pred.index()] + "scanning preds at vreg {} block {} for ends outside the range", + vreg.index(), + block.index() ); - if range.contains_point(self.cfginfo.block_exit[pred.index()]) { - continue; + + // Now find any preds whose ends are not in the + // same range, and insert appropriate moves. + for &pred in self.func.block_preds(block) { + log::debug!( + "pred block {} has exit {:?}", + pred.index(), + self.cfginfo.block_exit[pred.index()] + ); + if range.contains_point(self.cfginfo.block_exit[pred.index()]) { + continue; + } + log::debug!(" -> requires half-move"); + half_moves.push(HalfMove { + key: half_move_key(pred, block, vreg, HalfMoveKind::Dest), + alloc, + }); } - log::debug!(" -> requires half-move"); - half_moves.push(HalfMove { - key: half_move_key(pred, block, vreg, HalfMoveKind::Dest), - alloc, - }); - } - block = block.next(); - } + block = block.next(); + } - // If this is a blockparam vreg and the start of block - // is in this range, add to blockparam_allocs. 
- let (blockparam_block, blockparam_idx) = - self.cfginfo.vreg_def_blockparam[vreg.index()]; - if blockparam_block.is_valid() - && range.contains_point(self.cfginfo.block_entry[blockparam_block.index()]) - { - self.blockparam_allocs - .push((blockparam_block, blockparam_idx, vreg, alloc)); + // If this is a blockparam vreg and the start of block + // is in this range, add to blockparam_allocs. + let (blockparam_block, blockparam_idx) = + self.cfginfo.vreg_def_blockparam[vreg.index()]; + if blockparam_block.is_valid() + && range.contains_point(self.cfginfo.block_entry[blockparam_block.index()]) + { + self.blockparam_allocs.push(( + blockparam_block, + blockparam_idx, + vreg, + alloc, + )); + } } // Scan over def/uses and apply allocations. @@ -4130,7 +4242,7 @@ impl<'a, F: Function> Env<'a, F> { } pub(crate) fn run(&mut self) -> Result<(), RegAllocError> { - self.process_bundles(); + self.process_bundles()?; self.try_allocating_regs_for_spilled_bundles(); self.allocate_spillslots(); self.apply_allocations_and_insert_moves(); diff --git a/src/lib.rs b/src/lib.rs index df5f2841..9a24b845 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -758,6 +758,23 @@ pub trait Function { &[] } + /// Is the given vreg pinned to a preg? If so, every use of the + /// vreg is automatically assigned to the preg, and live-ranges of + /// the vreg allocate the preg exclusively (are not spilled + /// elsewhere). The user must take care not to have too many live + /// pinned vregs such that allocation is no longer possible; + /// liverange computation will check that this is the case (that + /// there are enough remaining allocatable pregs of every class to + /// hold all Reg-constrained operands). + fn is_pinned_vreg(&self, _: VReg) -> Option { + None + } + + /// Return a list of all pinned vregs. + fn pinned_vregs(&self) -> &[VReg] { + &[] + } + // -------------- // Spills/reloads // -------------- @@ -980,6 +997,9 @@ pub enum RegAllocError { /// places a use after the edge moves occur; insert an edge block /// to avoid the situation. DisallowedBranchArg(Inst), + /// Too many pinned VRegs + Reg-constrained Operands are live at + /// once, making allocation impossible. + TooManyLiveRegs, } impl std::fmt::Display for RegAllocError { From f1c6dfe807203e9672dd146b3d7b483701e2c346 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 19 May 2021 16:36:36 -0700 Subject: [PATCH 064/155] Optionally show annotations in final allocation/program dump based on RegallocOptions flag --- src/index.rs | 5 ++ src/ion/mod.rs | 156 ++++++++++++++++++++++++------------------------- src/lib.rs | 15 ++++- 3 files changed, 95 insertions(+), 81 deletions(-) diff --git a/src/index.rs b/src/index.rs index 1fe1b604..21dd9766 100644 --- a/src/index.rs +++ b/src/index.rs @@ -35,6 +35,11 @@ macro_rules! define_index { assert!(self.is_valid()); Self(self.0 - 1) } + + #[inline(always)] + pub fn raw_u32(self) -> u32 { + self.0 + } } impl crate::index::ContainerIndex for $ix {} diff --git a/src/ion/mod.rs b/src/ion/mod.rs index ccef123d..d19f7595 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -365,6 +365,7 @@ struct Env<'a, F: Function> { // For debug output only: a list of textual annotations at every // ProgPoint to insert into the final allocated program listing. 
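// Rough sketch of the annotation side-table mentioned above, assuming a
// plain HashMap from a program-point index (a bare u32 here, standing in
// for `ProgPoint`) to strings, gated by the new `annotations_enabled` flag
// so that nothing is recorded unless the option is set.
use std::collections::HashMap;

#[derive(Default)]
struct Annotations {
    enabled: bool,
    notes: HashMap<u32, Vec<String>>,
}

impl Annotations {
    fn annotate(&mut self, point: u32, text: String) {
        if !self.enabled {
            return;
        }
        self.notes.entry(point).or_insert_with(Vec::new).push(text);
    }

    fn dump(&self) {
        let mut points: Vec<_> = self.notes.keys().copied().collect();
        points.sort_unstable();
        for p in points {
            for note in &self.notes[&p] {
                println!("progpoint {}: {}", p, note);
            }
        }
    }
}

fn main() {
    let mut ann = Annotations { enabled: true, ..Default::default() };
    ann.annotate(4, format!("move {} -> {}", "r3", "stack0"));
    ann.annotate(2, "<<< start v7 in r3".to_string());
    ann.dump();
}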
debug_annotations: std::collections::HashMap>, + annotations_enabled: bool, } #[derive(Clone, Debug)] @@ -700,7 +701,12 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { } impl<'a, F: Function> Env<'a, F> { - pub(crate) fn new(func: &'a F, env: &'a MachineEnv, cfginfo: CFGInfo) -> Self { + pub(crate) fn new( + func: &'a F, + env: &'a MachineEnv, + cfginfo: CFGInfo, + annotations_enabled: bool, + ) -> Self { let n = func.insts(); Self { func, @@ -742,6 +748,7 @@ impl<'a, F: Function> Env<'a, F> { stats: Stats::default(), debug_annotations: std::collections::HashMap::new(), + annotations_enabled, } } @@ -1141,20 +1148,17 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::Before, ); - #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - ProgPoint::after(inst), - format!( - " prog-move v{} ({:?}) -> v{} ({:?})", - src.vreg().vreg(), - src_policy, - dst.vreg().vreg(), - dst_policy, - ), - ); - } + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::after(inst), + format!( + " prog-move v{} ({:?}) -> v{} ({:?})", + src.vreg().vreg(), + src_policy, + dst.vreg().vreg(), + dst_policy, + ), + ); } // N.B.: in order to integrate with the move @@ -1725,20 +1729,17 @@ impl<'a, F: Function> Env<'a, F> { for entry in &list { self.ranges[entry.index.index()].bundle = to; - #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); - } + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), + ); } } self.bundles[to.index()].ranges = list; @@ -1780,21 +1781,18 @@ impl<'a, F: Function> Env<'a, F> { } last_range = Some(entry.range); - #[cfg(debug)] - { - if self.ranges[entry.index.index()].bundle == from { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); - } + if self.ranges[entry.index.index()].bundle == from { + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), + ); } } @@ -2765,8 +2763,7 @@ impl<'a, F: Function> Env<'a, F> { cur_uses.next(); } - #[cfg(debug)] - { + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { self.annotate( existing_range.to, format!( @@ -3396,30 +3393,27 @@ impl<'a, F: Function> Env<'a, F> { ); debug_assert!(alloc != Allocation::none()); - #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - range.from, - format!( - " <<< start v{} in {} (range{}) (bundle{})", - vreg.index(), - alloc, - entry.index.index(), - self.ranges[entry.index.index()].bundle.index(), - ), - ); - self.annotate( - range.to, - format!( - " end v{} in {} (range{}) (bundle{}) >>>", - vreg.index(), - alloc, - entry.index.index(), - self.ranges[entry.index.index()].bundle.index(), - ), - ); - } + if self.annotations_enabled && 
log::log_enabled!(log::Level::Debug) { + self.annotate( + range.from, + format!( + " <<< start v{} in {} (range{}) (bundle{})", + vreg.index(), + alloc, + entry.index.index(), + self.ranges[entry.index.index()].bundle.raw_u32(), + ), + ); + self.annotate( + range.to, + format!( + " end v{} in {} (range{}) (bundle{}) >>>", + vreg.index(), + alloc, + entry.index.index(), + self.ranges[entry.index.index()].bundle.raw_u32(), + ), + ); } // Does this range follow immediately after a prior @@ -3541,12 +3535,12 @@ impl<'a, F: Function> Env<'a, F> { ), alloc, }); - #[cfg(debug)] + + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - self.cfginfo.block_exit[block.index()], - format!( + self.annotate( + self.cfginfo.block_exit[block.index()], + format!( "blockparam-out: block{} to block{}: v{} to v{} in {}", from_block.index(), to_block.index(), @@ -3554,10 +3548,10 @@ impl<'a, F: Function> Env<'a, F> { to_vreg.index(), alloc ), - ); - } + ); } } + blockparam_out_idx += 1; } @@ -4341,10 +4335,14 @@ impl<'a, F: Function> Env<'a, F> { } } -pub fn run(func: &F, mach_env: &MachineEnv) -> Result { +pub fn run( + func: &F, + mach_env: &MachineEnv, + enable_annotations: bool, +) -> Result { let cfginfo = CFGInfo::new(func)?; - let mut env = Env::new(func, mach_env, cfginfo); + let mut env = Env::new(func, mach_env, cfginfo, enable_annotations); env.init()?; env.run()?; diff --git a/src/lib.rs b/src/lib.rs index 9a24b845..db87c230 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1010,6 +1010,17 @@ impl std::fmt::Display for RegAllocError { impl std::error::Error for RegAllocError {} -pub fn run(func: &F, env: &MachineEnv) -> Result { - ion::run(func, env) +pub fn run( + func: &F, + env: &MachineEnv, + options: &RegallocOptions, +) -> Result { + ion::run(func, env, options.verbose_log) +} + +/// Options for allocation. +#[derive(Clone, Copy, Debug, Default)] +pub struct RegallocOptions { + /// Add extra verbosity to debug logs. 
+ pub verbose_log: bool, } From f56676fb8d2c0a2d98200d42e56f1e1eb5b1fea7 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 19 May 2021 18:25:34 -0700 Subject: [PATCH 065/155] Fixed all fuzzer targets (some API changes) --- fuzz/fuzz_targets/ion.rs | 2 +- fuzz/fuzz_targets/ion_checker.rs | 29 ++++++++++++++++------------- fuzz/fuzz_targets/ssagen.rs | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs index 13349388..288b8fb5 100644 --- a/fuzz/fuzz_targets/ion.rs +++ b/fuzz/fuzz_targets/ion.rs @@ -12,5 +12,5 @@ fuzz_target!(|func: Func| { let _ = env_logger::try_init(); log::debug!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); - let _out = regalloc2::ion::run(&func, &env).expect("regalloc did not succeed"); + let _out = regalloc2::ion::run(&func, &env, false).expect("regalloc did not succeed"); }); diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs index da43dbc0..f6e8cd5a 100644 --- a/fuzz/fuzz_targets/ion_checker.rs +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -4,11 +4,11 @@ */ #![no_main] +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; -use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured, Result}; -use regalloc2::fuzzing::func::{Func, Options}; use regalloc2::checker::Checker; +use regalloc2::fuzzing::func::{Func, Options}; #[derive(Clone, Debug)] struct TestCase { @@ -18,16 +18,19 @@ struct TestCase { impl Arbitrary for TestCase { fn arbitrary(u: &mut Unstructured) -> Result { Ok(TestCase { - func: Func::arbitrary_with_options(u, &Options { - reused_inputs: true, - fixed_regs: true, - clobbers: true, - control_flow: true, - reducible: false, - block_params: true, - always_local_uses: false, - reftypes: true, - })?, + func: Func::arbitrary_with_options( + u, + &Options { + reused_inputs: true, + fixed_regs: true, + clobbers: true, + control_flow: true, + reducible: false, + block_params: true, + always_local_uses: false, + reftypes: true, + }, + )?, }) } } @@ -37,7 +40,7 @@ fuzz_target!(|testcase: TestCase| { let _ = env_logger::try_init(); log::debug!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); - let out = regalloc2::ion::run(&func, &env).expect("regalloc did not succeed"); + let out = regalloc2::ion::run(&func, &env, true).expect("regalloc did not succeed"); let mut checker = Checker::new(&func); checker.prepare(&out); diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs index c0ff306e..d68d672f 100644 --- a/fuzz/fuzz_targets/ssagen.rs +++ b/fuzz/fuzz_targets/ssagen.rs @@ -37,6 +37,6 @@ impl Arbitrary for TestCase { } fuzz_target!(|t: TestCase| { - let cfginfo = CFGInfo::new(&t.f); + let cfginfo = CFGInfo::new(&t.f).expect("could not create CFG info"); validate_ssa(&t.f, &cfginfo).expect("invalid SSA"); }); From ce935c1040c51f77de8a7738e93733d81c690281 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 19 May 2021 22:12:22 -0700 Subject: [PATCH 066/155] Add all empty LRs to a single "spill bundle", to avoid many small bundles and excessive moves --- src/ion/mod.rs | 135 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 29 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index d19f7595..1c558ba9 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -244,6 +244,7 @@ struct SpillSet { slot: SpillSlotIndex, reg_hint: PReg, class: RegClass, + spill_bundle: LiveBundleIndex, size: u8, } @@ -1858,6 +1859,7 @@ impl<'a, F: 
Function> Env<'a, F> { size, class: reg.class(), reg_hint: PReg::invalid(), + spill_bundle: LiveBundleIndex::invalid(), }); self.bundles[bundle.index()].spillset = ssidx; } @@ -2626,6 +2628,12 @@ impl<'a, F: Function> Env<'a, F> { let spillset = self.bundles[bundle.index()].spillset; + // Get the spill bundle, which is the single bundle into which + // we place empty ranges (those with no uses). This may be + // `LiveBundleIndex::invalid()`, in which case we'll create it + // when we first need it. + let mut spill_bundle = self.spillsets[spillset.index()].spill_bundle; + let mut split_idx = 0; // Fast-forward past any splits that occur before or exactly @@ -2680,13 +2688,41 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> use at {:?}", u.pos); self.ranges[cur_lr.index()].uses.push(u); } - self.ranges[cur_lr.index()].bundle = cur_bundle; - self.bundles[cur_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: cur_range, - index: cur_lr, - }); + if self.ranges[cur_lr.index()].uses.len() > 0 { + self.ranges[cur_lr.index()].bundle = cur_bundle; + self.bundles[cur_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: cur_range, + index: cur_lr, + }); + } else { + if spill_bundle.is_invalid() { + spill_bundle = self.create_bundle(); + log::debug!( + " -> allocating new spill-bundle for empty ranges: bundle{}", + spill_bundle.index() + ); + self.bundles[spill_bundle.index()].spillset = spillset; + self.spillsets[spillset.index()].spill_bundle = spill_bundle; + self.spilled_bundles.push(spill_bundle); + } + // Range lists in empty-range bundles are not + // sorted until later (when we try to allocate + // regs or spillslots for them). + log::debug!( + " -> no uses in range{}; placing in empty-range spill-bundle bundle{}", + cur_lr.index(), + spill_bundle.index() + ); + self.ranges[cur_lr.index()].bundle = spill_bundle; + self.bundles[spill_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: cur_range, + index: cur_lr, + }); + } break; } @@ -2746,13 +2782,7 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[cur_lr.index()].range = existing_range; self.ranges[new_lr.index()].vreg = self.ranges[cur_lr.index()].vreg; self.ranges[new_lr.index()].bundle = new_bundle; - self.ranges[cur_lr.index()].bundle = cur_bundle; - self.bundles[cur_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: existing_range, - index: cur_lr, - }); + while let Some(u) = cur_uses.peek() { if u.pos >= split { break; @@ -2763,6 +2793,44 @@ impl<'a, F: Function> Env<'a, F> { cur_uses.next(); } + if self.ranges[cur_lr.index()].uses.len() > 0 { + log::debug!( + " -> adding current LR {:?} to current bundle {:?}", + cur_lr, + cur_bundle + ); + self.ranges[cur_lr.index()].bundle = cur_bundle; + self.bundles[cur_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: existing_range, + index: cur_lr, + }); + } else { + if spill_bundle.is_invalid() { + spill_bundle = self.create_bundle(); + log::debug!( + " -> allocating new spill-bundle for empty ranges: bundle{}", + spill_bundle.index() + ); + self.bundles[spill_bundle.index()].spillset = spillset; + self.spillsets[spillset.index()].spill_bundle = spill_bundle; + self.spilled_bundles.push(spill_bundle); + } + log::debug!( + " -> no uses in range{}; placing in empty-range spill-bundle bundle{}", + cur_lr.index(), + spill_bundle.index() + ); + self.ranges[cur_lr.index()].bundle = spill_bundle; + self.bundles[spill_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: cur_range, + index: cur_lr, + }); + } + if 
self.annotations_enabled && log::log_enabled!(log::Level::Debug) { self.annotate( existing_range.to, @@ -2798,15 +2866,19 @@ impl<'a, F: Function> Env<'a, F> { // Recompute weights and priorities of all bundles, and // enqueue all split-bundles on the allocation queue. - let prio = self.compute_bundle_prio(bundle); - self.bundles[bundle.index()].prio = prio; - self.recompute_bundle_properties(bundle); - self.allocation_queue.insert(bundle, prio as usize); + if self.bundles[bundle.index()].ranges.len() > 0 { + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue.insert(bundle, prio as usize); + } for &b in &new_bundles { - let prio = self.compute_bundle_prio(b); - self.bundles[b.index()].prio = prio; - self.recompute_bundle_properties(b); - self.allocation_queue.insert(b, prio as usize); + if self.bundles[b.index()].ranges.len() > 0 { + let prio = self.compute_bundle_prio(b); + self.bundles[b.index()].prio = prio; + self.recompute_bundle_properties(b); + self.allocation_queue.insert(b, prio as usize); + } } } @@ -3032,11 +3104,15 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("allocating regs for spilled bundles"); for i in 0..self.spilled_bundles.len() { let bundle = self.spilled_bundles[i]; // don't borrow self - let any_vreg = self.vreg_regs[self.ranges - [self.bundles[bundle.index()].ranges[0].index.index()] - .vreg - .index()]; - let class = any_vreg.class(); + + let class = self.spillsets[self.bundles[bundle.index()].spillset.index()].class; + + // This may be an empty-range bundle whose ranges are not + // sorted; sort all range-lists again here. + self.bundles[bundle.index()] + .ranges + .sort_unstable_by_key(|entry| entry.range.from); + let mut success = false; self.stats.spill_bundle_reg_probes += 1; for preg in RegTraversalIter::new( @@ -3385,11 +3461,12 @@ impl<'a, F: Function> Env<'a, F> { .unwrap_or_else(|| self.get_alloc_for_range(entry.index)); let range = entry.range; log::debug!( - "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", + "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?} (pinned {:?})", vreg, entry.index, range, - alloc + alloc, + pinned_alloc, ); debug_assert!(alloc != Allocation::none()); From f0b24cf9fa8cad206ae86bceb4ca56ff20aa7d0c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 20 May 2021 10:21:22 -0700 Subject: [PATCH 067/155] Remove all-empty-ranges-to-spill-bundle: prioritizing same-alloc for all empty ranges over allowing some to live in registers results in too much spilling --- src/ion/mod.rs | 119 ++++++++++++------------------------------------- 1 file changed, 28 insertions(+), 91 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 1c558ba9..d1aa9006 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2627,13 +2627,6 @@ impl<'a, F: Function> Env<'a, F> { // to the above invariants to keep this code maintainable. let spillset = self.bundles[bundle.index()].spillset; - - // Get the spill bundle, which is the single bundle into which - // we place empty ranges (those with no uses). This may be - // `LiveBundleIndex::invalid()`, in which case we'll create it - // when we first need it. 
- let mut spill_bundle = self.spillsets[spillset.index()].spill_bundle; - let mut split_idx = 0; // Fast-forward past any splits that occur before or exactly @@ -2688,41 +2681,14 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> use at {:?}", u.pos); self.ranges[cur_lr.index()].uses.push(u); } - if self.ranges[cur_lr.index()].uses.len() > 0 { - self.ranges[cur_lr.index()].bundle = cur_bundle; - self.bundles[cur_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: cur_range, - index: cur_lr, - }); - } else { - if spill_bundle.is_invalid() { - spill_bundle = self.create_bundle(); - log::debug!( - " -> allocating new spill-bundle for empty ranges: bundle{}", - spill_bundle.index() - ); - self.bundles[spill_bundle.index()].spillset = spillset; - self.spillsets[spillset.index()].spill_bundle = spill_bundle; - self.spilled_bundles.push(spill_bundle); - } - // Range lists in empty-range bundles are not - // sorted until later (when we try to allocate - // regs or spillslots for them). - log::debug!( - " -> no uses in range{}; placing in empty-range spill-bundle bundle{}", - cur_lr.index(), - spill_bundle.index() - ); - self.ranges[cur_lr.index()].bundle = spill_bundle; - self.bundles[spill_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: cur_range, - index: cur_lr, - }); - } + + self.ranges[cur_lr.index()].bundle = cur_bundle; + self.bundles[cur_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: cur_range, + index: cur_lr, + }); break; } @@ -2793,43 +2759,18 @@ impl<'a, F: Function> Env<'a, F> { cur_uses.next(); } - if self.ranges[cur_lr.index()].uses.len() > 0 { - log::debug!( - " -> adding current LR {:?} to current bundle {:?}", - cur_lr, - cur_bundle - ); - self.ranges[cur_lr.index()].bundle = cur_bundle; - self.bundles[cur_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: existing_range, - index: cur_lr, - }); - } else { - if spill_bundle.is_invalid() { - spill_bundle = self.create_bundle(); - log::debug!( - " -> allocating new spill-bundle for empty ranges: bundle{}", - spill_bundle.index() - ); - self.bundles[spill_bundle.index()].spillset = spillset; - self.spillsets[spillset.index()].spill_bundle = spill_bundle; - self.spilled_bundles.push(spill_bundle); - } - log::debug!( - " -> no uses in range{}; placing in empty-range spill-bundle bundle{}", - cur_lr.index(), - spill_bundle.index() - ); - self.ranges[cur_lr.index()].bundle = spill_bundle; - self.bundles[spill_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: cur_range, - index: cur_lr, - }); - } + log::debug!( + " -> adding current LR {:?} to current bundle {:?}", + cur_lr, + cur_bundle + ); + self.ranges[cur_lr.index()].bundle = cur_bundle; + self.bundles[cur_bundle.index()] + .ranges + .push(LiveRangeListEntry { + range: existing_range, + index: cur_lr, + }); if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { self.annotate( @@ -2866,19 +2807,15 @@ impl<'a, F: Function> Env<'a, F> { // Recompute weights and priorities of all bundles, and // enqueue all split-bundles on the allocation queue. 
- if self.bundles[bundle.index()].ranges.len() > 0 { - let prio = self.compute_bundle_prio(bundle); - self.bundles[bundle.index()].prio = prio; - self.recompute_bundle_properties(bundle); - self.allocation_queue.insert(bundle, prio as usize); - } + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue.insert(bundle, prio as usize); for &b in &new_bundles { - if self.bundles[b.index()].ranges.len() > 0 { - let prio = self.compute_bundle_prio(b); - self.bundles[b.index()].prio = prio; - self.recompute_bundle_properties(b); - self.allocation_queue.insert(b, prio as usize); - } + let prio = self.compute_bundle_prio(b); + self.bundles[b.index()].prio = prio; + self.recompute_bundle_properties(b); + self.allocation_queue.insert(b, prio as usize); } } From 2a5f571b80eacf770e42a9422c055af0c97d8eb5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 20 May 2021 19:53:16 -0700 Subject: [PATCH 068/155] WIP: Handle moves between realregs (pregs) and vregs somewhat specially, by converting into operand constraints Still has a fuzzbug in interaction between R->R and V->R moves. Will likely rework to make pinned-vreg handling more general but want to save a checkpoint here; idea for rework: - set allocs immediately if an Operand is a pinned vreg; - reserve preg ranges; - then, in rest of liveness computation / LR construction, convert pinned-vregs to operands with constraints, but otherwise do not special-case as we do in this commit. --- src/checker.rs | 6 + src/ion/mod.rs | 460 ++++++++++++++++++++++++++++++++++++------------- src/lib.rs | 3 + 3 files changed, 353 insertions(+), 116 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index 76b815a7..e3afb9f4 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -589,6 +589,12 @@ impl<'a, F: Function> Checker<'a, F> { .unwrap() .push(CheckerInst::Move { into: to, from }); } + &Edit::DefAlloc { .. } => { + unimplemented!(concat!( + "DefAlloc is used only when dealing with pinned vregs, ", + "which are only used by regalloc.rs shim; use checker at that level!" + )); + } &Edit::BlockParams { ref vregs, ref allocs, diff --git a/src/ion/mod.rs b/src/ion/mod.rs index d1aa9006..3fc4279c 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -830,12 +830,15 @@ impl<'a, F: Function> Env<'a, F> { let mut i = 0; while i < self.vregs[vreg.index()].ranges.len() { let entry = self.vregs[vreg.index()].ranges[i]; - if entry.range.overlaps(&range) { - if entry.range.from < range.from { - range.from = entry.range.from; + // Don't use `entry.range`; it is not kept up-to-date as + // we are building LRs. + let this_range = self.ranges[entry.index.index()].range; + if range.overlaps(&this_range) { + if this_range.from < range.from { + range.from = this_range.from; } - if entry.range.to > range.to { - range.to = entry.range.to; + if this_range.to > range.to { + range.to = this_range.to; } if merged.is_none() { merged = Some(i); @@ -1124,124 +1127,347 @@ impl<'a, F: Function> Env<'a, F> { assert_eq!(dst.kind(), OperandKind::Def); assert_eq!(dst.pos(), OperandPos::After); - // Redefine src and dst operands to have - // positions of After and Before respectively - // (see note below), and to have Any - // constraints if they were originally Reg. 
- let src_policy = match src.policy() { - OperandPolicy::Reg => OperandPolicy::Any, - x => x, - }; - let dst_policy = match dst.policy() { - OperandPolicy::Reg => OperandPolicy::Any, - x => x, - }; - let src = Operand::new( - src.vreg(), - src_policy, - OperandKind::Use, - OperandPos::After, - ); - let dst = Operand::new( - dst.vreg(), - dst_policy, - OperandKind::Def, - OperandPos::Before, - ); + // If exactly one of source and dest (but not + // both) is a pinned-vreg, convert this into a + // ghost use on the other vreg with a FixedReg + // policy. + if self.vregs[src.vreg().vreg()].is_pinned + ^ self.vregs[dst.vreg().vreg()].is_pinned + { + log::debug!( + " -> exactly one of src/dst is pinned; converting to ghost use" + ); + let (preg, vreg, pinned_vreg, kind, pos, progpoint) = + if self.vregs[src.vreg().vreg()].is_pinned { + // Source is pinned: this is a def on the dst with a pinned preg. + ( + self.func.is_pinned_vreg(src.vreg()).unwrap(), + dst.vreg(), + src.vreg(), + OperandKind::Def, + OperandPos::After, + ProgPoint::after(inst), + ) + } else { + // Dest is pinned: this is a use on the src with a pinned preg. + ( + self.func.is_pinned_vreg(dst.vreg()).unwrap(), + src.vreg(), + dst.vreg(), + OperandKind::Use, + // Use comes late to avoid interfering with a potential + // prior move instruction + // reserving the preg, which + // creates a use or def *at* + // our Before pos (because + // progmoves happen between + // insts). + OperandPos::After, + ProgPoint::after(inst), + ) + }; + let policy = OperandPolicy::FixedReg(preg); + let operand = Operand::new(vreg, policy, kind, pos); - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { - self.annotate( - ProgPoint::after(inst), - format!( - " prog-move v{} ({:?}) -> v{} ({:?})", - src.vreg().vreg(), - src_policy, - dst.vreg().vreg(), - dst_policy, + log::debug!( + concat!( + " -> preg {:?} vreg {:?} kind {:?} ", + "pos {:?} progpoint {:?} policy {:?} operand {:?}" ), + preg, + vreg, + kind, + pos, + progpoint, + policy, + operand ); - } - // N.B.: in order to integrate with the move - // resolution that joins LRs in general, we - // conceptually treat the move as happening - // between the move inst's After and the next - // inst's Before. Thus the src LR goes up to - // (exclusive) next-inst-pre, and the dst LR - // starts at next-inst-pre. We have to take - // care in our move insertion to handle this - // like other inter-inst moves, i.e., at - // `Regular` priority, so it properly happens - // in parallel with other inter-LR moves. - // - // Why the progpoint between move and next - // inst, and not the progpoint between prev - // inst and move? Because a move can be the - // first inst in a block, but cannot be the - // last; so the following progpoint is always - // within the same block, while the previous - // one may be an inter-block point (and the - // After of the prev inst in a different - // block). - - // Handle the def w.r.t. liveranges: trim the - // start of the range and mark it dead at this - // point in our backward scan. - let pos = ProgPoint::before(inst.next()); - let mut dst_lr = vreg_ranges[dst.vreg().vreg()]; - if !live.get(dst.vreg().vreg()) { - let from = pos; - let to = pos.next(); - dst_lr = self.add_liverange_to_vreg( - VRegIndex::new(dst.vreg().vreg()), - CodeRange { from, to }, + // Get the LR for the vreg; if none, create one. 
+ let mut lr = vreg_ranges[vreg.vreg()]; + if !live.get(vreg.vreg()) { + let from = match kind { + OperandKind::Use => self.cfginfo.block_entry[block.index()], + OperandKind::Def => progpoint, + _ => unreachable!(), + }; + let to = progpoint.next(); + lr = self.add_liverange_to_vreg( + VRegIndex::new(vreg.vreg()), + CodeRange { from, to }, + ); + log::debug!(" -> dead; created LR"); + } + log::debug!(" -> LR {:?}", lr); + + self.insert_use_into_liverange( + lr, + Use::new(operand, progpoint, SLOT_NONE), ); - log::debug!(" -> invalid LR for def; created {:?}", dst_lr); - } - log::debug!(" -> has existing LR {:?}", dst_lr); - // Trim the LR to start here. - if self.ranges[dst_lr.index()].range.from - == self.cfginfo.block_entry[block.index()] - { - log::debug!(" -> started at block start; trimming to {:?}", pos); - self.ranges[dst_lr.index()].range.from = pos; - } - self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef); - live.set(dst.vreg().vreg(), false); - vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid(); - self.vreg_regs[dst.vreg().vreg()] = dst.vreg(); - - // Handle the use w.r.t. liveranges: make it live - // and create an initial LR back to the start of - // the block. - let pos = ProgPoint::after(inst); - let range = CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: pos.next(), - }; - let src_lr = - self.add_liverange_to_vreg(VRegIndex::new(src.vreg().vreg()), range); - vreg_ranges[src.vreg().vreg()] = src_lr; - log::debug!(" -> src LR {:?}", src_lr); + if kind == OperandKind::Def { + live.set(vreg.vreg(), false); + if self.ranges[lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + self.ranges[lr.index()].range.from = progpoint; + } + self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef); + } else { + live.set(vreg.vreg(), true); + vreg_ranges[vreg.vreg()] = lr; + } - // Add to live-set. - let src_is_dead_after_move = !live.get(src.vreg().vreg()); - live.set(src.vreg().vreg(), true); + // Handle liveness of the other vreg. Note + // that this is somewhat special. For the + // destination case, we want the pinned + // vreg's LR to start just *after* the + // operand we inserted above, because + // otherwise it would overlap, and + // interfere, and prevent allocation. For + // the source case, we want to "poke a + // hole" in the LR: if it's live going + // downward, end it just after the operand + // and restart it before; if it isn't + // (this is the last use), start it + // before. + if kind == OperandKind::Def { + log::debug!(" -> src on pinned vreg {:?}", pinned_vreg); + // The *other* vreg is a def, so the pinned-vreg + // mention is a use. If already live, + // end the existing LR just *after* + // the `progpoint` defined above and + // start a new one just *before* the + // `progpoint` defined above, + // preserving the start. If not, start + // a new one live back to the top of + // the block, starting just before + // `progpoint`. 
+ if live.get(pinned_vreg.vreg()) { + let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; + let orig_start = self.ranges[pinned_lr.index()].range.from; + self.ranges[pinned_lr.index()].range.from = progpoint.next(); + let new_lr = self.add_liverange_to_vreg( + VRegIndex::new(pinned_vreg.vreg()), + CodeRange { + from: orig_start, + to: progpoint, + }, + ); + vreg_ranges[pinned_vreg.vreg()] = new_lr; + log::debug!( + " -> live with LR {:?}; truncating to start at {:?}", + pinned_lr, + progpoint.next() + ); + log::debug!(" -> created LR {:?} with remaining range from {:?} to {:?}", new_lr, orig_start, progpoint); + + // Add an edit right now to indicate that at + // this program point, the given + // preg is now known as that vreg, + // not the preg, but immediately + // after, it is known as the preg + // again. This is used by the + // checker. + self.add_edit( + ProgPoint::before(inst), + InsertMovePrio::Regular, + Edit::DefAlloc { + alloc: Allocation::reg(preg), + vreg: dst.vreg(), + }, + ); + self.add_edit( + ProgPoint::after(inst), + InsertMovePrio::Regular, + Edit::DefAlloc { + alloc: Allocation::reg(preg), + vreg: src.vreg(), + }, + ); + } else { + let new_lr = self.add_liverange_to_vreg( + VRegIndex::new(pinned_vreg.vreg()), + CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: progpoint, + }, + ); + vreg_ranges[pinned_vreg.vreg()] = new_lr; + live.set(pinned_vreg.vreg(), true); + log::debug!(" -> was not live; created new LR {:?}", new_lr); + + // Add an edit right now to indicate that at + // this program point, the given + // preg is now known as that vreg, + // not the preg. This is used by + // the checker. + self.add_edit( + ProgPoint::before(inst), + InsertMovePrio::Regular, + Edit::DefAlloc { + alloc: Allocation::reg(preg), + vreg: dst.vreg(), + }, + ); + } + } else { + log::debug!(" -> dst on pinned vreg {:?}", pinned_vreg); + // The *other* vreg is a use, so the pinned-vreg + // mention is a def. Truncate its LR + // just *after* the `progpoint` + // defined above. + if live.get(pinned_vreg.vreg()) { + let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; + self.ranges[pinned_lr.index()].range.from = progpoint.next(); + log::debug!( + " -> was live with LR {:?}; truncated start to {:?}", + pinned_lr, + progpoint.next() + ); + live.set(pinned_vreg.vreg(), false); + + // Add a no-op edit right now to indicate that + // at this program point, the + // given preg is now known as that + // preg, not the vreg. This is + // used by the checker. + self.add_edit( + ProgPoint::after(inst), + InsertMovePrio::Regular, + Edit::DefAlloc { + alloc: Allocation::reg(preg), + vreg: dst.vreg(), + }, + ); + } + // Otherwise, if dead, no need to create + // a dummy LR -- there is no + // reservation to make (the other vreg + // will land in the reg with the + // fixed-reg operand constraint, but + // it's a dead move anyway). + } + } else { + // Redefine src and dst operands to have + // positions of After and Before respectively + // (see note below), and to have Any + // constraints if they were originally Reg. + let src_policy = match src.policy() { + OperandPolicy::Reg => OperandPolicy::Any, + x => x, + }; + let dst_policy = match dst.policy() { + OperandPolicy::Reg => OperandPolicy::Any, + x => x, + }; + let src = Operand::new( + src.vreg(), + src_policy, + OperandKind::Use, + OperandPos::After, + ); + let dst = Operand::new( + dst.vreg(), + dst_policy, + OperandKind::Def, + OperandPos::Before, + ); - // Add to program-moves lists. 
- self.prog_move_srcs.push(( - (VRegIndex::new(src.vreg().vreg()), inst), - Allocation::none(), - )); - self.prog_move_dsts.push(( - (VRegIndex::new(dst.vreg().vreg()), inst.next()), - Allocation::none(), - )); - self.stats.prog_moves += 1; - if src_is_dead_after_move { - self.stats.prog_moves_dead_src += 1; - self.prog_move_merges.push((src_lr, dst_lr)); + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::after(inst), + format!( + " prog-move v{} ({:?}) -> v{} ({:?})", + src.vreg().vreg(), + src_policy, + dst.vreg().vreg(), + dst_policy, + ), + ); + } + + // N.B.: in order to integrate with the move + // resolution that joins LRs in general, we + // conceptually treat the move as happening + // between the move inst's After and the next + // inst's Before. Thus the src LR goes up to + // (exclusive) next-inst-pre, and the dst LR + // starts at next-inst-pre. We have to take + // care in our move insertion to handle this + // like other inter-inst moves, i.e., at + // `Regular` priority, so it properly happens + // in parallel with other inter-LR moves. + // + // Why the progpoint between move and next + // inst, and not the progpoint between prev + // inst and move? Because a move can be the + // first inst in a block, but cannot be the + // last; so the following progpoint is always + // within the same block, while the previous + // one may be an inter-block point (and the + // After of the prev inst in a different + // block). + + // Handle the def w.r.t. liveranges: trim the + // start of the range and mark it dead at this + // point in our backward scan. + let pos = ProgPoint::before(inst.next()); + let mut dst_lr = vreg_ranges[dst.vreg().vreg()]; + if !live.get(dst.vreg().vreg()) { + let from = pos; + let to = pos.next(); + dst_lr = self.add_liverange_to_vreg( + VRegIndex::new(dst.vreg().vreg()), + CodeRange { from, to }, + ); + log::debug!(" -> invalid LR for def; created {:?}", dst_lr); + } + log::debug!(" -> has existing LR {:?}", dst_lr); + // Trim the LR to start here. + if self.ranges[dst_lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!(" -> started at block start; trimming to {:?}", pos); + self.ranges[dst_lr.index()].range.from = pos; + } + self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef); + live.set(dst.vreg().vreg(), false); + vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid(); + self.vreg_regs[dst.vreg().vreg()] = dst.vreg(); + + // Handle the use w.r.t. liveranges: make it live + // and create an initial LR back to the start of + // the block. + let pos = ProgPoint::after(inst); + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + let src_lr = self + .add_liverange_to_vreg(VRegIndex::new(src.vreg().vreg()), range); + vreg_ranges[src.vreg().vreg()] = src_lr; + + log::debug!(" -> src LR {:?}", src_lr); + + // Add to live-set. + let src_is_dead_after_move = !live.get(src.vreg().vreg()); + live.set(src.vreg().vreg(), true); + + // Add to program-moves lists. 
+ self.prog_move_srcs.push(( + (VRegIndex::new(src.vreg().vreg()), inst), + Allocation::none(), + )); + self.prog_move_dsts.push(( + (VRegIndex::new(dst.vreg().vreg()), inst.next()), + Allocation::none(), + )); + self.stats.prog_moves += 1; + if src_is_dead_after_move { + self.stats.prog_moves_dead_src += 1; + self.prog_move_merges.push((src_lr, dst_lr)); + } } } @@ -1316,6 +1542,8 @@ impl<'a, F: Function> Env<'a, F> { CodeRange { from, to }, ); log::debug!(" -> invalid; created {:?}", lr); + vreg_ranges[operand.vreg().vreg()] = lr; + live.set(operand.vreg().vreg(), true); } // Create the use in the LiveRange. self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); @@ -3477,7 +3705,7 @@ impl<'a, F: Function> Env<'a, F> { InsertMovePrio::Regular, prev_alloc, alloc, - None, + Some(self.vreg_regs[vreg.index()]), ); } } diff --git a/src/lib.rs b/src/lib.rs index db87c230..c96004f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -906,6 +906,9 @@ pub enum Edit { vregs: Vec, allocs: Vec, }, + /// Define a particular Allocation to contain a particular VReg. Useful + /// for the checker. + DefAlloc { alloc: Allocation, vreg: VReg }, } /// A machine envrionment tells the register allocator which registers From ec7fdeb8ed188275894057f967fd6a49f1439c74 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 20 May 2021 20:38:50 -0700 Subject: [PATCH 069/155] Properly handle RReg-RReg moves in new scheme --- src/ion/mod.rs | 60 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 3fc4279c..6b28a332 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1127,12 +1127,59 @@ impl<'a, F: Function> Env<'a, F> { assert_eq!(dst.kind(), OperandKind::Def); assert_eq!(dst.pos(), OperandPos::After); + // If both src and dest are pinned, emit the + // move right here, right now. + if self.vregs[src.vreg().vreg()].is_pinned + && self.vregs[dst.vreg().vreg()].is_pinned + { + // Update LRs. + if !live.get(src.vreg().vreg()) { + let lr = self.add_liverange_to_vreg( + VRegIndex::new(src.vreg().vreg()), + CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: ProgPoint::after(inst), + }, + ); + live.set(src.vreg().vreg(), true); + vreg_ranges[src.vreg().vreg()] = lr; + } + if live.get(dst.vreg().vreg()) { + let lr = vreg_ranges[dst.vreg().vreg()]; + self.ranges[lr.index()].range.from = ProgPoint::after(inst); + live.set(dst.vreg().vreg(), false); + } else { + self.add_liverange_to_vreg( + VRegIndex::new(dst.vreg().vreg()), + CodeRange { + from: ProgPoint::after(inst), + to: ProgPoint::before(inst.next()), + }, + ); + } + + let src_preg = match src.policy() { + OperandPolicy::FixedReg(r) => r, + _ => unreachable!(), + }; + let dst_preg = match dst.policy() { + OperandPolicy::FixedReg(r) => r, + _ => unreachable!(), + }; + self.insert_move( + ProgPoint::before(inst), + InsertMovePrio::MultiFixedReg, + Allocation::reg(src_preg), + Allocation::reg(dst_preg), + Some(dst.vreg()), + ); + } // If exactly one of source and dest (but not // both) is a pinned-vreg, convert this into a // ghost use on the other vreg with a FixedReg // policy. 
- if self.vregs[src.vreg().vreg()].is_pinned - ^ self.vregs[dst.vreg().vreg()].is_pinned + else if self.vregs[src.vreg().vreg()].is_pinned + || self.vregs[dst.vreg().vreg()].is_pinned { log::debug!( " -> exactly one of src/dst is pinned; converting to ghost use" @@ -1155,14 +1202,7 @@ impl<'a, F: Function> Env<'a, F> { src.vreg(), dst.vreg(), OperandKind::Use, - // Use comes late to avoid interfering with a potential - // prior move instruction - // reserving the preg, which - // creates a use or def *at* - // our Before pos (because - // progmoves happen between - // insts). - OperandPos::After, + OperandPos::Before, ProgPoint::after(inst), ) }; From 466ea2cd9a8da4db119ec9a5d5ce0a0295604dbd Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 21 May 2021 01:34:52 -0700 Subject: [PATCH 070/155] Simpler / possibly better splitting: split based on conflict position, always, and use a reg hint to put the before-conflict part in the place where we determined it fit before. --- src/ion/mod.rs | 900 +++++++++++++++++-------------------------------- 1 file changed, 318 insertions(+), 582 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 6b28a332..63ea532e 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -14,15 +14,6 @@ /* * TODO: * - * - tune heuristics: - * - splits: - * - safepoints? - * - split just before uses with fixed regs and/or just after defs - * with fixed regs? - * - measure average liverange length / number of splits / ... - * - * - reused-input reg: don't allocate register for input that is reused. - * * - "Fixed-stack location": negative spillslot numbers? * * - Rematerialization @@ -159,6 +150,12 @@ impl LiveRange { self.uses_spill_weight_and_flags &= !((flag as u32) << 29); } #[inline(always)] + pub fn assign_flag(&mut self, flag: LiveRangeFlag, val: bool) { + let bit = if val { (flag as u32) << 29 } else { 0 }; + self.uses_spill_weight_and_flags &= 0xe000_0000; + self.uses_spill_weight_and_flags |= bit; + } + #[inline(always)] pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0 } @@ -393,6 +390,7 @@ struct PrioQueue { struct PrioQueueEntry { prio: u32, bundle: LiveBundleIndex, + reg_hint: PReg, } #[derive(Clone, Debug)] @@ -459,10 +457,11 @@ impl PrioQueue { } } - fn insert(&mut self, bundle: LiveBundleIndex, prio: usize) { + fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, reg_hint: PReg) { self.heap.push(PrioQueueEntry { prio: prio as u32, bundle, + reg_hint, }); } @@ -470,8 +469,8 @@ impl PrioQueue { self.heap.is_empty() } - fn pop(&mut self) -> Option { - self.heap.pop().map(|entry| entry.bundle) + fn pop(&mut self) -> Option<(LiveBundleIndex, PReg)> { + self.heap.pop().map(|entry| (entry.bundle, entry.reg_hint)) } } @@ -2260,16 +2259,17 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> prio {}", prio); self.bundles[bundle.index()].prio = prio; self.recompute_bundle_properties(bundle); - self.allocation_queue.insert(bundle, prio as usize); + self.allocation_queue + .insert(bundle, prio as usize, PReg::invalid()); } self.stats.merged_bundle_count = self.allocation_queue.heap.len(); } fn process_bundles(&mut self) -> Result<(), RegAllocError> { let mut count = 0; - while let Some(bundle) = self.allocation_queue.pop() { + while let Some((bundle, reg_hint)) = self.allocation_queue.pop() { self.stats.process_bundle_count += 1; - self.process_bundle(bundle)?; + self.process_bundle(bundle, reg_hint)?; count += 1; if count > self.func.insts() * 50 { self.dump_state(); @@ -2492,7 +2492,8 
@@ impl<'a, F: Function> Env<'a, F> { } let prio = self.bundles[bundle.index()].prio; log::debug!(" -> prio {}; back into queue", prio); - self.allocation_queue.insert(bundle, prio as usize); + self.allocation_queue + .insert(bundle, prio as usize, PReg::invalid()); } fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { @@ -2587,512 +2588,219 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].cached_minimal() } - fn find_split_points( + fn recompute_range_properties(&mut self, range: LiveRangeIndex) { + let mut rangedata = &mut self.ranges[range.index()]; + let w = rangedata.uses.iter().map(|u| u.weight as u32).sum(); + rangedata.uses_spill_weight_and_flags = w; + if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { + rangedata.set_flag(LiveRangeFlag::StartsAtDef); + } + } + + fn split_and_requeue_bundle( &mut self, bundle: LiveBundleIndex, - conflicting: LiveBundleIndex, - ) -> SmallVec<[ProgPoint; 4]> { - // Scan the bundle's ranges once. We want to record: - // - Does the bundle contain any ranges in "hot" code and/or "cold" code? - // If so, record the transition points that are fully included in - // `bundle`: the first ProgPoint in a hot range if the prior cold - // point is also in the bundle; and the first ProgPoint in a cold - // range if the prior hot point is also in the bundle. - // - Does the bundle cross any clobbering insts? - // If so, record the ProgPoint before each such instruction. - // - Is there a register use before the conflicting bundle? - // If so, record the ProgPoint just after the last one. - // - Is there a register use after the conflicting bundle? - // If so, record the ProgPoint just before the last one. - // - // Then choose one of the above kinds of splits, in priority order. - - let mut def_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; - let mut seen_defs = 0; - let mut cold_hot_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; - let mut clobber_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; - let mut last_before_conflict: Option = None; - let mut first_after_conflict: Option = None; - + mut split_at: ProgPoint, + reg_hint: PReg, + ) { + self.stats.splits += 1; log::debug!( - "find_split_points: bundle {:?} conflicting {:?}", + "split bundle {:?} at {:?} and requeue with reg hint (for first part) {:?}", bundle, - conflicting + split_at, + reg_hint, ); - // We simultaneously scan the sorted list of LiveRanges in our bundle - // and the sorted list of call instruction locations. We also take the - // total range (start of first range to end of last range) of the - // conflicting bundle, if any, so we can find the last use before it and - // first use after it. Each loop iteration handles one range in our - // bundle. Calls are scanned up until they advance past the current - // range. - let our_ranges = &self.bundles[bundle.index()].ranges[..]; - let (conflict_from, conflict_to) = if conflicting.is_valid() { - ( - Some( - self.bundles[conflicting.index()] + // Split `bundle` at `split_at`, creating new LiveRanges and + // bundles (and updating vregs' linked lists appropriately), + // and enqueue the new bundles. + + let spillset = self.bundles[bundle.index()].spillset; + + assert!(!self.bundles[bundle.index()].ranges.is_empty()); + // Split point *at* start is OK; this means we peel off + // exactly one use to create a minimal bundle. 
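+        // (A minimal bundle covers at most one instruction; such bundles are
+        // never split again -- see the `!self.minimal_bundle(bundle)` assert
+        // in `process_bundle()` -- which guarantees that splitting
+        // terminates.)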
+ let bundle_start = self.bundles[bundle.index()] + .ranges + .first() + .unwrap() + .range + .from; + assert!(split_at >= bundle_start); + let bundle_end = self.bundles[bundle.index()].ranges.last().unwrap().range.to; + assert!(split_at < bundle_end); + + // Is the split point *at* the start? If so, peel off the + // first use: set the split point just after it, or just + // before it if it comes after the start of the bundle. + if split_at == bundle_start { + // Find any uses; if none, just chop off one instruction. + let mut first_use = None; + 'outer: for entry in &self.bundles[bundle.index()].ranges { + for u in &self.ranges[entry.index.index()].uses { + first_use = Some(u.pos); + break 'outer; + } + } + log::debug!(" -> first use loc is {:?}", first_use); + split_at = match first_use { + Some(pos) => { + if pos.inst() == bundle_start.inst() { + ProgPoint::before(pos.inst().next()) + } else { + ProgPoint::before(pos.inst()) + } + } + None => ProgPoint::before( + self.bundles[bundle.index()] .ranges .first() .unwrap() .range - .from, + .from + .inst() + .next(), ), - Some( - self.bundles[conflicting.index()] - .ranges - .last() - .unwrap() - .range - .to, - ), - ) - } else { - (None, None) - }; - - let bundle_start = if our_ranges.is_empty() { - ProgPoint::before(Inst::new(0)) - } else { - our_ranges.first().unwrap().range.from - }; - let bundle_end = if our_ranges.is_empty() { - ProgPoint::before(Inst::new(self.func.insts())) + }; + assert!(split_at < bundle_end); + log::debug!( + "split point is at bundle start; advancing to {:?}", + split_at + ); } else { - our_ranges.last().unwrap().range.to - }; - - log::debug!(" -> conflict from {:?} to {:?}", conflict_from, conflict_to); - let mut clobberidx = 0; - for entry in our_ranges { - // Probe the hot-code tree. - log::debug!(" -> range {:?}", entry.range); - if let Some(hot_range_idx) = self - .hot_code - .btree - .get(&LiveRangeKey::from_range(&entry.range)) - { - // `hot_range_idx` is a range that *overlaps* with our range. - - // There may be cold code in our range on either side of the hot - // range. Record the transition points if so. - let hot_range = self.ranges[hot_range_idx.index()].range; - log::debug!(" -> overlaps with hot-code range {:?}", hot_range); - let start_cold = entry.range.from < hot_range.from; - let end_cold = entry.range.to > hot_range.to; - if start_cold { - log::debug!( - " -> our start is cold; potential split at cold->hot transition {:?}", - hot_range.from, - ); - // First ProgPoint in hot range. - cold_hot_splits.push(hot_range.from); - } - if end_cold { - log::debug!( - " -> our end is cold; potential split at hot->cold transition {:?}", - hot_range.to, - ); - // First ProgPoint in cold range (after hot range). - cold_hot_splits.push(hot_range.to); - } + // Don't split in the middle of an instruction -- this could + // create impossible moves (we cannot insert a move between an + // instruction's uses and defs). + if split_at.pos() == InstPosition::After { + split_at = split_at.next(); } - - // Scan through clobber-insts from last left-off position until the first - // clobbering inst past this range. Record all clobber sites as potential - // splits. 
- while clobberidx < self.clobbers.len() { - let cur_clobber = self.clobbers[clobberidx]; - let pos = ProgPoint::before(cur_clobber); - if pos >= entry.range.to { - break; - } - clobberidx += 1; - if pos < entry.range.from { - continue; - } - if pos > bundle_start { - log::debug!(" -> potential clobber split at {:?}", pos); - clobber_splits.push(pos); - } + if split_at >= bundle_end { + split_at = split_at.prev().prev(); } + } - // Update last-before-conflict and first-before-conflict positions. + assert!(split_at > bundle_start && split_at < bundle_end); - let mut update_with_pos = |pos: ProgPoint| { - let before_inst = ProgPoint::before(pos.inst()); - let before_next_inst = before_inst.next().next(); - if before_inst > bundle_start - && (conflict_from.is_none() || before_inst < conflict_from.unwrap()) - && (last_before_conflict.is_none() - || before_inst > last_before_conflict.unwrap()) - { - last_before_conflict = Some(before_inst); - } - if before_next_inst < bundle_end - && (conflict_to.is_none() || pos >= conflict_to.unwrap()) - && (first_after_conflict.is_none() || pos > first_after_conflict.unwrap()) - { - first_after_conflict = Some(ProgPoint::before(pos.inst().next())); - } - }; + // We need to find which LRs fall on each side of the split, + // which LR we need to split down the middle, then update the + // current bundle, create a new one, and (re)-queue both. - for u in &self.ranges[entry.index.index()].uses { - log::debug!(" -> range has use at {:?}", u.pos); - update_with_pos(u.pos); - if u.operand.kind() == OperandKind::Def { - if seen_defs > 0 { - def_splits.push(u.pos); - } - seen_defs += 1; - } - } - } - log::debug!( - " -> first use/def after conflict range: {:?}", - first_after_conflict, - ); - log::debug!( - " -> last use/def before conflict range: {:?}", - last_before_conflict, - ); + log::debug!(" -> LRs: {:?}", self.bundles[bundle.index()].ranges); - // Based on the above, we can determine which split strategy we are taking at this - // iteration: - // - If we span both hot and cold code, split into separate "hot" and "cold" bundles. - // - Otherwise, if we span any calls, split just before every call instruction. - // - Otherwise, if there is a register use after the conflicting bundle, - // split at that use-point ("split before first use"). - // - Otherwise, if there is a register use before the conflicting - // bundle, split at that use-point ("split after last use"). - // - Otherwise, split at every use, to form minimal bundles. 
- - if cold_hot_splits.len() > 0 { - log::debug!(" going with cold/hot splits: {:?}", cold_hot_splits); - self.stats.splits_hot += 1; - cold_hot_splits - } else if clobber_splits.len() > 0 { - log::debug!(" going with clobber splits: {:?}", clobber_splits); - self.stats.splits_clobbers += 1; - clobber_splits - } else if first_after_conflict.is_some() { - self.stats.splits_conflicts += 1; - log::debug!(" going with first after conflict"); - smallvec![first_after_conflict.unwrap()] - } else if last_before_conflict.is_some() { - self.stats.splits_conflicts += 1; - log::debug!(" going with last before conflict"); - smallvec![last_before_conflict.unwrap()] - } else if def_splits.len() > 0 && def_splits[0] > bundle_start { - log::debug!(" going with non-first def splits: {:?}", def_splits); - self.stats.splits_defs += 1; - def_splits - } else { - self.stats.splits_all += 1; - log::debug!(" splitting at all uses"); - self.find_all_use_split_points(bundle) - } - } - - fn find_all_use_split_points(&self, bundle: LiveBundleIndex) -> SmallVec<[ProgPoint; 4]> { - let mut splits = smallvec![]; - let ranges = &self.bundles[bundle.index()].ranges[..]; - log::debug!("finding all use/def splits for {:?}", bundle); - let bundle_start = if ranges.is_empty() { - ProgPoint::before(Inst::new(0)) - } else { - self.ranges[ranges[0].index.index()].range.from - }; - // N.B.: a minimal bundle must include only ProgPoints in a - // single instruction, but can include both (can include two - // ProgPoints). We split here, taking care to never split *in - // the middle* of an instruction, because we would not be able - // to insert moves to reify such an assignment. - for entry in ranges { - log::debug!(" -> range {:?}: {:?}", entry.index, entry.range); - for u in &self.ranges[entry.index.index()].uses { - log::debug!(" -> use: {:?}", u); - let before_use_inst = if u.operand.kind() == OperandKind::Def { - // For a def, split *at* the def -- this may be an - // After point, but the value cannot be live into - // the def so we don't need to insert a move. - u.pos - } else { - // For an use or mod, split before the instruction - // -- this allows us to insert a move if - // necessary. - ProgPoint::before(u.pos.inst()) - }; - let after_use_inst = ProgPoint::before(u.pos.inst().next()); - log::debug!( - " -> splitting before and after use: {:?} and {:?}", - before_use_inst, - after_use_inst, - ); - if before_use_inst > bundle_start { - splits.push(before_use_inst); - } - splits.push(after_use_inst); + let mut last_lr_in_old_bundle_idx = 0; // last LR-list index in old bundle + let mut first_lr_in_new_bundle_idx = 0; // first LR-list index in new bundle + for (i, entry) in self.bundles[bundle.index()].ranges.iter().enumerate() { + if split_at > entry.range.from { + last_lr_in_old_bundle_idx = i; + first_lr_in_new_bundle_idx = i; + } + if split_at < entry.range.to { + first_lr_in_new_bundle_idx = i; + break; } } - splits.sort_unstable(); - log::debug!(" -> final splits: {:?}", splits); - splits - } - - fn split_and_requeue_bundle( - &mut self, - bundle: LiveBundleIndex, - first_conflicting_bundle: LiveBundleIndex, - ) { - self.stats.splits += 1; - // Try splitting: (i) across hot code; (ii) across all calls, - // if we had a fixed-reg conflict; (iii) before first reg use; - // (iv) after reg use; (v) around all register uses. After - // each type of split, check for conflict with conflicting - // bundle(s); stop when no conflicts. In all cases, re-queue - // the split bundles on the allocation queue. 
- // - // The critical property here is that we must eventually split - // down to minimal bundles, which consist just of live ranges - // around each individual def/use (this is step (v) - // above). This ensures termination eventually. - let split_points = self.find_split_points(bundle, first_conflicting_bundle); log::debug!( - "split bundle {:?} (conflict {:?}): split points {:?}", - bundle, - first_conflicting_bundle, - split_points + " -> last LR in old bundle: LR {:?}", + self.bundles[bundle.index()].ranges[last_lr_in_old_bundle_idx] + ); + log::debug!( + " -> first LR in new bundle: LR {:?}", + self.bundles[bundle.index()].ranges[first_lr_in_new_bundle_idx] ); - // Split `bundle` at every ProgPoint in `split_points`, - // creating new LiveRanges and bundles (and updating vregs' - // linked lists appropriately), and enqueue the new bundles. - // - // We uphold several basic invariants here: - // - The LiveRanges in every vreg, and in every bundle, are disjoint - // - Every bundle for a given vreg is disjoint - // - // To do so, we make one scan in program order: all ranges in - // the bundle, and the def/all uses in each range. We track - // the currently active bundle. For each range, we distribute - // its uses among one or more ranges, depending on whether it - // crosses any split points. If we had to split a range, then - // we need to insert the new subparts in its vreg as - // well. N.B.: to avoid the need to *remove* ranges from vregs - // (which we could not do without a lookup, since we use - // singly-linked lists and the bundle may contain multiple - // vregs so we cannot simply scan a single vreg simultaneously - // to the main scan), we instead *trim* the existing range - // into its first subpart, and then create the new - // subparts. Note that shrinking a LiveRange is always legal - // (as long as one replaces the shrunk space with new - // LiveRanges). - // - // Note that the original IonMonkey splitting code is quite a - // bit more complex and has some subtle invariants. We stick - // to the above invariants to keep this code maintainable. - - let spillset = self.bundles[bundle.index()].spillset; - let mut split_idx = 0; - - // Fast-forward past any splits that occur before or exactly - // at the start of the first range in the bundle. - let bundle_start = if self.bundles[bundle.index()].ranges.is_empty() { - ProgPoint::before(Inst::new(0)) - } else { - self.bundles[bundle.index()].ranges[0].range.from - }; - while split_idx < split_points.len() && split_points[split_idx] <= bundle_start { - split_idx += 1; + // Take the sublist of LRs that will go in the new bundle. + let mut new_lr_list: LiveRangeList = self.bundles[bundle.index()] + .ranges + .iter() + .cloned() + .skip(first_lr_in_new_bundle_idx) + .collect(); + self.bundles[bundle.index()] + .ranges + .truncate(last_lr_in_old_bundle_idx + 1); + + // If the first entry in `new_lr_list` is a LR that is split + // down the middle, replace it with a new LR and chop off the + // end of the same LR in the original list. 
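+        // Uses at or after `split_at` migrate to the new LR, and the spill
+        // weights/flags of both halves are recomputed afterward.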
+ if split_at > new_lr_list[0].range.from { + assert_eq!(last_lr_in_old_bundle_idx, first_lr_in_new_bundle_idx); + let orig_lr = new_lr_list[0].index; + let new_lr = self.create_liverange(CodeRange { + from: split_at, + to: new_lr_list[0].range.to, + }); + self.ranges[new_lr.index()].vreg = self.ranges[orig_lr.index()].vreg; + log::debug!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); + let first_use = self.ranges[orig_lr.index()] + .uses + .iter() + .position(|u| u.pos >= split_at) + .unwrap_or(self.ranges[orig_lr.index()].uses.len()); + let rest_uses: UseList = self.ranges[orig_lr.index()] + .uses + .iter() + .cloned() + .skip(first_use) + .collect(); + self.ranges[new_lr.index()].uses = rest_uses; + self.ranges[orig_lr.index()].uses.truncate(first_use); + self.recompute_range_properties(orig_lr); + self.recompute_range_properties(new_lr); + new_lr_list[0].index = new_lr; + new_lr_list[0].range = self.ranges[new_lr.index()].range; + self.ranges[orig_lr.index()].range.to = split_at; + self.bundles[bundle.index()].ranges[last_lr_in_old_bundle_idx].range = + self.ranges[orig_lr.index()].range; + + // Perform a lazy split in the VReg data. We just + // append the new LR and its range; we will sort by + // start of range, and fix up range ends, once when we + // iterate over the VReg's ranges after allocation + // completes (this is the only time when order + // matters). + self.vregs[self.ranges[new_lr.index()].vreg.index()] + .ranges + .push(LiveRangeListEntry { + range: self.ranges[new_lr.index()].range, + index: new_lr, + }); } - let mut new_bundles: LiveBundleVec = smallvec![]; - let mut cur_bundle = bundle; - let ranges = std::mem::replace(&mut self.bundles[bundle.index()].ranges, smallvec![]); - // - Invariant: current LR `cur_lr` is being built; it has not - // yet been added to `cur_bundle`. - // - Invariant: uses in `cur_uses` have not yet been added to - // `cur_lr`. - for entry in &ranges { - log::debug!(" -> has range {:?} (LR {:?})", entry.range, entry.index); - - // Until we reach a split point, copy or create the current range. - let mut cur_range = entry.range; - let mut cur_lr = entry.index; - let mut cur_uses = - std::mem::replace(&mut self.ranges[cur_lr.index()].uses, smallvec![]); - let mut cur_uses = cur_uses.drain(..).peekable(); - - self.ranges[cur_lr.index()].uses_spill_weight_and_flags = 0; - - let update_lr_stats = |lr: &mut LiveRange, u: &Use| { - if lr.uses.is_empty() && u.operand.kind() == OperandKind::Def { - lr.set_flag(LiveRangeFlag::StartsAtDef); - } - lr.uses_spill_weight_and_flags += u.weight as u32; - }; - - while cur_range.to > cur_range.from { - if (split_idx >= split_points.len()) || (split_points[split_idx] >= cur_range.to) { - log::debug!( - " -> no more split points; placing all remaining uses into cur range{}", - cur_lr.index() - ); - // No more split points left, or next split point - // is beyond the range: just copy the current - // range into the current bundle, and drop all the - // remaining uses into it. - for u in cur_uses { - update_lr_stats(&mut self.ranges[cur_lr.index()], &u); - log::debug!(" -> use at {:?}", u.pos); - self.ranges[cur_lr.index()].uses.push(u); - } - - self.ranges[cur_lr.index()].bundle = cur_bundle; - self.bundles[cur_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: cur_range, - index: cur_lr, - }); - break; - } - - // If there is a split point prior to or exactly at - // the start of this LR, then create a new bundle but - // keep the existing LR, and go around again. 
Skip all - // such split-points (lump them into one), while we're - // at it. - if split_points[split_idx] <= cur_range.from { - log::debug!( - " -> split point at {:?} before start of range (range {:?} LR {:?})", - split_points[split_idx], - cur_range, - cur_lr, - ); - cur_bundle = self.create_bundle(); - log::debug!(" -> new bundle {:?}", cur_bundle); - self.ranges[cur_lr.index()].bundle = cur_bundle; - new_bundles.push(cur_bundle); - self.bundles[cur_bundle.index()].spillset = spillset; - while split_idx < split_points.len() - && split_points[split_idx] <= cur_range.from - { - split_idx += 1; - } - continue; - } - - // If we reach here, there is at least one split point - // that lands in the current range, so we need to - // actually split. Let's create a new LR and bundle - // for the rest (post-split-point), drop uses up to - // the split point into current LR and drop current LR - // into current bundle, then advance current LR and - // bundle to new LR and bundle. - let split = split_points[split_idx]; - while split_idx < split_points.len() && split_points[split_idx] == split { - // Skip past all duplicate split-points. - split_idx += 1; - } - log::debug!(" -> split at {:?}", split); - - let existing_range = CodeRange { - from: cur_range.from, - to: split, - }; - let new_range = CodeRange { - from: split, - to: cur_range.to, - }; - let new_lr = self.create_liverange(new_range); - let new_bundle = self.create_bundle(); - log::debug!(" -> new LR {:?}, new bundle {:?}", new_lr, new_bundle); - new_bundles.push(new_bundle); - self.bundles[new_bundle.index()].spillset = spillset; - - self.ranges[cur_lr.index()].range = existing_range; - self.ranges[new_lr.index()].vreg = self.ranges[cur_lr.index()].vreg; - self.ranges[new_lr.index()].bundle = new_bundle; - - while let Some(u) = cur_uses.peek() { - if u.pos >= split { - break; - } - update_lr_stats(&mut self.ranges[cur_lr.index()], &u); - log::debug!(" -> use at {:?} in current LR {:?}", u.pos, cur_lr); - self.ranges[cur_lr.index()].uses.push(*u); - cur_uses.next(); - } - - log::debug!( - " -> adding current LR {:?} to current bundle {:?}", - cur_lr, - cur_bundle - ); - self.ranges[cur_lr.index()].bundle = cur_bundle; - self.bundles[cur_bundle.index()] - .ranges - .push(LiveRangeListEntry { - range: existing_range, - index: cur_lr, - }); - - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { - self.annotate( - existing_range.to, - format!( - " SPLIT range{} v{} bundle{} to range{} bundle{}", - cur_lr.index(), - self.ranges[cur_lr.index()].vreg.index(), - cur_bundle.index(), - new_lr.index(), - new_bundle.index(), - ), - ); - } - - cur_range = new_range; - cur_bundle = new_bundle; - cur_lr = new_lr; - - // Perform a lazy split in the VReg data. We just - // append the new LR and its range; we will sort by - // start of range, and fix up range ends, once when we - // iterate over the VReg's ranges after allocation - // completes (this is the only time when order - // matters). - self.vregs[self.ranges[new_lr.index()].vreg.index()] - .ranges - .push(LiveRangeListEntry { - range: new_range, - index: new_lr, - }); - } + let new_bundle = self.create_bundle(); + log::debug!(" -> creating new bundle {:?}", new_bundle); + self.bundles[new_bundle.index()].spillset = spillset; + for entry in &new_lr_list { + self.ranges[entry.index.index()].bundle = new_bundle; } + self.bundles[new_bundle.index()].ranges = new_lr_list; - // Recompute weights and priorities of all bundles, and - // enqueue all split-bundles on the allocation queue. 
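+        // Recompute properties and priorities for both halves and requeue
+        // them: the original (pre-conflict) half carries `reg_hint`, so it is
+        // retried in the register where we determined it fit, while the new
+        // half is requeued with no hint.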
+ self.recompute_bundle_properties(bundle); + self.recompute_bundle_properties(new_bundle); let prio = self.compute_bundle_prio(bundle); + let new_prio = self.compute_bundle_prio(new_bundle); self.bundles[bundle.index()].prio = prio; - self.recompute_bundle_properties(bundle); - self.allocation_queue.insert(bundle, prio as usize); - for &b in &new_bundles { - let prio = self.compute_bundle_prio(b); - self.bundles[b.index()].prio = prio; - self.recompute_bundle_properties(b); - self.allocation_queue.insert(b, prio as usize); - } + self.bundles[new_bundle.index()].prio = new_prio; + self.allocation_queue + .insert(bundle, prio as usize, reg_hint); + self.allocation_queue + .insert(new_bundle, new_prio as usize, PReg::invalid()); } - fn process_bundle(&mut self, bundle: LiveBundleIndex) -> Result<(), RegAllocError> { + fn process_bundle( + &mut self, + bundle: LiveBundleIndex, + reg_hint: PReg, + ) -> Result<(), RegAllocError> { // Find any requirements: for every LR, for every def/use, gather // requirements (fixed-reg, any-reg, any) and merge them. let req = self.compute_requirement(bundle); - // Grab a hint from our spillset, if any. - let hint_reg = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; + // Grab a hint from either the queue or our spillset, if any. + let hint_reg = if reg_hint != PReg::invalid() { + reg_hint + } else { + self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint + }; log::debug!( "process_bundle: bundle {:?} requirement {:?} hint {:?}", bundle, @@ -3102,12 +2810,12 @@ impl<'a, F: Function> Env<'a, F> { // Try to allocate! let mut attempts = 0; - let mut first_conflicting_bundle; + let mut split_at_point = self.bundles[bundle.index()].ranges[0].range.from; + let mut requeue_with_reg = PReg::invalid(); loop { attempts += 1; log::debug!("attempt {}, req {:?}", attempts, req); debug_assert!(attempts < 100 * self.func.insts()); - first_conflicting_bundle = None; let req = match req { Some(r) => r, // `None` means conflicting requirements, hence impossible to @@ -3115,114 +2823,143 @@ impl<'a, F: Function> Env<'a, F> { None => break, }; - let conflicting_bundles = match req { - Requirement::Fixed(preg) => { - let preg_idx = PRegIndex::new(preg.index()); - self.stats.process_bundle_reg_probes_fixed += 1; - log::debug!("trying fixed reg {:?}", preg_idx); - match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { - AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_fixed += 1; - log::debug!(" -> allocated to fixed {:?}", preg_idx); - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = alloc.as_reg().unwrap(); - return Ok(()); - } - AllocRegResult::Conflict(bundles) => { - log::debug!(" -> conflict with bundles {:?}", bundles); - bundles - } - AllocRegResult::ConflictWithFixed => { - log::debug!(" -> conflict with fixed alloc"); - // Empty conflicts set: there's nothing we can - // evict, because fixed conflicts cannot be moved. - smallvec![] - } - } - } - Requirement::Register(class) => { - // Scan all pregs and attempt to allocate. - let mut lowest_cost_conflict_set: Option = None; - - // Heuristic: start the scan for an available - // register at an offset influenced both by our - // location in the code and by the bundle we're - // considering. This has the effect of spreading - // demand more evenly across registers. 
- let scan_offset = self.ranges - [self.bundles[bundle.index()].ranges[0].index.index()] - .range - .from - .inst() - .index() - + bundle.index(); - - self.stats.process_bundle_reg_probe_start_any += 1; - for preg in RegTraversalIter::new( - self.env, - class, - hint_reg, - PReg::invalid(), - scan_offset, - ) { - self.stats.process_bundle_reg_probes_any += 1; + let (conflicting_bundles, latest_first_conflict_point, latest_first_conflict_reg) = + match req { + Requirement::Fixed(preg) => { let preg_idx = PRegIndex::new(preg.index()); - log::debug!("trying preg {:?}", preg_idx); + self.stats.process_bundle_reg_probes_fixed += 1; + log::debug!("trying fixed reg {:?}", preg_idx); match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_any += 1; - log::debug!(" -> allocated to any {:?}", preg_idx); + self.stats.process_bundle_reg_success_fixed += 1; + log::debug!(" -> allocated to fixed {:?}", preg_idx); self.spillsets[self.bundles[bundle.index()].spillset.index()] .reg_hint = alloc.as_reg().unwrap(); return Ok(()); } AllocRegResult::Conflict(bundles) => { log::debug!(" -> conflict with bundles {:?}", bundles); - if lowest_cost_conflict_set.is_none() { - lowest_cost_conflict_set = Some(bundles); - } else if self.maximum_spill_weight_in_bundle_set(&bundles) - < self.maximum_spill_weight_in_bundle_set( - lowest_cost_conflict_set.as_ref().unwrap(), - ) - { - lowest_cost_conflict_set = Some(bundles); - } + let first_bundle = bundles[0]; + ( + bundles, + self.bundles[first_bundle.index()].ranges[0].range.from, + preg, + ) } AllocRegResult::ConflictWithFixed => { log::debug!(" -> conflict with fixed alloc"); - // Simply don't consider as an option. + // Empty conflicts set: there's nothing we can + // evict, because fixed conflicts cannot be moved. + ( + smallvec![], + ProgPoint::before(Inst::new(0)), + PReg::invalid(), + ) } } } + Requirement::Register(class) => { + // Scan all pregs and attempt to allocate. + let mut lowest_cost_conflict_set: Option = None; + let mut latest_first_conflict_point = ProgPoint::before(Inst::new(0)); + let mut latest_first_conflict_reg = PReg::invalid(); + + // Heuristic: start the scan for an available + // register at an offset influenced both by our + // location in the code and by the bundle we're + // considering. This has the effect of spreading + // demand more evenly across registers. 
+ let scan_offset = self.ranges + [self.bundles[bundle.index()].ranges[0].index.index()] + .range + .from + .inst() + .index() + + bundle.index(); + + self.stats.process_bundle_reg_probe_start_any += 1; + for preg in RegTraversalIter::new( + self.env, + class, + hint_reg, + PReg::invalid(), + scan_offset, + ) { + self.stats.process_bundle_reg_probes_any += 1; + let preg_idx = PRegIndex::new(preg.index()); + log::debug!("trying preg {:?}", preg_idx); + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_any += 1; + log::debug!(" -> allocated to any {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = alloc.as_reg().unwrap(); + return Ok(()); + } + AllocRegResult::Conflict(bundles) => { + log::debug!(" -> conflict with bundles {:?}", bundles); + + let first_conflict_point = + self.bundles[bundles[0].index()].ranges[0].range.from; + if first_conflict_point > latest_first_conflict_point { + latest_first_conflict_point = first_conflict_point; + latest_first_conflict_reg = preg; + } - // Otherwise, we *require* a register, but didn't fit into - // any with current bundle assignments. Hence, we will need - // to either split or attempt to evict some bundles. Return - // the conflicting bundles to evict and retry. Empty list - // means nothing to try (due to fixed conflict) so we must - // split instead. - lowest_cost_conflict_set.unwrap_or(smallvec![]) - } + if lowest_cost_conflict_set.is_none() { + lowest_cost_conflict_set = Some(bundles); + } else if self.maximum_spill_weight_in_bundle_set(&bundles) + < self.maximum_spill_weight_in_bundle_set( + lowest_cost_conflict_set.as_ref().unwrap(), + ) + { + lowest_cost_conflict_set = Some(bundles); + } + } + AllocRegResult::ConflictWithFixed => { + log::debug!(" -> conflict with fixed alloc"); + // Simply don't consider as an option. + } + } + } - Requirement::Stack(_) => { - // If we must be on the stack, put ourselves on - // the spillset's list immediately. - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .bundles - .push(bundle); - return Ok(()); - } + // Otherwise, we *require* a register, but didn't fit into + // any with current bundle assignments. Hence, we will need + // to either split or attempt to evict some bundles. Return + // the conflicting bundles to evict and retry. Empty list + // means nothing to try (due to fixed conflict) so we must + // split instead. + ( + lowest_cost_conflict_set.unwrap_or(smallvec![]), + latest_first_conflict_point, + latest_first_conflict_reg, + ) + } - Requirement::Any(_) => { - // If a register is not *required*, spill now (we'll retry - // allocation on spilled bundles later). - log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); - self.spilled_bundles.push(bundle); - return Ok(()); - } - }; + Requirement::Stack(_) => { + // If we must be on the stack, put ourselves on + // the spillset's list immediately. + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .bundles + .push(bundle); + return Ok(()); + } + + Requirement::Any(_) => { + // If a register is not *required*, spill now (we'll retry + // allocation on spilled bundles later). 
+ log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); + self.spilled_bundles.push(bundle); + return Ok(()); + } + }; log::debug!(" -> conflict set {:?}", conflicting_bundles); + log::debug!( + " -> latest first conflict {:?} with reg {:?}", + latest_first_conflict_point, + latest_first_conflict_reg + ); // If we have already tried evictions once before and are // still unsuccessful, give up and move on to splitting as @@ -3236,7 +2973,9 @@ impl<'a, F: Function> Env<'a, F> { break; } - first_conflicting_bundle = Some(conflicting_bundles[0]); + let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; + split_at_point = std::cmp::max(latest_first_conflict_point, bundle_start); + requeue_with_reg = latest_first_conflict_reg; // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill // weight, then don't evict. @@ -3297,10 +3036,7 @@ impl<'a, F: Function> Env<'a, F> { } assert!(!self.minimal_bundle(bundle)); - self.split_and_requeue_bundle( - bundle, - first_conflicting_bundle.unwrap_or(LiveBundleIndex::invalid()), - ); + self.split_and_requeue_bundle(bundle, split_at_point, requeue_with_reg); Ok(()) } From 4b46c6388ae8a5c64fcbfc496f203eaab07ba3bb Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 22 May 2021 14:26:49 -0700 Subject: [PATCH 071/155] Fuzzbug fixes for simpler splitting --- src/ion/mod.rs | 56 +++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 63ea532e..c2cd78a9 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1285,20 +1285,20 @@ impl<'a, F: Function> Env<'a, F> { if live.get(pinned_vreg.vreg()) { let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; let orig_start = self.ranges[pinned_lr.index()].range.from; + log::debug!( + " -> live with LR {:?}; truncating to start at {:?}", + pinned_lr, + progpoint.next() + ); self.ranges[pinned_lr.index()].range.from = progpoint.next(); let new_lr = self.add_liverange_to_vreg( VRegIndex::new(pinned_vreg.vreg()), CodeRange { from: orig_start, - to: progpoint, + to: progpoint.prev(), }, ); vreg_ranges[pinned_vreg.vreg()] = new_lr; - log::debug!( - " -> live with LR {:?}; truncating to start at {:?}", - pinned_lr, - progpoint.next() - ); log::debug!(" -> created LR {:?} with remaining range from {:?} to {:?}", new_lr, orig_start, progpoint); // Add an edit right now to indicate that at @@ -1325,16 +1325,21 @@ impl<'a, F: Function> Env<'a, F> { }, ); } else { - let new_lr = self.add_liverange_to_vreg( - VRegIndex::new(pinned_vreg.vreg()), - CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: progpoint, - }, - ); - vreg_ranges[pinned_vreg.vreg()] = new_lr; - live.set(pinned_vreg.vreg(), true); - log::debug!(" -> was not live; created new LR {:?}", new_lr); + if inst > self.cfginfo.block_entry[block.index()].inst() { + let new_lr = self.add_liverange_to_vreg( + VRegIndex::new(pinned_vreg.vreg()), + CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: ProgPoint::before(inst), + }, + ); + vreg_ranges[pinned_vreg.vreg()] = new_lr; + live.set(pinned_vreg.vreg(), true); + log::debug!( + " -> was not live; created new LR {:?}", + new_lr + ); + } // Add an edit right now to indicate that at // this program point, the given @@ -2536,13 +2541,19 @@ impl<'a, F: Function> Env<'a, F> { break; } } - // Minimal if this is the only range in the bundle, and if - // the range covers only one instruction. Note that it - // could cover just one ProgPoint, i.e. 
X.Before..X.After, - // or two ProgPoints, i.e. X.Before..X+1.Before. + // Minimal if the range covers only one instruction. Note + // that it could cover just one ProgPoint, + // i.e. X.Before..X.After, or two ProgPoints, + // i.e. X.Before..X+1.Before. log::debug!(" -> first range has range {:?}", first_range_data.range); - minimal = self.bundles[bundle.index()].ranges.len() == 1 - && first_range_data.range.from.inst() == first_range_data.range.to.prev().inst(); + let bundle_start = self.bundles[bundle.index()] + .ranges + .first() + .unwrap() + .range + .from; + let bundle_end = self.bundles[bundle.index()].ranges.last().unwrap().range.to; + minimal = bundle_start.inst() == bundle_end.prev().inst(); log::debug!(" -> minimal: {}", minimal); } @@ -2662,7 +2673,6 @@ impl<'a, F: Function> Env<'a, F> { .next(), ), }; - assert!(split_at < bundle_end); log::debug!( "split point is at bundle start; advancing to {:?}", split_at From 469669155fcb8ab201f235ab7b7f31fb8511a1e8 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 22 May 2021 14:37:56 -0700 Subject: [PATCH 072/155] Another fuzzbug fix: proper checker-hint ordering when V-R and V-V moves are back-to-back and RReg ownership changes --- src/ion/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index c2cd78a9..1db10b8d 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1310,7 +1310,7 @@ impl<'a, F: Function> Env<'a, F> { // checker. self.add_edit( ProgPoint::before(inst), - InsertMovePrio::Regular, + InsertMovePrio::MultiFixedReg, Edit::DefAlloc { alloc: Allocation::reg(preg), vreg: dst.vreg(), @@ -1347,7 +1347,7 @@ impl<'a, F: Function> Env<'a, F> { // not the preg. This is used by // the checker. self.add_edit( - ProgPoint::before(inst), + ProgPoint::after(inst), InsertMovePrio::Regular, Edit::DefAlloc { alloc: Allocation::reg(preg), From a6c89b1c01d5a8f834aaae5f8719f6a42eda657c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 22 May 2021 15:12:35 -0700 Subject: [PATCH 073/155] Avoid O(n^2) in liverange construction: we always build LRs in (reverse) order, so we can just append (prepend) to running list and reverse at end. Likewise for uses. --- src/ion/mod.rs | 132 ++++++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 56 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 1db10b8d..ada8049c 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -821,50 +821,46 @@ impl<'a, F: Function> Env<'a, F> { /// Mark `range` as live for the given `vreg`. /// /// Returns the liverange that contains the given range. - fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, mut range: CodeRange) -> LiveRangeIndex { + fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, range: CodeRange) -> LiveRangeIndex { log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); - // Check for abutting or overlapping ranges. - let mut merged = None; - let mut i = 0; - while i < self.vregs[vreg.index()].ranges.len() { - let entry = self.vregs[vreg.index()].ranges[i]; - // Don't use `entry.range`; it is not kept up-to-date as - // we are building LRs. 
- let this_range = self.ranges[entry.index.index()].range; - if range.overlaps(&this_range) { - if this_range.from < range.from { - range.from = this_range.from; - } - if this_range.to > range.to { - range.to = this_range.to; - } - if merged.is_none() { - merged = Some(i); - self.ranges[entry.index.index()].range = range; - self.vregs[vreg.index()].ranges[i].range = range; - i += 1; - } else { - let merge_from = entry.index; - let merge_into = self.vregs[vreg.index()].ranges[merged.unwrap()].index; - self.ranges[merge_from.index()].merged_into = merge_into; - let mut uses = - std::mem::replace(&mut self.ranges[merge_from.index()].uses, smallvec![]); - self.ranges[merge_into.index()].uses.extend(uses.drain(..)); - let f = self.ranges[merge_from.index()].flag_word(); - self.ranges[merge_into.index()].merge_flags(f); - self.ranges[merge_into.index()].range = range; - self.vregs[vreg.index()].ranges[merged.unwrap()].range = range; - self.vregs[vreg.index()].ranges.remove(i); - } - } else { - i += 1; - } - } + // Invariant: as we are building liveness information, we + // *always* process instructions bottom-to-top, and as a + // consequence, new liveranges are always created before any + // existing liveranges for a given vreg. We assert this here, + // then use it to avoid an O(n) merge step (which would lead + // to O(n^2) liveness construction cost overall). + // + // We store liveranges in reverse order in the `.ranges` + // array, then reverse them at the end of + // `compute_liveness()`. + + assert!( + self.vregs[vreg.index()].ranges.is_empty() + || range.to + <= self.ranges[self.vregs[vreg.index()] + .ranges + .last() + .unwrap() + .index + .index()] + .range + .from + ); - // If we get here and did not merge into an existing liverange or liveranges, then we need - // to create a new one. - if merged.is_none() { + if self.vregs[vreg.index()].ranges.is_empty() + || range.to + < self.ranges[self.vregs[vreg.index()] + .ranges + .last() + .unwrap() + .index + .index()] + .range + .from + { + // Is not contiguous with previously-added (immediately + // following) range; create a new range. let lr = self.create_liverange(range); self.ranges[lr.index()].vreg = vreg; self.vregs[vreg.index()] @@ -872,7 +868,12 @@ impl<'a, F: Function> Env<'a, F> { .push(LiveRangeListEntry { range, index: lr }); lr } else { - self.vregs[vreg.index()].ranges[merged.unwrap()].index + // Is contiguous with previously-added range; just extend + // its range and return it. + let lr = self.vregs[vreg.index()].ranges.last().unwrap().index; + assert!(range.to == self.ranges[lr.index()].range.from); + self.ranges[lr.index()].range.from = range.from; + lr } } @@ -1484,13 +1485,20 @@ impl<'a, F: Function> Env<'a, F> { // and create an initial LR back to the start of // the block. 
let pos = ProgPoint::after(inst); - let range = CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: pos.next(), + let src_lr = if !live.get(src.vreg().vreg()) { + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + let src_lr = self.add_liverange_to_vreg( + VRegIndex::new(src.vreg().vreg()), + range, + ); + vreg_ranges[src.vreg().vreg()] = src_lr; + src_lr + } else { + vreg_ranges[src.vreg().vreg()] }; - let src_lr = self - .add_liverange_to_vreg(VRegIndex::new(src.vreg().vreg()), range); - vreg_ranges[src.vreg().vreg()] = src_lr; log::debug!(" -> src LR {:?}", src_lr); @@ -1682,23 +1690,35 @@ impl<'a, F: Function> Env<'a, F> { self.safepoints.sort_unstable(); - // Sort ranges in each vreg, and uses in each range, so we can - // iterate over them in order below. The ordering invariant is - // always maintained for uses and always for ranges in bundles - // (which are initialized later), but not always for ranges in - // vregs; those are sorted only when needed, here and then - // again at the end of allocation when resolving moves. + // Make ranges in each vreg and uses in each range appear in + // sorted order. We built them in reverse order above, so this + // is a simple reversal, *not* a full sort. + // + // The ordering invariant is always maintained for uses and + // always for ranges in bundles (which are initialized later), + // but not always for ranges in vregs; those are sorted only + // when needed, here and then again at the end of allocation + // when resolving moves. + for vreg in &mut self.vregs { + vreg.ranges.reverse(); + let mut last = None; for entry in &mut vreg.ranges { // Ranges may have been truncated above at defs. We // need to update with the final range here. entry.range = self.ranges[entry.index.index()].range; + // Assert in-order and non-overlapping. + assert!(last.is_none() || last.unwrap() <= entry.range.from); + last = Some(entry.range.to); } - vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); } for range in 0..self.ranges.len() { - self.ranges[range].uses.sort_unstable_by_key(|u| u.pos); + self.ranges[range].uses.reverse(); + debug_assert!(self.ranges[range] + .uses + .windows(2) + .all(|win| win[0].pos <= win[1].pos)); } // Insert safepoint virtual stack uses, if needed. From 107c09181f3a1a4342abd423c861152416887098 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 22 May 2021 16:36:44 -0700 Subject: [PATCH 074/155] Simple speedup in bundle merge: set bundle while everything is in cache (same pass), and only check non-overlap with debug assertions enabled. Alloc time on clang.wasm: 9.1s old backtracking RA vs. 7.2s with this regalloc2 RA. --- src/ion/mod.rs | 59 ++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index ada8049c..ff3c282b 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2051,6 +2051,7 @@ impl<'a, F: Function> Env<'a, F> { while idx_from < ranges_from.len() || idx_to < ranges_to.len() { if idx_from < ranges_from.len() && idx_to < ranges_to.len() { if ranges_from[idx_from].range.from <= ranges_to[idx_to].range.from { + self.ranges[ranges_from[idx_from].index.index()].bundle = to; merged.push(ranges_from[idx_from]); idx_from += 1; } else { @@ -2058,6 +2059,9 @@ impl<'a, F: Function> Env<'a, F> { idx_to += 1; } } else if idx_from < ranges_from.len() { + for entry in &ranges_from[idx_from..] 
{ + self.ranges[entry.index.index()].bundle = to; + } merged.extend_from_slice(&ranges_from[idx_from..]); break; } else { @@ -2066,35 +2070,38 @@ impl<'a, F: Function> Env<'a, F> { break; } } - log::debug!("merging: merged = {:?}", merged); - let mut last_range = None; - for entry in &merged { - if last_range.is_some() { - assert!(last_range.unwrap() < entry.range); - } - last_range = Some(entry.range); - if self.ranges[entry.index.index()].bundle == from { - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); + #[cfg(debug_assertions)] + { + log::debug!("merging: merged = {:?}", merged); + let mut last_range = None; + for entry in &merged { + if last_range.is_some() { + assert!(last_range.unwrap() < entry.range); + } + last_range = Some(entry.range); + + if self.ranges[entry.index.index()].bundle == from { + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), + ); + } } - } - log::debug!( - " -> merged result for bundle{}: range{}", - to.index(), - entry.index.index(), - ); - self.ranges[entry.index.index()].bundle = to; + log::debug!( + " -> merged result for bundle{}: range{}", + to.index(), + entry.index.index(), + ); + } } self.bundles[to.index()].ranges = merged; From 59967ff7561ba5361b7dd90dd4b7cfe7ec4b17e3 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 23 May 2021 20:04:28 -0700 Subject: [PATCH 075/155] TODO-list update: braindump of next ideas to work on. --- src/ion/mod.rs | 57 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index ff3c282b..712f0578 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -20,17 +20,45 @@ */ /* - Performance ideas: - - - conflict hints? (note on one bundle that it definitely conflicts - with another, so avoid probing the other's alloc) - - - partial allocation -- place one LR, split rest off into separate - bundle, in one pass? - - - coarse-grained "register contention" counters per fixed region; - randomly sample these, adding up a vector of them, to choose - register probe order? + Performance and code-quality ideas: + + - Split heuristics: + - Loop depth at split point? Split before entering more nested loop + - Split at earliest vs latest conflict -- study more + + - Reduced spilling when spillslot is still "clean": + - When we allocate spillsets, use the whole bundle of a given + spillset to check for fit. Add all bundles to spillset as we + split; then SpillSet::bundles always corresponds to original + merged bundle. + - Then a single bundle will never move between spillslots, so we + know that when we reload from the one single spillslot, it is + the last value that we spilled. + - So we can track 'dirty' status of reg and elide spill when not + dirty. + - This is slightly tricky: fixpoint problem, across edges. + - We can simplify by assuming spillslot is dirty if value came + in on BB edge; only clean if we reload in same block we spill + in. + - As a slightly better variation on this, track dirty during + scan in a single range while resolving moves; in-edge makes + dirty. 
+ + - Add weight to bundles according to progmoves + + - Efficiency improvements: + - Record 'cheapest evict bundle so far' and stop scanning if + total evict cost exceeds that + + - Avoid requiring two scratch regs: + - Require machine impl to be able to (i) push a reg, (ii) pop a + reg; then generate a balanced pair of push/pop, using the stack + slot as the scratch. + - on Cranelift side, take care to generate virtual-SP + adjustments! + - For a spillslot->spillslot move, push a fixed reg (say the + first preferred one), reload into it, spill out of it, and then + pop old val */ #![allow(dead_code, unused_imports)] @@ -2410,9 +2438,10 @@ impl<'a, F: Function> Env<'a, F> { // O(b * n log n) with the simple probe-for-each-bundle-range // approach. // - // Note that the comparator function on a CodeRange tests for *overlap*, so we - // are checking whether the BTree contains any preg range that - // *overlaps* with range `range`, not literally the range `range`. + // Note that the comparator function on a CodeRange tests for + // *overlap*, so we are checking whether the BTree contains + // any preg range that *overlaps* with range `range`, not + // literally the range `range`. let bundle_ranges = &self.bundles[bundle.index()].ranges; let from_key = LiveRangeKey::from_range(&bundle_ranges.first().unwrap().range); let to_key = LiveRangeKey::from_range(&bundle_ranges.last().unwrap().range); From 46feacc654fc4377e762d64336fb8558139a9d7b Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 15:32:05 -0700 Subject: [PATCH 076/155] Fuzzbug fix: don't merge bundles that have conflicting requirements. (Normally splitting would fix this, but let's just not merge in the first place.) --- src/ion/mod.rs | 146 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 101 insertions(+), 45 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 712f0578..e2c1a407 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -160,6 +160,7 @@ struct LiveRange { uses: UseList, merged_into: LiveRangeIndex, + requirement: Requirement, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -237,6 +238,7 @@ struct LiveBundle { allocation: Allocation, prio: u32, // recomputed after every bulk update spill_weight_and_props: u32, + requirement: Requirement, } impl LiveBundle { @@ -524,36 +526,61 @@ fn spill_weight_from_policy(policy: OperandPolicy, is_hot: bool, is_def: bool) - #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum Requirement { + Unknown, Fixed(PReg), Register(RegClass), Stack(RegClass), Any(RegClass), + Conflict, } impl Requirement { #[inline(always)] fn class(self) -> RegClass { match self { + Requirement::Unknown => panic!("No class for unknown Requirement"), Requirement::Fixed(preg) => preg.class(), Requirement::Register(class) | Requirement::Any(class) | Requirement::Stack(class) => { class } + Requirement::Conflict => panic!("No class for conflicted Requirement"), } } #[inline(always)] - fn merge(self, other: Requirement) -> Option { - if self.class() != other.class() { - return None; - } + fn merge(self, other: Requirement) -> Requirement { match (self, other) { - (other, Requirement::Any(_)) | (Requirement::Any(_), other) => Some(other), - (Requirement::Stack(_), Requirement::Stack(_)) => Some(self), - (Requirement::Register(_), Requirement::Fixed(preg)) - | (Requirement::Fixed(preg), Requirement::Register(_)) => { - Some(Requirement::Fixed(preg)) + (Requirement::Unknown, other) | (other, Requirement::Unknown) => other, + (Requirement::Conflict, _) | (_, Requirement::Conflict) 
=> Requirement::Conflict, + (other, Requirement::Any(rc)) | (Requirement::Any(rc), other) => { + if other.class() == rc { + other + } else { + Requirement::Conflict + } + } + (Requirement::Stack(rc1), Requirement::Stack(rc2)) => { + if rc1 == rc2 { + self + } else { + Requirement::Conflict + } + } + (Requirement::Register(rc), Requirement::Fixed(preg)) + | (Requirement::Fixed(preg), Requirement::Register(rc)) => { + if rc == preg.class() { + Requirement::Fixed(preg) + } else { + Requirement::Conflict + } } - (Requirement::Register(_), Requirement::Register(_)) => Some(self), - (Requirement::Fixed(a), Requirement::Fixed(b)) if a == b => Some(self), - _ => None, + (Requirement::Register(rc1), Requirement::Register(rc2)) => { + if rc1 == rc2 { + self + } else { + Requirement::Conflict + } + } + (Requirement::Fixed(a), Requirement::Fixed(b)) if a == b => self, + _ => Requirement::Conflict, } } #[inline(always)] @@ -841,6 +868,7 @@ impl<'a, F: Function> Env<'a, F> { uses: smallvec![], merged_into: LiveRangeIndex::invalid(), + requirement: Requirement::Unknown, }); LiveRangeIndex::new(idx) @@ -926,7 +954,9 @@ impl<'a, F: Function> Env<'a, F> { weight, ); + let req = Requirement::from_operand(u.operand); self.ranges[into.index()].uses.push(u); + self.ranges[into.index()].requirement = self.ranges[into.index()].requirement.merge(req); // Update stats. self.ranges[into.index()].uses_spill_weight_and_flags += weight; @@ -1953,6 +1983,7 @@ impl<'a, F: Function> Env<'a, F> { spillset: SpillSetIndex::invalid(), prio: 0, spill_weight_and_props: 0, + requirement: Requirement::Unknown, }); LiveBundleIndex::new(bundle) } @@ -1997,7 +2028,25 @@ impl<'a, F: Function> Env<'a, F> { } } - // Check for overlap in LiveRanges. + log::debug!( + "bundle{} has req {:?}, bundle{} has req {:?}", + from.index(), + self.bundles[from.index()].requirement, + to.index(), + self.bundles[to.index()].requirement + ); + + if self.bundles[from.index()] + .requirement + .merge(self.bundles[to.index()].requirement) + == Requirement::Conflict + { + log::debug!(" -> conflicting requirements; aborting merge"); + return false; + } + + // Check for overlap in LiveRanges and for conflicting + // requirements. let ranges_from = &self.bundles[from.index()].ranges[..]; let ranges_to = &self.bundles[to.index()].ranges[..]; let mut idx_from = 0; @@ -2076,28 +2125,36 @@ impl<'a, F: Function> Env<'a, F> { ranges_from, ranges_to ); + let mut req = Requirement::Unknown; while idx_from < ranges_from.len() || idx_to < ranges_to.len() { if idx_from < ranges_from.len() && idx_to < ranges_to.len() { if ranges_from[idx_from].range.from <= ranges_to[idx_to].range.from { self.ranges[ranges_from[idx_from].index.index()].bundle = to; + req = req.merge(self.ranges[ranges_from[idx_from].index.index()].requirement); merged.push(ranges_from[idx_from]); idx_from += 1; } else { + req = req.merge(self.ranges[ranges_to[idx_to].index.index()].requirement); merged.push(ranges_to[idx_to]); idx_to += 1; } } else if idx_from < ranges_from.len() { for entry in &ranges_from[idx_from..] { self.ranges[entry.index.index()].bundle = to; + req = req.merge(self.ranges[entry.index.index()].requirement); } merged.extend_from_slice(&ranges_from[idx_from..]); break; } else { assert!(idx_to < ranges_to.len()); + for entry in &ranges_to[idx_to..] 
{ + req = req.merge(self.ranges[entry.index.index()].requirement); + } merged.extend_from_slice(&ranges_to[idx_to..]); break; } } + self.bundles[to.index()].requirement = req; #[cfg(debug_assertions)] { @@ -2167,6 +2224,7 @@ impl<'a, F: Function> Env<'a, F> { let bundle = self.create_bundle(); self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); + let mut req = Requirement::Unknown; for entry in &self.bundles[bundle.index()].ranges { log::debug!( " -> with LR range{}: {:?}", @@ -2174,7 +2232,9 @@ impl<'a, F: Function> Env<'a, F> { entry.range ); self.ranges[entry.index.index()].bundle = bundle; + req = req.merge(self.ranges[entry.index.index()].requirement); } + self.bundles[bundle.index()].requirement = req; // Create a spillslot for this bundle. let ssidx = SpillSetIndex::new(self.spillsets.len()); @@ -2389,14 +2449,10 @@ impl<'a, F: Function> Env<'a, F> { } } - fn compute_requirement(&self, bundle: LiveBundleIndex) -> Option { + fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { log::debug!("compute_requirement: bundle {:?}", bundle); - let class = self.spillsets[self.bundles[bundle.index()].spillset.index()].class; - log::debug!(" -> class = {:?}", class); - - let mut needed = Requirement::Any(class); - + let mut needed = Requirement::Unknown; for entry in &self.bundles[bundle.index()].ranges { let range = &self.ranges[entry.index.index()]; log::debug!( @@ -2405,21 +2461,12 @@ impl<'a, F: Function> Env<'a, F> { entry.index, entry.range ); - for u in &range.uses { - let use_req = Requirement::from_operand(u.operand); - log::debug!( - " -> use at {:?} op {:?} req {:?}", - u.pos, - u.operand, - use_req - ); - needed = needed.merge(use_req)?; - log::debug!(" -> needed {:?}", needed); - } + needed = needed.merge(range.requirement); + log::debug!(" -> needed {:?}", needed); } log::debug!(" -> final needed: {:?}", needed); - Some(needed) + needed } fn try_to_allocate_bundle_to_reg( @@ -2613,16 +2660,19 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> minimal: {}", minimal); } - let spill_weight = if minimal { - if fixed { + let (spill_weight, req) = if minimal { + let w = if fixed { log::debug!(" -> fixed and minimal: spill weight 2000000"); 2_000_000 } else { log::debug!(" -> non-fixed and minimal: spill weight 1000000"); 1_000_000 - } + }; + let req = self.ranges[first_range.index()].requirement; + (w, req) } else { let mut total = 0; + let mut req = Requirement::Unknown; for entry in &self.bundles[bundle.index()].ranges { let range_data = &self.ranges[entry.index.index()]; log::debug!( @@ -2630,6 +2680,7 @@ impl<'a, F: Function> Env<'a, F> { range_data.uses_spill_weight() ); total += range_data.uses_spill_weight(); + req = req.merge(range_data.requirement); } if self.bundles[bundle.index()].prio > 0 { @@ -2638,9 +2689,9 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].prio, total / self.bundles[bundle.index()].prio ); - total / self.bundles[bundle.index()].prio + (total / self.bundles[bundle.index()].prio, req) } else { - 0 + (0, req) } }; @@ -2649,6 +2700,7 @@ impl<'a, F: Function> Env<'a, F> { minimal, fixed, ); + self.bundles[bundle.index()].requirement = req; } fn minimal_bundle(&mut self, bundle: LiveBundleIndex) -> bool { @@ -2657,8 +2709,14 @@ impl<'a, F: Function> Env<'a, F> { fn recompute_range_properties(&mut self, range: LiveRangeIndex) { let mut rangedata = &mut self.ranges[range.index()]; - let w = rangedata.uses.iter().map(|u| u.weight as 
u32).sum(); + let mut w = 0; + let mut req = Requirement::Unknown; + for u in &rangedata.uses { + w += u.weight as u32; + req = req.merge(Requirement::from_operand(u.operand)); + } rangedata.uses_spill_weight_and_flags = w; + rangedata.requirement = req; if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { rangedata.set_flag(LiveRangeFlag::StartsAtDef); } @@ -2882,12 +2940,6 @@ impl<'a, F: Function> Env<'a, F> { attempts += 1; log::debug!("attempt {}, req {:?}", attempts, req); debug_assert!(attempts < 100 * self.func.insts()); - let req = match req { - Some(r) => r, - // `None` means conflicting requirements, hence impossible to - // allocate. - None => break, - }; let (conflicting_bundles, latest_first_conflict_point, latest_first_conflict_reg) = match req { @@ -3011,13 +3063,17 @@ impl<'a, F: Function> Env<'a, F> { return Ok(()); } - Requirement::Any(_) => { + Requirement::Any(_) | Requirement::Unknown => { // If a register is not *required*, spill now (we'll retry // allocation on spilled bundles later). log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); self.spilled_bundles.push(bundle); return Ok(()); } + + Requirement::Conflict => { + break; + } }; log::debug!(" -> conflict set {:?}", conflicting_bundles); @@ -3067,7 +3123,7 @@ impl<'a, F: Function> Env<'a, F> { // A minimal bundle cannot be split. if self.minimal_bundle(bundle) { - if let Some(Requirement::Register(class)) = req { + if let Requirement::Register(class) = req { // Check if this is a too-many-live-registers situation. let range = self.bundles[bundle.index()].ranges[0].range; let mut min_bundles_assigned = 0; From 78c009181c901944bc6cb55cec9f73d08a01bdc5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 15:47:15 -0700 Subject: [PATCH 077/155] Fuzzbug fix re: new requirements computation and multi-fixed-reg fixup. --- src/ion/mod.rs | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index e2c1a407..178af3d0 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -946,21 +946,26 @@ impl<'a, F: Function> Env<'a, F> { })); let weight = spill_weight_from_policy(policy, is_hot, operand.kind() != OperandKind::Use); u.weight = u16::try_from(weight).expect("weight too large for u16 field"); + let req = Requirement::from_operand(u.operand); log::debug!( - "insert use {:?} into lr {:?} with weight {}", + "insert use {:?} into lr {:?} with weight {} req {:?}", u, into, weight, + req, ); - let req = Requirement::from_operand(u.operand); self.ranges[into.index()].uses.push(u); self.ranges[into.index()].requirement = self.ranges[into.index()].requirement.merge(req); // Update stats. 
self.ranges[into.index()].uses_spill_weight_and_flags += weight; - log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight()); + log::debug!( + " -> now range has weight {} req {:?}", + self.ranges[into.index()].uses_spill_weight(), + self.ranges[into.index()].requirement + ); } fn find_vreg_liverange_for_pos( @@ -1904,6 +1909,7 @@ impl<'a, F: Function> Env<'a, F> { } }; + let mut req = Requirement::Unknown; for u in &mut self.ranges[range.index()].uses { let pos = u.pos; let slot = u.slot as usize; @@ -1913,7 +1919,9 @@ impl<'a, F: Function> Env<'a, F> { &mut u.operand, &mut self.multi_fixed_reg_fixups, ); + req = req.merge(Requirement::from_operand(u.operand)); } + self.ranges[range.index()].requirement = req; for &(clobber, inst) in &extra_clobbers { let range = CodeRange { @@ -2407,9 +2415,10 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("Bundles:"); for (i, b) in self.bundles.iter().enumerate() { log::debug!( - "bundle{}: spillset={:?} alloc={:?}", + "bundle{}: spillset={:?} req={:?} alloc={:?}", i, b.spillset, + b.requirement, b.allocation ); for entry in &b.ranges { @@ -2436,11 +2445,12 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("Ranges:"); for (i, r) in self.ranges.iter().enumerate() { log::debug!( - concat!("range{}: range={:?} vreg={:?} bundle={:?} ", "weight={}"), + "range{}: range={:?} vreg={:?} bundle={:?} req={:?} weight={}", i, r.range, r.vreg, r.bundle, + r.requirement, r.uses_spill_weight(), ); for u in &r.uses { @@ -2449,26 +2459,6 @@ impl<'a, F: Function> Env<'a, F> { } } - fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { - log::debug!("compute_requirement: bundle {:?}", bundle); - - let mut needed = Requirement::Unknown; - for entry in &self.bundles[bundle.index()].ranges { - let range = &self.ranges[entry.index.index()]; - log::debug!( - " -> range LR {} ({:?}): {:?}", - entry.index.index(), - entry.index, - entry.range - ); - needed = needed.merge(range.requirement); - log::debug!(" -> needed {:?}", needed); - } - - log::debug!(" -> final needed: {:?}", needed); - needed - } - fn try_to_allocate_bundle_to_reg( &mut self, bundle: LiveBundleIndex, @@ -2713,8 +2703,10 @@ impl<'a, F: Function> Env<'a, F> { let mut req = Requirement::Unknown; for u in &rangedata.uses { w += u.weight as u32; + log::debug!("range{}: use {:?}", range.index(), u); req = req.merge(Requirement::from_operand(u.operand)); } + log::debug!("range{}: recomputed req = {:?}", range.index(), req); rangedata.uses_spill_weight_and_flags = w; rangedata.requirement = req; if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { @@ -2916,9 +2908,10 @@ impl<'a, F: Function> Env<'a, F> { bundle: LiveBundleIndex, reg_hint: PReg, ) -> Result<(), RegAllocError> { - // Find any requirements: for every LR, for every def/use, gather - // requirements (fixed-reg, any-reg, any) and merge them. - let req = self.compute_requirement(bundle); + // The requirement is kept up-to-date as bundles are merged + // and split, and indicates what sort of allocation we need. + let req = self.bundles[bundle.index()].requirement; + // Grab a hint from either the queue or our spillset, if any. 
let hint_reg = if reg_hint != PReg::invalid() { reg_hint From 10d926557a09861ac2a62ff0634c3006a16c014b Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 15:52:14 -0700 Subject: [PATCH 078/155] avoid some redundant work by computing initial reqs only once --- src/ion/mod.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 178af3d0..dfdb7034 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -946,25 +946,25 @@ impl<'a, F: Function> Env<'a, F> { })); let weight = spill_weight_from_policy(policy, is_hot, operand.kind() != OperandKind::Use); u.weight = u16::try_from(weight).expect("weight too large for u16 field"); - let req = Requirement::from_operand(u.operand); log::debug!( - "insert use {:?} into lr {:?} with weight {} req {:?}", + "insert use {:?} into lr {:?} with weight {}", u, into, weight, - req, ); + // N.B.: we do *not* update `requirement` on the range, + // because those will be computed during the multi-fixed-reg + // fixup pass later (after all uses are inserted). + self.ranges[into.index()].uses.push(u); - self.ranges[into.index()].requirement = self.ranges[into.index()].requirement.merge(req); // Update stats. self.ranges[into.index()].uses_spill_weight_and_flags += weight; log::debug!( - " -> now range has weight {} req {:?}", + " -> now range has weight {}", self.ranges[into.index()].uses_spill_weight(), - self.ranges[into.index()].requirement ); } @@ -1909,6 +1909,11 @@ impl<'a, F: Function> Env<'a, F> { } }; + // N.B.: this is important even if we later remove the + // multi-fixed-reg fixup scheme, because it is the + // only place where range requirements are (initially) + // computed! We do it only here in order to avoid + // redundant work. let mut req = Requirement::Unknown; for u in &mut self.ranges[range.index()].uses { let pos = u.pos; From 51206817304384dbc01a3195c658dfcfeccc7cae Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 17:49:45 -0700 Subject: [PATCH 079/155] Fuzzbug fix for requirement recomputation on minimal bundles with multiple LRs --- src/ion/mod.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index dfdb7034..7425dfad 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2663,7 +2663,14 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> non-fixed and minimal: spill weight 1000000"); 1_000_000 }; - let req = self.ranges[first_range.index()].requirement; + // Even a minimal bundle may have multiple ranges (one for + // pre and one for post of one instruction). We need to + // iterate over all (up to 2) to merge requiements. + let mut req = Requirement::Unknown; + for entry in &self.bundles[bundle.index()].ranges { + req = req.merge(self.ranges[entry.index.index()].requirement); + } + log::debug!(" -> req from first range: {:?}", req); (w, req) } else { let mut total = 0; @@ -2677,6 +2684,7 @@ impl<'a, F: Function> Env<'a, F> { total += range_data.uses_spill_weight(); req = req.merge(range_data.requirement); } + log::debug!(" -> req from all ranges: {:?}", req); if self.bundles[bundle.index()].prio > 0 { log::debug!( From 5895ae8a2d4d26fd9c95a2104ec2aa61f0eabc77 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 21:32:41 -0700 Subject: [PATCH 080/155] Remove precomputed requirements from ranges and bundles; cost of struct size and updates is not worth it. 
Instead, do a simple conflicting-requirements check on each attempted merge, but minimize cost by (i) only checking after ruling out overlaps, and (ii) only checking if we know one of the sides has at least one non-register constraint. --- src/ion/mod.rs | 146 ++++++++++++++++++++++--------------------------- 1 file changed, 66 insertions(+), 80 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 7425dfad..036a7656 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -160,7 +160,6 @@ struct LiveRange { uses: UseList, merged_into: LiveRangeIndex, - requirement: Requirement, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -238,15 +237,22 @@ struct LiveBundle { allocation: Allocation, prio: u32, // recomputed after every bulk update spill_weight_and_props: u32, - requirement: Requirement, } impl LiveBundle { #[inline(always)] - fn set_cached_spill_weight_and_props(&mut self, spill_weight: u32, minimal: bool, fixed: bool) { - debug_assert!(spill_weight < ((1 << 30) - 1)); - self.spill_weight_and_props = - spill_weight | (if minimal { 1 << 31 } else { 0 }) | (if fixed { 1 << 30 } else { 0 }); + fn set_cached_spill_weight_and_props( + &mut self, + spill_weight: u32, + minimal: bool, + fixed: bool, + stack: bool, + ) { + debug_assert!(spill_weight < ((1 << 29) - 1)); + self.spill_weight_and_props = spill_weight + | (if minimal { 1 << 31 } else { 0 }) + | (if fixed { 1 << 30 } else { 0 }) + | (if stack { 1 << 29 } else { 0 }); } #[inline(always)] @@ -259,9 +265,14 @@ impl LiveBundle { self.spill_weight_and_props & (1 << 30) != 0 } + #[inline(always)] + fn cached_stack(&self) -> bool { + self.spill_weight_and_props & (1 << 29) != 0 + } + #[inline(always)] fn cached_spill_weight(&self) -> u32 { - self.spill_weight_and_props & ((1 << 30) - 1) + self.spill_weight_and_props & ((1 << 29) - 1) } } @@ -868,7 +879,6 @@ impl<'a, F: Function> Env<'a, F> { uses: smallvec![], merged_into: LiveRangeIndex::invalid(), - requirement: Requirement::Unknown, }); LiveRangeIndex::new(idx) @@ -1909,12 +1919,6 @@ impl<'a, F: Function> Env<'a, F> { } }; - // N.B.: this is important even if we later remove the - // multi-fixed-reg fixup scheme, because it is the - // only place where range requirements are (initially) - // computed! We do it only here in order to avoid - // redundant work. - let mut req = Requirement::Unknown; for u in &mut self.ranges[range.index()].uses { let pos = u.pos; let slot = u.slot as usize; @@ -1924,9 +1928,7 @@ impl<'a, F: Function> Env<'a, F> { &mut u.operand, &mut self.multi_fixed_reg_fixups, ); - req = req.merge(Requirement::from_operand(u.operand)); } - self.ranges[range.index()].requirement = req; for &(clobber, inst) in &extra_clobbers { let range = CodeRange { @@ -1996,7 +1998,6 @@ impl<'a, F: Function> Env<'a, F> { spillset: SpillSetIndex::invalid(), prio: 0, spill_weight_and_props: 0, - requirement: Requirement::Unknown, }); LiveBundleIndex::new(bundle) } @@ -2041,23 +2042,6 @@ impl<'a, F: Function> Env<'a, F> { } } - log::debug!( - "bundle{} has req {:?}, bundle{} has req {:?}", - from.index(), - self.bundles[from.index()].requirement, - to.index(), - self.bundles[to.index()].requirement - ); - - if self.bundles[from.index()] - .requirement - .merge(self.bundles[to.index()].requirement) - == Requirement::Conflict - { - log::debug!(" -> conflicting requirements; aborting merge"); - return false; - } - // Check for overlap in LiveRanges and for conflicting // requirements. 
let ranges_from = &self.bundles[from.index()].ranges[..]; @@ -2091,6 +2075,27 @@ impl<'a, F: Function> Env<'a, F> { } } + // There could be a requirements conflict only one of the both + // sides of the merge has at least one requirement that is not + // 'Reg' or 'Any'. (Note that we already checked that the + // RegClass is the same on both sides.) + if self.bundles[from.index()].cached_fixed() + || self.bundles[from.index()].cached_stack() + || self.bundles[to.index()].cached_fixed() + || self.bundles[to.index()].cached_stack() + { + let mut req = Requirement::Unknown; + for entry in ranges_from.iter().chain(ranges_to.iter()) { + for u in &self.ranges[entry.index.index()].uses { + req = req.merge(Requirement::from_operand(u.operand)); + if req == Requirement::Conflict { + log::debug!(" -> conflicting requirements; aborting merge"); + return false; + } + } + } + } + log::debug!(" -> committing to merge"); // If we reach here, then the bundles do not overlap -- merge @@ -2138,36 +2143,28 @@ impl<'a, F: Function> Env<'a, F> { ranges_from, ranges_to ); - let mut req = Requirement::Unknown; while idx_from < ranges_from.len() || idx_to < ranges_to.len() { if idx_from < ranges_from.len() && idx_to < ranges_to.len() { if ranges_from[idx_from].range.from <= ranges_to[idx_to].range.from { self.ranges[ranges_from[idx_from].index.index()].bundle = to; - req = req.merge(self.ranges[ranges_from[idx_from].index.index()].requirement); merged.push(ranges_from[idx_from]); idx_from += 1; } else { - req = req.merge(self.ranges[ranges_to[idx_to].index.index()].requirement); merged.push(ranges_to[idx_to]); idx_to += 1; } } else if idx_from < ranges_from.len() { for entry in &ranges_from[idx_from..] { self.ranges[entry.index.index()].bundle = to; - req = req.merge(self.ranges[entry.index.index()].requirement); } merged.extend_from_slice(&ranges_from[idx_from..]); break; } else { assert!(idx_to < ranges_to.len()); - for entry in &ranges_to[idx_to..] { - req = req.merge(self.ranges[entry.index.index()].requirement); - } merged.extend_from_slice(&ranges_to[idx_to..]); break; } } - self.bundles[to.index()].requirement = req; #[cfg(debug_assertions)] { @@ -2237,7 +2234,6 @@ impl<'a, F: Function> Env<'a, F> { let bundle = self.create_bundle(); self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); - let mut req = Requirement::Unknown; for entry in &self.bundles[bundle.index()].ranges { log::debug!( " -> with LR range{}: {:?}", @@ -2245,9 +2241,7 @@ impl<'a, F: Function> Env<'a, F> { entry.range ); self.ranges[entry.index.index()].bundle = bundle; - req = req.merge(self.ranges[entry.index.index()].requirement); } - self.bundles[bundle.index()].requirement = req; // Create a spillslot for this bundle. 
let ssidx = SpillSetIndex::new(self.spillsets.len()); @@ -2420,10 +2414,9 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("Bundles:"); for (i, b) in self.bundles.iter().enumerate() { log::debug!( - "bundle{}: spillset={:?} req={:?} alloc={:?}", + "bundle{}: spillset={:?} alloc={:?}", i, b.spillset, - b.requirement, b.allocation ); for entry in &b.ranges { @@ -2450,12 +2443,11 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("Ranges:"); for (i, r) in self.ranges.iter().enumerate() { log::debug!( - "range{}: range={:?} vreg={:?} bundle={:?} req={:?} weight={}", + "range{}: range={:?} vreg={:?} bundle={:?} weight={}", i, r.range, r.vreg, r.bundle, - r.requirement, r.uses_spill_weight(), ); for u in &r.uses { @@ -2623,6 +2615,7 @@ impl<'a, F: Function> Env<'a, F> { let minimal; let mut fixed = false; + let mut stack = false; let bundledata = &self.bundles[bundle.index()]; let first_range = bundledata.ranges[0].index; let first_range_data = &self.ranges[first_range.index()]; @@ -2636,6 +2629,12 @@ impl<'a, F: Function> Env<'a, F> { if let OperandPolicy::FixedReg(_) = u.operand.policy() { log::debug!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); fixed = true; + } + if let OperandPolicy::Stack = u.operand.policy() { + log::debug!(" -> stack use at {:?}: {:?}", u.pos, u.operand); + stack = true; + } + if stack && fixed { break; } } @@ -2655,26 +2654,16 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> minimal: {}", minimal); } - let (spill_weight, req) = if minimal { - let w = if fixed { + let spill_weight = if minimal { + if fixed { log::debug!(" -> fixed and minimal: spill weight 2000000"); 2_000_000 } else { log::debug!(" -> non-fixed and minimal: spill weight 1000000"); 1_000_000 - }; - // Even a minimal bundle may have multiple ranges (one for - // pre and one for post of one instruction). We need to - // iterate over all (up to 2) to merge requiements. 
- let mut req = Requirement::Unknown; - for entry in &self.bundles[bundle.index()].ranges { - req = req.merge(self.ranges[entry.index.index()].requirement); } - log::debug!(" -> req from first range: {:?}", req); - (w, req) } else { let mut total = 0; - let mut req = Requirement::Unknown; for entry in &self.bundles[bundle.index()].ranges { let range_data = &self.ranges[entry.index.index()]; log::debug!( @@ -2682,9 +2671,7 @@ impl<'a, F: Function> Env<'a, F> { range_data.uses_spill_weight() ); total += range_data.uses_spill_weight(); - req = req.merge(range_data.requirement); } - log::debug!(" -> req from all ranges: {:?}", req); if self.bundles[bundle.index()].prio > 0 { log::debug!( @@ -2692,9 +2679,9 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].prio, total / self.bundles[bundle.index()].prio ); - (total / self.bundles[bundle.index()].prio, req) + total / self.bundles[bundle.index()].prio } else { - (0, req) + 0 } }; @@ -2702,8 +2689,8 @@ impl<'a, F: Function> Env<'a, F> { spill_weight, minimal, fixed, + stack, ); - self.bundles[bundle.index()].requirement = req; } fn minimal_bundle(&mut self, bundle: LiveBundleIndex) -> bool { @@ -2713,15 +2700,11 @@ impl<'a, F: Function> Env<'a, F> { fn recompute_range_properties(&mut self, range: LiveRangeIndex) { let mut rangedata = &mut self.ranges[range.index()]; let mut w = 0; - let mut req = Requirement::Unknown; for u in &rangedata.uses { w += u.weight as u32; log::debug!("range{}: use {:?}", range.index(), u); - req = req.merge(Requirement::from_operand(u.operand)); } - log::debug!("range{}: recomputed req = {:?}", range.index(), req); rangedata.uses_spill_weight_and_flags = w; - rangedata.requirement = req; if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { rangedata.set_flag(LiveRangeFlag::StartsAtDef); } @@ -2916,27 +2899,30 @@ impl<'a, F: Function> Env<'a, F> { .insert(new_bundle, new_prio as usize, PReg::invalid()); } + fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { + let mut req = Requirement::Unknown; + for entry in &self.bundles[bundle.index()].ranges { + for u in &self.ranges[entry.index.index()].uses { + let r = Requirement::from_operand(u.operand); + req = req.merge(r); + } + } + req + } + fn process_bundle( &mut self, bundle: LiveBundleIndex, reg_hint: PReg, ) -> Result<(), RegAllocError> { - // The requirement is kept up-to-date as bundles are merged - // and split, and indicates what sort of allocation we need. - let req = self.bundles[bundle.index()].requirement; - + let req = self.compute_requirement(bundle); // Grab a hint from either the queue or our spillset, if any. let hint_reg = if reg_hint != PReg::invalid() { reg_hint } else { self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint }; - log::debug!( - "process_bundle: bundle {:?} requirement {:?} hint {:?}", - bundle, - req, - hint_reg, - ); + log::debug!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); // Try to allocate! let mut attempts = 0; From 5b47462e0c9a214e5afcb6382bed1f188740bae1 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 22:09:41 -0700 Subject: [PATCH 081/155] Loop depth instead of hot/cold, with fast O(n) loop-depth computation. Use this to compute use weights. 
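A minimal standalone sketch of the same computation (illustrative only -- it uses a plain successor table and usize block indices instead of the crate's Block/Function types, and assumes blocks are already in RPO so that any edge whose target index is <= its source index is a backedge):

    fn approx_loop_depth(succs: &[Vec<usize>]) -> Vec<u32> {
        let nblocks = succs.len();
        // Count backedges entering and leaving each block.
        let mut backedge_in = vec![0u32; nblocks];
        let mut backedge_out = vec![0u32; nblocks];
        for (block, block_succs) in succs.iter().enumerate() {
            for &succ in block_succs {
                if succ <= block {
                    backedge_in[succ] += 1;
                    backedge_out[block] += 1;
                }
            }
        }
        let mut depth: Vec<u32> = Vec::with_capacity(nblocks);
        let mut open_loops: Vec<u32> = vec![]; // unmatched backedges per open loop
        let mut cur = 0u32;
        for block in 0..nblocks {
            if backedge_in[block] > 0 {
                // This block is the target of a backedge: a loop header.
                cur += 1;
                open_loops.push(backedge_in[block]);
            }
            depth.push(cur);
            // Each backedge leaving this block closes part of the innermost
            // open loop; once all its backedges are matched, the loop ends.
            while !open_loops.is_empty() && backedge_out[block] > 0 {
                backedge_out[block] -= 1;
                *open_loops.last_mut().unwrap() -= 1;
                if *open_loops.last().unwrap() == 0 {
                    cur -= 1;
                    open_loops.pop();
                }
            }
        }
        depth
    }

For a two-level nest laid out as header0, header1, body, latch1 (back to header1), latch0 (back to header0), exit, this gives depths [1, 2, 2, 2, 1, 0].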
--- src/cfg.rs | 40 ++++++++++++++++++++++++++++++++++++++ src/ion/mod.rs | 52 ++++++++------------------------------------------ 2 files changed, 48 insertions(+), 44 deletions(-) diff --git a/src/cfg.rs b/src/cfg.rs index 0ad6c07f..65638e7c 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -6,6 +6,7 @@ //! Lightweight CFG analyses. use crate::{domtree, postorder, Block, Function, Inst, OperandKind, ProgPoint, RegAllocError}; +use smallvec::{smallvec, SmallVec}; #[derive(Clone, Debug)] pub struct CFGInfo { @@ -34,6 +35,14 @@ pub struct CFGInfo { /// just one value per block and always know any block's position in its /// successors' preds lists.) pub pred_pos: Vec, + /// For each block, what is the approximate loop depth? + /// + /// This measure is fully precise iff the input CFG is reducible + /// and blocks are in RPO, so that loop backedges are precisely + /// those whose block target indices are less than their source + /// indices. Otherwise, it will be approximate, but should still + /// be usable for heuristic purposes. + pub approx_loop_depth: Vec, } impl CFGInfo { @@ -52,6 +61,8 @@ impl CFGInfo { let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; let mut pred_pos = vec![0; f.blocks()]; + let mut backedge_in = vec![0; f.blocks()]; + let mut backedge_out = vec![0; f.blocks()]; for block in 0..f.blocks() { let block = Block::new(block); @@ -104,6 +115,34 @@ impl CFGInfo { return Err(RegAllocError::DisallowedBranchArg(last)); } } + + for &succ in f.block_succs(block) { + if succ.index() <= block.index() { + backedge_in[succ.index()] += 1; + backedge_out[block.index()] += 1; + } + } + } + + let mut approx_loop_depth = vec![]; + let mut backedge_stack: SmallVec<[usize; 4]> = smallvec![]; + let mut cur_depth = 0; + for block in 0..f.blocks() { + if backedge_in[block] > 0 { + cur_depth += 1; + backedge_stack.push(backedge_in[block]); + } + + approx_loop_depth.push(cur_depth); + + while backedge_stack.len() > 0 && backedge_out[block] > 0 { + backedge_out[block] -= 1; + *backedge_stack.last_mut().unwrap() -= 1; + if *backedge_stack.last().unwrap() == 0 { + cur_depth -= 1; + backedge_stack.pop(); + } + } } Ok(CFGInfo { @@ -115,6 +154,7 @@ impl CFGInfo { block_entry, block_exit, pred_pos, + approx_loop_depth, }) } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 036a7656..fcbe0fad 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -354,7 +354,6 @@ struct Env<'a, F: Function> { vreg_regs: Vec, pregs: Vec, allocation_queue: PrioQueue, - hot_code: LiveRangeSet, clobbers: Vec, // Sorted list of insts with clobbers. safepoints: Vec, // Sorted list of safepoint insts. safepoints_per_vreg: HashMap>, @@ -524,8 +523,10 @@ impl LiveRangeSet { } #[inline(always)] -fn spill_weight_from_policy(policy: OperandPolicy, is_hot: bool, is_def: bool) -> u32 { - let hot_bonus = if is_hot { 10000 } else { 0 }; +fn spill_weight_from_policy(policy: OperandPolicy, loop_depth: usize, is_def: bool) -> u32 { + // A bonus of 1000 for one loop level, 4000 for two loop levels, + // 16000 for three loop levels, etc. Avoids exponentiation. 
+ let hot_bonus = std::cmp::min(16000, 1000 * (1 << (2 * loop_depth))); let def_bonus = if is_def { 2000 } else { 0 }; let policy_bonus = match policy { OperandPolicy::Any => 1000, @@ -794,7 +795,6 @@ impl<'a, F: Function> Env<'a, F> { clobbers: vec![], safepoints: vec![], safepoints_per_vreg: HashMap::new(), - hot_code: LiveRangeSet::new(), spilled_bundles: vec![], spillslots: vec![], slots_by_size: vec![], @@ -944,17 +944,12 @@ impl<'a, F: Function> Env<'a, F> { } fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { - let insert_pos = u.pos; let operand = u.operand; let policy = operand.policy(); - let is_hot = self - .hot_code - .btree - .contains_key(&LiveRangeKey::from_range(&CodeRange { - from: insert_pos, - to: insert_pos.next(), - })); - let weight = spill_weight_from_policy(policy, is_hot, operand.kind() != OperandKind::Use); + let block = self.cfginfo.insn_block[u.pos.inst().index()]; + let loop_depth = self.cfginfo.approx_loop_depth[block.index()] as usize; + let weight = + spill_weight_from_policy(policy, loop_depth, operand.kind() != OperandKind::Use); u.weight = u16::try_from(weight).expect("weight too large for u16 field"); log::debug!( @@ -1960,36 +1955,6 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - fn compute_hot_code(&mut self) { - // Initialize hot_code to contain inner loops only. - let mut header = Block::invalid(); - let mut backedge = Block::invalid(); - for block in 0..self.func.blocks() { - let block = Block::new(block); - let max_backedge = self - .func - .block_preds(block) - .iter() - .filter(|b| b.index() >= block.index()) - .max(); - if let Some(&b) = max_backedge { - header = block; - backedge = b; - } - if block == backedge { - // We've traversed a loop body without finding a deeper loop. Mark the whole body - // as hot. - let from = self.cfginfo.block_entry[header.index()]; - let to = self.cfginfo.block_exit[backedge.index()].next(); - let range = CodeRange { from, to }; - let lr = self.create_liverange(range); - self.hot_code - .btree - .insert(LiveRangeKey::from_range(&range), lr); - } - } - } - fn create_bundle(&mut self) -> LiveBundleIndex { let bundle = self.bundles.len(); self.bundles.push(LiveBundle { @@ -4357,7 +4322,6 @@ impl<'a, F: Function> Env<'a, F> { pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { self.create_pregs_and_vregs(); - self.compute_hot_code(); self.compute_liveness()?; self.merge_vreg_bundles(); self.queue_bundles(); From 3382f9a2e8d9d256548c5bfa6154d8ec151273da Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 22:26:57 -0700 Subject: [PATCH 082/155] Split based on first conflict of lowest-weight conflict, not first conflict. Also stop scanning PRegs when max bundle weight in conflict bundle list exceeds current best option. --- src/ion/mod.rs | 282 +++++++++++++++++++++++++++---------------------- 1 file changed, 153 insertions(+), 129 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index fcbe0fad..dcca5559 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -24,7 +24,10 @@ - Split heuristics: - Loop depth at split point? Split before entering more nested loop - - Split at earliest vs latest conflict -- study more + - In general, consider 'weight' of split point as if it were + another use. + + - Add weight to bundles according to progmoves - Reduced spilling when spillslot is still "clean": - When we allocate spillsets, use the whole bundle of a given @@ -44,12 +47,6 @@ scan in a single range while resolving moves; in-edge makes dirty. 
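In sketch form (hypothetical helper, not the allocator's actual API; conflict sets reduced to plain vectors of bundle spill weights per candidate register), the probe loop now keeps the cheapest eviction option found so far and abandons a register's scan as soon as it cannot beat it:

    // conflicts[reg] holds the spill weights of the bundles that would have
    // to be evicted to use `reg`. Pick the register whose most expensive
    // conflicting bundle is cheapest; bail out of a register's scan early
    // once its running maximum exceeds the best bound (the analogue of
    // AllocRegResult::ConflictHighCost). An empty conflict list would mean
    // the bundle simply fits, which the real pass handles before this point.
    fn pick_cheapest_eviction(conflicts: &[Vec<u32>]) -> Option<(usize, u32)> {
        let mut best: Option<(usize, u32)> = None;
        'regs: for (reg, weights) in conflicts.iter().enumerate() {
            let mut max_weight = 0u32;
            for &w in weights {
                max_weight = max_weight.max(w);
                if let Some((_, best_cost)) = best {
                    if max_weight > best_cost {
                        continue 'regs; // cannot beat the current best option
                    }
                }
            }
            if best.map_or(true, |(_, cost)| max_weight < cost) {
                best = Some((reg, max_weight));
            }
        }
        best
    }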
- - Add weight to bundles according to progmoves - - - Efficiency improvements: - - Record 'cheapest evict bundle so far' and stop scanning if - total evict cost exceeds that - - Avoid requiring two scratch regs: - Require machine impl to be able to (i) push a reg, (ii) pop a reg; then generate a balanced pair of push/pop, using the stack @@ -611,6 +608,7 @@ enum AllocRegResult { Allocated(Allocation), Conflict(LiveBundleVec), ConflictWithFixed, + ConflictHighCost, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -2425,9 +2423,14 @@ impl<'a, F: Function> Env<'a, F> { &mut self, bundle: LiveBundleIndex, reg: PRegIndex, + // if the max bundle weight in the conflict set exceeds this + // cost (if provided), just return + // `AllocRegResult::ConflictHighCost`. + max_allowable_cost: Option, ) -> AllocRegResult { log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); let mut conflicts = smallvec![]; + let mut max_conflict_weight = 0; // Traverse the BTreeMap in order by requesting the whole // range spanned by the bundle and iterating over that // concurrently with our ranges. Because our ranges are in @@ -2499,6 +2502,15 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> conflict bundle {:?}", conflict_bundle); if !conflicts.iter().any(|b| *b == conflict_bundle) { conflicts.push(conflict_bundle); + max_conflict_weight = std::cmp::max( + max_conflict_weight, + self.bundles[conflict_bundle.index()].cached_spill_weight(), + ); + if max_allowable_cost.is_some() + && max_conflict_weight > max_allowable_cost.unwrap() + { + return AllocRegResult::ConflictHighCost; + } } } else { log::debug!(" -> conflict with fixed reservation"); @@ -2898,146 +2910,158 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("attempt {}, req {:?}", attempts, req); debug_assert!(attempts < 100 * self.func.insts()); - let (conflicting_bundles, latest_first_conflict_point, latest_first_conflict_reg) = - match req { - Requirement::Fixed(preg) => { + let (conflicting_bundles, first_conflict_point, first_conflict_reg) = match req { + Requirement::Fixed(preg) => { + let preg_idx = PRegIndex::new(preg.index()); + self.stats.process_bundle_reg_probes_fixed += 1; + log::debug!("trying fixed reg {:?}", preg_idx); + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_fixed += 1; + log::debug!(" -> allocated to fixed {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = alloc.as_reg().unwrap(); + return Ok(()); + } + AllocRegResult::Conflict(bundles) => { + log::debug!(" -> conflict with bundles {:?}", bundles); + let first_bundle = bundles[0]; + ( + bundles, + self.bundles[first_bundle.index()].ranges[0].range.from, + preg, + ) + } + AllocRegResult::ConflictWithFixed => { + log::debug!(" -> conflict with fixed alloc"); + // Empty conflicts set: there's nothing we can + // evict, because fixed conflicts cannot be moved. + ( + smallvec![], + ProgPoint::before(Inst::new(0)), + PReg::invalid(), + ) + } + AllocRegResult::ConflictHighCost => unreachable!(), + } + } + Requirement::Register(class) => { + // Scan all pregs and attempt to allocate. 
+ let mut lowest_cost_conflict_set: Option = None; + let mut lowest_cost_conflict_cost: Option = None; + let mut lowest_cost_conflict_point = ProgPoint::before(Inst::new(0)); + let mut lowest_cost_conflict_reg = PReg::invalid(); + + // Heuristic: start the scan for an available + // register at an offset influenced both by our + // location in the code and by the bundle we're + // considering. This has the effect of spreading + // demand more evenly across registers. + let scan_offset = self.ranges + [self.bundles[bundle.index()].ranges[0].index.index()] + .range + .from + .inst() + .index() + + bundle.index(); + + self.stats.process_bundle_reg_probe_start_any += 1; + for preg in RegTraversalIter::new( + self.env, + class, + hint_reg, + PReg::invalid(), + scan_offset, + ) { + self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); - self.stats.process_bundle_reg_probes_fixed += 1; - log::debug!("trying fixed reg {:?}", preg_idx); - match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { + log::debug!("trying preg {:?}", preg_idx); + + match self.try_to_allocate_bundle_to_reg( + bundle, + preg_idx, + lowest_cost_conflict_cost, + ) { AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_fixed += 1; - log::debug!(" -> allocated to fixed {:?}", preg_idx); + self.stats.process_bundle_reg_success_any += 1; + log::debug!(" -> allocated to any {:?}", preg_idx); self.spillsets[self.bundles[bundle.index()].spillset.index()] .reg_hint = alloc.as_reg().unwrap(); return Ok(()); } AllocRegResult::Conflict(bundles) => { log::debug!(" -> conflict with bundles {:?}", bundles); - let first_bundle = bundles[0]; - ( - bundles, - self.bundles[first_bundle.index()].ranges[0].range.from, - preg, - ) + + let first_conflict_point = + self.bundles[bundles[0].index()].ranges[0].range.from; + + let cost = self.maximum_spill_weight_in_bundle_set(&bundles); + + if lowest_cost_conflict_cost.is_none() { + lowest_cost_conflict_cost = Some(cost); + lowest_cost_conflict_set = Some(bundles); + lowest_cost_conflict_point = first_conflict_point; + lowest_cost_conflict_reg = preg; + } else if cost < lowest_cost_conflict_cost.unwrap() { + lowest_cost_conflict_cost = Some(cost); + lowest_cost_conflict_set = Some(bundles); + lowest_cost_conflict_point = first_conflict_point; + lowest_cost_conflict_reg = preg; + } } AllocRegResult::ConflictWithFixed => { log::debug!(" -> conflict with fixed alloc"); - // Empty conflicts set: there's nothing we can - // evict, because fixed conflicts cannot be moved. - ( - smallvec![], - ProgPoint::before(Inst::new(0)), - PReg::invalid(), - ) + // Simply don't consider as an option. } - } - } - Requirement::Register(class) => { - // Scan all pregs and attempt to allocate. - let mut lowest_cost_conflict_set: Option = None; - let mut latest_first_conflict_point = ProgPoint::before(Inst::new(0)); - let mut latest_first_conflict_reg = PReg::invalid(); - - // Heuristic: start the scan for an available - // register at an offset influenced both by our - // location in the code and by the bundle we're - // considering. This has the effect of spreading - // demand more evenly across registers. 
- let scan_offset = self.ranges - [self.bundles[bundle.index()].ranges[0].index.index()] - .range - .from - .inst() - .index() - + bundle.index(); - - self.stats.process_bundle_reg_probe_start_any += 1; - for preg in RegTraversalIter::new( - self.env, - class, - hint_reg, - PReg::invalid(), - scan_offset, - ) { - self.stats.process_bundle_reg_probes_any += 1; - let preg_idx = PRegIndex::new(preg.index()); - log::debug!("trying preg {:?}", preg_idx); - match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { - AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_any += 1; - log::debug!(" -> allocated to any {:?}", preg_idx); - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = alloc.as_reg().unwrap(); - return Ok(()); - } - AllocRegResult::Conflict(bundles) => { - log::debug!(" -> conflict with bundles {:?}", bundles); - - let first_conflict_point = - self.bundles[bundles[0].index()].ranges[0].range.from; - if first_conflict_point > latest_first_conflict_point { - latest_first_conflict_point = first_conflict_point; - latest_first_conflict_reg = preg; - } - - if lowest_cost_conflict_set.is_none() { - lowest_cost_conflict_set = Some(bundles); - } else if self.maximum_spill_weight_in_bundle_set(&bundles) - < self.maximum_spill_weight_in_bundle_set( - lowest_cost_conflict_set.as_ref().unwrap(), - ) - { - lowest_cost_conflict_set = Some(bundles); - } - } - AllocRegResult::ConflictWithFixed => { - log::debug!(" -> conflict with fixed alloc"); - // Simply don't consider as an option. - } + AllocRegResult::ConflictHighCost => { + // Simply don't consider -- we already have + // a lower-cost conflict bundle option + // to evict. + continue; } } - - // Otherwise, we *require* a register, but didn't fit into - // any with current bundle assignments. Hence, we will need - // to either split or attempt to evict some bundles. Return - // the conflicting bundles to evict and retry. Empty list - // means nothing to try (due to fixed conflict) so we must - // split instead. - ( - lowest_cost_conflict_set.unwrap_or(smallvec![]), - latest_first_conflict_point, - latest_first_conflict_reg, - ) } - Requirement::Stack(_) => { - // If we must be on the stack, put ourselves on - // the spillset's list immediately. - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .bundles - .push(bundle); - return Ok(()); - } + // Otherwise, we *require* a register, but didn't fit into + // any with current bundle assignments. Hence, we will need + // to either split or attempt to evict some bundles. Return + // the conflicting bundles to evict and retry. Empty list + // means nothing to try (due to fixed conflict) so we must + // split instead. + ( + lowest_cost_conflict_set.unwrap_or(smallvec![]), + lowest_cost_conflict_point, + lowest_cost_conflict_reg, + ) + } - Requirement::Any(_) | Requirement::Unknown => { - // If a register is not *required*, spill now (we'll retry - // allocation on spilled bundles later). - log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); - self.spilled_bundles.push(bundle); - return Ok(()); - } + Requirement::Stack(_) => { + // If we must be on the stack, put ourselves on + // the spillset's list immediately. 
+ self.spillsets[self.bundles[bundle.index()].spillset.index()] + .bundles + .push(bundle); + return Ok(()); + } - Requirement::Conflict => { - break; - } - }; + Requirement::Any(_) | Requirement::Unknown => { + // If a register is not *required*, spill now (we'll retry + // allocation on spilled bundles later). + log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); + self.spilled_bundles.push(bundle); + return Ok(()); + } + + Requirement::Conflict => { + break; + } + }; log::debug!(" -> conflict set {:?}", conflicting_bundles); log::debug!( - " -> latest first conflict {:?} with reg {:?}", - latest_first_conflict_point, - latest_first_conflict_reg + " -> first conflict {:?} with reg {:?}", + first_conflict_point, + first_conflict_reg ); // If we have already tried evictions once before and are @@ -3053,8 +3077,8 @@ impl<'a, F: Function> Env<'a, F> { } let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; - split_at_point = std::cmp::max(latest_first_conflict_point, bundle_start); - requeue_with_reg = latest_first_conflict_reg; + split_at_point = std::cmp::max(first_conflict_point, bundle_start); + requeue_with_reg = first_conflict_reg; // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill // weight, then don't evict. @@ -3145,7 +3169,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("trying bundle {:?} to preg {:?}", bundle, preg); let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = - self.try_to_allocate_bundle_to_reg(bundle, preg_idx) + self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) { self.stats.spill_bundle_reg_success += 1; success = true; From 8887077b5960c9d8c1d57295f816d8d421e904e0 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 22:45:25 -0700 Subject: [PATCH 083/155] small fix: preserve starts-at-def flag when setting liverange weight --- src/ion/mod.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index dcca5559..8fe28a20 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2675,14 +2675,20 @@ impl<'a, F: Function> Env<'a, F> { } fn recompute_range_properties(&mut self, range: LiveRangeIndex) { - let mut rangedata = &mut self.ranges[range.index()]; + let rangedata = &mut self.ranges[range.index()]; let mut w = 0; for u in &rangedata.uses { w += u.weight as u32; log::debug!("range{}: use {:?}", range.index(), u); } - rangedata.uses_spill_weight_and_flags = w; + rangedata.set_uses_spill_weight(w); if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { + // Note that we *set* the flag here, but we never *clear* + // it: it may be set by a progmove as well (which does not + // create an explicit use or def), and we want to preserve + // that. We will never split or trim ranges in a way that + // removes a def at the front and requires the flag to be + // cleared. rangedata.set_flag(LiveRangeFlag::StartsAtDef); } } From 7cdcb2031e88e2c92fac1a40b5df807ded40f3f1 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 23:09:05 -0700 Subject: [PATCH 084/155] Split heuristic: split before entering deeper loop nest --- src/ion/mod.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 8fe28a20..b764c7e9 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -22,13 +22,6 @@ /* Performance and code-quality ideas: - - Split heuristics: - - Loop depth at split point? 
Split before entering more nested loop - - In general, consider 'weight' of split point as if it were - another use. - - - Add weight to bundles according to progmoves - - Reduced spilling when spillslot is still "clean": - When we allocate spillsets, use the whole bundle of a given spillset to check for fit. Add all bundles to spillset as we @@ -3086,6 +3079,24 @@ impl<'a, F: Function> Env<'a, F> { split_at_point = std::cmp::max(first_conflict_point, bundle_start); requeue_with_reg = first_conflict_reg; + // Adjust `split_at_point` if it is within a deeper loop + // than the bundle start -- hoist it to just before the + // first loop header it encounters. + let bundle_start_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[bundle_start.inst().index()].index()]; + let split_at_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[split_at_point.inst().index()].index()]; + if split_at_depth > bundle_start_depth { + for block in (self.cfginfo.insn_block[bundle_start.inst().index()].index() + 1) + ..=self.cfginfo.insn_block[split_at_point.inst().index()].index() + { + if self.cfginfo.approx_loop_depth[block] > bundle_start_depth { + split_at_point = self.cfginfo.block_entry[block]; + break; + } + } + } + // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill // weight, then don't evict. let max_spill_weight = self.maximum_spill_weight_in_bundle_set(&conflicting_bundles); From ca5f24f6b768193683d50643aef62bd0a59e7361 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 23:49:47 -0700 Subject: [PATCH 085/155] Hint the same PReg for both halves of a split --- src/ion/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index b764c7e9..f3df24de 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2872,7 +2872,7 @@ impl<'a, F: Function> Env<'a, F> { self.allocation_queue .insert(bundle, prio as usize, reg_hint); self.allocation_queue - .insert(new_bundle, new_prio as usize, PReg::invalid()); + .insert(new_bundle, new_prio as usize, reg_hint); } fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { From b3dc2b25a51e61fe1020bf67be9806cb217939b2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 25 May 2021 18:19:25 -0700 Subject: [PATCH 086/155] Alloc spillsets for whole vreg, not just spilled LRs. This is a prerequisite to allowing a "clean" value to remain in spillslot while also in reg and avoiding the re-spill. It should also reduce stack-to-stack moves (though they can still come from progmoves). 
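As a rough sketch of the "clean" tracking this unlocks (hypothetical types and names; the real bookkeeping lives in the move-insertion pass and, initially, only within a single block):

    // After a stack->reg reload, the register is a clean copy of its
    // spillslot; a later reg->stack move back to the same slot can be
    // elided until the value is redefined/modified or we cross a block
    // boundary.
    #[derive(Clone, Copy, PartialEq, Eq)]
    struct Slot(u32);

    #[derive(Default)]
    struct CleanSpill {
        clean_from: Option<Slot>,
    }

    impl CleanSpill {
        fn on_reload(&mut self, slot: Slot) {
            self.clean_from = Some(slot);
        }
        fn on_def_mod_or_block_edge(&mut self) {
            self.clean_from = None;
        }
        fn can_elide_spill(&self, slot: Slot) -> bool {
            self.clean_from == Some(slot)
        }
    }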
--- src/ion/mod.rs | 53 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index f3df24de..4124ffd9 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -268,11 +268,12 @@ impl LiveBundle { #[derive(Clone, Debug)] struct SpillSet { - bundles: SmallVec<[LiveBundleIndex; 2]>, + vregs: SmallVec<[VRegIndex; 2]>, slot: SpillSlotIndex, reg_hint: PReg, class: RegClass, spill_bundle: LiveBundleIndex, + required: bool, size: u8, } @@ -2158,6 +2159,19 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[to.index()].ranges = merged; self.bundles[from.index()].ranges.clear(); + if self.bundles[from.index()].spillset != self.bundles[to.index()].spillset { + let from_vregs = std::mem::replace( + &mut self.spillsets[self.bundles[from.index()].spillset.index()].vregs, + smallvec![], + ); + let to_vregs = &mut self.spillsets[self.bundles[to.index()].spillset.index()].vregs; + for vreg in from_vregs { + if !to_vregs.contains(&vreg) { + to_vregs.push(vreg); + } + } + } + true } @@ -2204,9 +2218,10 @@ impl<'a, F: Function> Env<'a, F> { let reg = self.vreg_regs[vreg.index()]; let size = self.func.spillslot_size(reg.class(), reg) as u8; self.spillsets.push(SpillSet { - bundles: smallvec![], + vregs: smallvec![vreg], slot: SpillSlotIndex::invalid(), size, + required: false, class: reg.class(), reg_hint: PReg::invalid(), spill_bundle: LiveBundleIndex::invalid(), @@ -3035,11 +3050,9 @@ impl<'a, F: Function> Env<'a, F> { } Requirement::Stack(_) => { - // If we must be on the stack, put ourselves on - // the spillset's list immediately. - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .bundles - .push(bundle); + // If we must be on the stack, mark our spillset + // as required immediately. 
+ self.spillsets[self.bundles[bundle.index()].spillset.index()].required = true; return Ok(()); } @@ -3195,13 +3208,11 @@ impl<'a, F: Function> Env<'a, F> { } if !success { log::debug!( - "spilling bundle {:?} to spillset bundle list {:?}", + "spilling bundle {:?}: marking spillset {:?} as required", bundle, self.bundles[bundle.index()].spillset ); - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .bundles - .push(bundle); + self.spillsets[self.bundles[bundle.index()].spillset.index()].required = true; } } } @@ -3211,8 +3222,8 @@ impl<'a, F: Function> Env<'a, F> { spillslot: SpillSlotIndex, spillset: SpillSetIndex, ) -> bool { - for &bundle in &self.spillsets[spillset.index()].bundles { - for entry in &self.bundles[bundle.index()].ranges { + for &vreg in &self.spillsets[spillset.index()].vregs { + for entry in &self.vregs[vreg.index()].ranges { if self.spillslots[spillslot.index()] .ranges .btree @@ -3231,22 +3242,22 @@ impl<'a, F: Function> Env<'a, F> { spillslot: SpillSlotIndex, ) { self.spillsets[spillset.index()].slot = spillslot; - for i in 0..self.spillsets[spillset.index()].bundles.len() { + for i in 0..self.spillsets[spillset.index()].vregs.len() { // don't borrow self - let bundle = self.spillsets[spillset.index()].bundles[i]; + let vreg = self.spillsets[spillset.index()].vregs[i]; log::debug!( - "spillslot {:?} alloc'ed to spillset {:?}: bundle {:?}", + "spillslot {:?} alloc'ed to spillset {:?}: vreg {:?}", spillslot, spillset, - bundle + vreg, ); - for entry in &self.bundles[bundle.index()].ranges { + for entry in &self.vregs[vreg.index()].ranges { log::debug!( - "spillslot {:?} getting range {:?} from bundle {:?}: {:?}", + "spillslot {:?} getting range {:?} from LR {:?} from vreg {:?}", spillslot, entry.range, entry.index, - bundle, + vreg, ); self.spillslots[spillslot.index()] .ranges @@ -3260,7 +3271,7 @@ impl<'a, F: Function> Env<'a, F> { for spillset in 0..self.spillsets.len() { log::debug!("allocate spillslot: {}", spillset); let spillset = SpillSetIndex::new(spillset); - if self.spillsets[spillset.index()].bundles.is_empty() { + if !self.spillsets[spillset.index()].required { continue; } // Get or create the spillslot list for this size. From 4e0dd1f2968a87a3b048750c48e3bf05c8a1b979 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 26 May 2021 00:38:53 -0700 Subject: [PATCH 087/155] little tweak to avoid a div/mod on every iter of a PReg alloc loop --- src/ion/mod.rs | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 4124ffd9..12b07837 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -23,15 +23,7 @@ Performance and code-quality ideas: - Reduced spilling when spillslot is still "clean": - - When we allocate spillsets, use the whole bundle of a given - spillset to check for fit. Add all bundles to spillset as we - split; then SpillSet::bundles always corresponds to original - merged bundle. - - Then a single bundle will never move between spillslots, so we - know that when we reload from the one single spillslot, it is - the last value that we spilled. - - So we can track 'dirty' status of reg and elide spill when not - dirty. + - Track 'dirty' status of reg and elide spill when not dirty. - This is slightly tricky: fixpoint problem, across edges. 
- We can simplify by assuming spillslot is dirty if value came in on BB edge; only clean if we reload in same block we spill @@ -49,6 +41,11 @@ - For a spillslot->spillslot move, push a fixed reg (say the first preferred one), reload into it, spill out of it, and then pop old val + + - Avoid rebuilding MachineEnv on every function allocation in + regalloc.rs shim + + - Profile allocations */ #![allow(dead_code, unused_imports)] @@ -137,7 +134,7 @@ struct LiveRangeListEntry { } type LiveRangeList = SmallVec<[LiveRangeListEntry; 4]>; -type UseList = SmallVec<[Use; 4]>; +type UseList = SmallVec<[Use; 2]>; #[derive(Clone, Debug)] struct LiveRange { @@ -689,7 +686,8 @@ struct RegTraversalIter<'a> { hint_idx: usize, pref_idx: usize, non_pref_idx: usize, - offset: usize, + offset_pref: usize, + offset_non_pref: usize, } impl<'a> RegTraversalIter<'a> { @@ -716,14 +714,26 @@ impl<'a> RegTraversalIter<'a> { hint2_reg = None; } let hints = [hint_reg, hint2_reg]; + let class = class as u8 as usize; + let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { + offset % env.preferred_regs_by_class[class].len() + } else { + 0 + }; + let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { + offset % env.non_preferred_regs_by_class[class].len() + } else { + 0 + }; Self { env, - class: class as u8 as usize, + class, hints, hint_idx: 0, pref_idx: 0, non_pref_idx: 0, - offset, + offset_pref, + offset_non_pref, } } } @@ -732,6 +742,13 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { type Item = PReg; fn next(&mut self) -> Option { + fn wrap(idx: usize, limit: usize) -> usize { + if idx >= limit { + idx - limit + } else { + idx + } + } if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { let h = self.hints[self.hint_idx]; self.hint_idx += 1; @@ -739,7 +756,7 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { } while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { let arr = &self.env.preferred_regs_by_class[self.class][..]; - let r = arr[(self.pref_idx + self.offset) % arr.len()]; + let r = arr[wrap(self.pref_idx + self.offset_pref, arr.len())]; self.pref_idx += 1; if Some(r) == self.hints[0] || Some(r) == self.hints[1] { continue; @@ -748,7 +765,7 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { } while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { let arr = &self.env.non_preferred_regs_by_class[self.class][..]; - let r = arr[(self.non_pref_idx + self.offset) % arr.len()]; + let r = arr[wrap(self.non_pref_idx + self.offset_non_pref, arr.len())]; self.non_pref_idx += 1; if Some(r) == self.hints[0] || Some(r) == self.hints[1] { continue; From dcf6f473ca740297155cdec1d834ab67419de436 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 26 May 2021 00:48:41 -0700 Subject: [PATCH 088/155] inline some things --- src/ion/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 12b07837..6df69cf3 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -485,6 +485,7 @@ impl PrioQueue { } } + #[inline(always)] fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, reg_hint: PReg) { self.heap.push(PrioQueueEntry { prio: prio as u32, @@ -493,10 +494,12 @@ impl PrioQueue { }); } + #[inline(always)] fn is_empty(self) -> bool { self.heap.is_empty() } + #[inline(always)] fn pop(&mut self) -> Option<(LiveBundleIndex, PReg)> { self.heap.pop().map(|entry| (entry.bundle, entry.reg_hint)) } From e521811b88988cf2701c7849ce124e21036d8c03 Mon Sep 17 00:00:00 2001 From: Chris 
Fallin Date: Wed, 26 May 2021 17:08:14 -0700 Subject: [PATCH 089/155] Avoid re-spilling to spillslot when still clean: intra-block edition (inter-block needs more analysis and careful thought) --- src/ion/mod.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 6df69cf3..826f928d 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3541,6 +3541,8 @@ impl<'a, F: Function> Env<'a, F> { None }; + let mut clean_spillslot: Option = None; + // For each range in each vreg, insert moves or // half-moves. We also scan over `blockparam_ins` and // `blockparam_outs`, which are sorted by (block, vreg), @@ -3614,9 +3616,20 @@ impl<'a, F: Function> Env<'a, F> { let first_is_def = self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef); debug_assert!(prev_alloc != Allocation::none()); + + // If this is a stack-to-reg move, track that the reg is a clean copy of a spillslot. + if prev_alloc.is_stack() && alloc.is_reg() { + clean_spillslot = Some(prev_alloc.as_stack().unwrap()); + } + // If this is a reg-to-stack move, elide it if the spillslot is still clean. + let skip_spill = prev_alloc.is_reg() + && alloc.is_stack() + && clean_spillslot == alloc.as_stack(); + if prev_range.to == range.from && !self.is_start_of_block(range.from) && !first_is_def + && !skip_spill { log::debug!( "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", @@ -3637,6 +3650,27 @@ impl<'a, F: Function> Env<'a, F> { } } + // If this range either spans any block boundary, or + // has any mods/defs, then the spillslot (if any) that + // its value came from is no longer 'clean'. + if clean_spillslot.is_some() { + if self.cfginfo.insn_block[range.from.inst().index()] + != self.cfginfo.insn_block[range.to.prev().inst().index()] + { + clean_spillslot = None; + } else { + for u in &self.ranges[entry.index.index()].uses { + match u.operand.kind() { + OperandKind::Def | OperandKind::Mod => { + clean_spillslot = None; + break; + } + _ => {} + } + } + } + } + // The block-to-block edge-move logic is not // applicable to pinned vregs, which are always in one // PReg (so never need moves within their own vreg From 13bde99d7d568a5a9c213cb8397e9898fbf87797 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 26 May 2021 18:08:41 -0700 Subject: [PATCH 090/155] bugfix with clean-spill opt: avoid if liverange starts at start of block (this is like a def) or if has starts-at-def flag. --- src/ion/mod.rs | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 826f928d..105b9648 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3656,6 +3656,12 @@ impl<'a, F: Function> Env<'a, F> { if clean_spillslot.is_some() { if self.cfginfo.insn_block[range.from.inst().index()] != self.cfginfo.insn_block[range.to.prev().inst().index()] + || range.from + == self.cfginfo.block_entry + [self.cfginfo.insn_block[range.from.inst().index()].index()] + { + clean_spillslot = None; + } else if self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef) { clean_spillslot = None; } else { @@ -4335,25 +4341,26 @@ impl<'a, F: Function> Env<'a, F> { self.stats.edits_count = self.edits.len(); // Add debug annotations. 
- #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - for i in 0..self.edits.len() { - let &(pos, _, ref edit) = &self.edits[i]; - match edit { - &Edit::Move { from, to, to_vreg } => { - self.annotate( - ProgPoint::from_index(pos), - format!("move {} -> {} ({:?})", from, to, to_vreg), - ); - } - &Edit::BlockParams { - ref vregs, - ref allocs, - } => { - let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); - self.annotate(ProgPoint::from_index(pos), s); - } + if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + for i in 0..self.edits.len() { + let &(pos, _, ref edit) = &self.edits[i]; + match edit { + &Edit::Move { from, to, to_vreg } => { + self.annotate( + ProgPoint::from_index(pos), + format!("move {} -> {} ({:?})", from, to, to_vreg), + ); + } + &Edit::BlockParams { + ref vregs, + ref allocs, + } => { + let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); + self.annotate(ProgPoint::from_index(pos), s); + } + &Edit::DefAlloc { alloc, vreg } => { + let s = format!("defalloc {:?} := {:?}", alloc, vreg); + self.annotate(ProgPoint::from_index(pos), s); } } } From 7171624750157e725e00d105da6eb0d45365c496 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 26 May 2021 21:35:43 -0700 Subject: [PATCH 091/155] Don't generate r1->scratch,scratch-r1 sequence for cyclic moves of r1->r1 that are generated to change vreg ownership and keep the checker happy. Seems to eliminate a bit of braindeadness and improve bz2 by ~5-10%. --- src/ion/mod.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 105b9648..ed2b7cef 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -4254,11 +4254,18 @@ impl<'a, F: Function> Env<'a, F> { // regs, but this seems simpler.) let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; + let mut self_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; for m in moves { if m.from_alloc.is_reg() && m.to_alloc.is_reg() { assert_eq!(m.from_alloc.class(), m.to_alloc.class()); } + if m.from_alloc == m.to_alloc { + if m.to_vreg.is_some() { + self_moves.push(m.clone()); + } + continue; + } match m.from_alloc.class() { RegClass::Int => { int_moves.push(m.clone()); @@ -4269,6 +4276,18 @@ impl<'a, F: Function> Env<'a, F> { } } + for m in &self_moves { + self.add_edit( + pos, + prio, + Edit::Move { + from: m.from_alloc, + to: m.to_alloc, + to_vreg: m.to_vreg, + }, + ); + } + for &(regclass, moves) in &[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)] { From 43d7095cbd9cbab69a135b099ed953a0228ddc83 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 28 May 2021 16:49:32 -0700 Subject: [PATCH 092/155] Properly split when we hit a fixed conflict --- src/ion/mod.rs | 52 ++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index ed2b7cef..d8f06e33 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -601,7 +601,7 @@ impl Requirement { enum AllocRegResult { Allocated(Allocation), Conflict(LiveBundleVec), - ConflictWithFixed, + ConflictWithFixed(u32, ProgPoint), ConflictHighCost, } @@ -2518,7 +2518,8 @@ impl<'a, F: Function> Env<'a, F> { } // Otherwise, there is a conflict. - assert_eq!(*preg_range_iter.peek().unwrap().0, key); + let preg_key = *preg_range_iter.peek().unwrap().0; + assert_eq!(preg_key, key); // Assert that this range overlaps. 
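// (LiveRangeKey's custom PartialEq treats two keys as equal when their ranges
// overlap, so the assert above checks that the committed preg range intersects
// this bundle range, not that the endpoints are identical.)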
let preg_range = preg_range_iter.next().unwrap().1; log::debug!(" -> btree contains range {:?} that overlaps", preg_range); @@ -2543,7 +2544,10 @@ impl<'a, F: Function> Env<'a, F> { } else { log::debug!(" -> conflict with fixed reservation"); // range from a direct use of the PReg (due to clobber). - return AllocRegResult::ConflictWithFixed; + return AllocRegResult::ConflictWithFixed( + max_conflict_weight, + ProgPoint::from_index(preg_key.from), + ); } } @@ -2966,15 +2970,9 @@ impl<'a, F: Function> Env<'a, F> { preg, ) } - AllocRegResult::ConflictWithFixed => { + AllocRegResult::ConflictWithFixed(_, point) => { log::debug!(" -> conflict with fixed alloc"); - // Empty conflicts set: there's nothing we can - // evict, because fixed conflicts cannot be moved. - ( - smallvec![], - ProgPoint::before(Inst::new(0)), - PReg::invalid(), - ) + (smallvec![], point, preg) } AllocRegResult::ConflictHighCost => unreachable!(), } @@ -3031,21 +3029,25 @@ impl<'a, F: Function> Env<'a, F> { let cost = self.maximum_spill_weight_in_bundle_set(&bundles); - if lowest_cost_conflict_cost.is_none() { - lowest_cost_conflict_cost = Some(cost); - lowest_cost_conflict_set = Some(bundles); - lowest_cost_conflict_point = first_conflict_point; - lowest_cost_conflict_reg = preg; - } else if cost < lowest_cost_conflict_cost.unwrap() { + if lowest_cost_conflict_cost.is_none() + || cost < lowest_cost_conflict_cost.unwrap() + { lowest_cost_conflict_cost = Some(cost); lowest_cost_conflict_set = Some(bundles); lowest_cost_conflict_point = first_conflict_point; lowest_cost_conflict_reg = preg; } } - AllocRegResult::ConflictWithFixed => { - log::debug!(" -> conflict with fixed alloc"); - // Simply don't consider as an option. + AllocRegResult::ConflictWithFixed(max_cost, point) => { + log::debug!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); + if lowest_cost_conflict_cost.is_none() + || max_cost < lowest_cost_conflict_cost.unwrap() + { + lowest_cost_conflict_cost = Some(max_cost); + lowest_cost_conflict_set = Some(smallvec![]); + lowest_cost_conflict_point = point; + lowest_cost_conflict_reg = preg; + } } AllocRegResult::ConflictHighCost => { // Simply don't consider -- we already have @@ -3103,11 +3105,6 @@ impl<'a, F: Function> Env<'a, F> { break; } - // If we hit a fixed conflict, give up and move on to splitting. - if conflicting_bundles.is_empty() { - break; - } - let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; split_at_point = std::cmp::max(first_conflict_point, bundle_start); requeue_with_reg = first_conflict_reg; @@ -3130,6 +3127,11 @@ impl<'a, F: Function> Env<'a, F> { } } + // If we hit a fixed conflict, give up and move on to splitting. + if conflicting_bundles.is_empty() { + break; + } + // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill // weight, then don't evict. 
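// (A bundle's cached spill weight estimates how costly it would be to displace
// it, derived from its uses' operand policies, their loop depths, and whether
// they are defs; the guard below simply refuses to evict anything at least as
// costly as the bundle we are currently trying to place.)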
let max_spill_weight = self.maximum_spill_weight_in_bundle_set(&conflicting_bundles); From 789651f9470cbf404d773ea3bd2e139891198471 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 28 May 2021 17:36:06 -0700 Subject: [PATCH 093/155] Rework inner allocation-loop code: choose more wisely between splitting and evicting based on costs (and unify the fixed-reg-constraint case) --- src/ion/mod.rs | 444 ++++++++++++++++++++++++++----------------------- 1 file changed, 232 insertions(+), 212 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index d8f06e33..76f719b4 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -691,6 +691,8 @@ struct RegTraversalIter<'a> { non_pref_idx: usize, offset_pref: usize, offset_non_pref: usize, + is_fixed: bool, + fixed: Option, } impl<'a> RegTraversalIter<'a> { @@ -700,6 +702,7 @@ impl<'a> RegTraversalIter<'a> { hint_reg: PReg, hint2_reg: PReg, offset: usize, + fixed: Option, ) -> Self { let mut hint_reg = if hint_reg != PReg::invalid() { Some(hint_reg) @@ -737,6 +740,8 @@ impl<'a> RegTraversalIter<'a> { non_pref_idx: 0, offset_pref, offset_non_pref, + is_fixed: fixed.is_some(), + fixed, } } } @@ -745,6 +750,12 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { type Item = PReg; fn next(&mut self) -> Option { + if self.is_fixed { + let ret = self.fixed; + self.fixed = None; + return ret; + } + fn wrap(idx: usize, limit: usize) -> usize { if idx >= limit { idx - limit @@ -2939,138 +2950,27 @@ impl<'a, F: Function> Env<'a, F> { }; log::debug!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); + if let Requirement::Conflict = req { + // We have to split right away. + let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; + self.split_and_requeue_bundle( + bundle, + /* split_at_point = */ bundle_start, + reg_hint, + ); + return Ok(()); + } + // Try to allocate! let mut attempts = 0; - let mut split_at_point = self.bundles[bundle.index()].ranges[0].range.from; - let mut requeue_with_reg = PReg::invalid(); loop { attempts += 1; log::debug!("attempt {}, req {:?}", attempts, req); debug_assert!(attempts < 100 * self.func.insts()); - let (conflicting_bundles, first_conflict_point, first_conflict_reg) = match req { - Requirement::Fixed(preg) => { - let preg_idx = PRegIndex::new(preg.index()); - self.stats.process_bundle_reg_probes_fixed += 1; - log::debug!("trying fixed reg {:?}", preg_idx); - match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) { - AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_fixed += 1; - log::debug!(" -> allocated to fixed {:?}", preg_idx); - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = alloc.as_reg().unwrap(); - return Ok(()); - } - AllocRegResult::Conflict(bundles) => { - log::debug!(" -> conflict with bundles {:?}", bundles); - let first_bundle = bundles[0]; - ( - bundles, - self.bundles[first_bundle.index()].ranges[0].range.from, - preg, - ) - } - AllocRegResult::ConflictWithFixed(_, point) => { - log::debug!(" -> conflict with fixed alloc"); - (smallvec![], point, preg) - } - AllocRegResult::ConflictHighCost => unreachable!(), - } - } - Requirement::Register(class) => { - // Scan all pregs and attempt to allocate. 
- let mut lowest_cost_conflict_set: Option = None; - let mut lowest_cost_conflict_cost: Option = None; - let mut lowest_cost_conflict_point = ProgPoint::before(Inst::new(0)); - let mut lowest_cost_conflict_reg = PReg::invalid(); - - // Heuristic: start the scan for an available - // register at an offset influenced both by our - // location in the code and by the bundle we're - // considering. This has the effect of spreading - // demand more evenly across registers. - let scan_offset = self.ranges - [self.bundles[bundle.index()].ranges[0].index.index()] - .range - .from - .inst() - .index() - + bundle.index(); - - self.stats.process_bundle_reg_probe_start_any += 1; - for preg in RegTraversalIter::new( - self.env, - class, - hint_reg, - PReg::invalid(), - scan_offset, - ) { - self.stats.process_bundle_reg_probes_any += 1; - let preg_idx = PRegIndex::new(preg.index()); - log::debug!("trying preg {:?}", preg_idx); - - match self.try_to_allocate_bundle_to_reg( - bundle, - preg_idx, - lowest_cost_conflict_cost, - ) { - AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_any += 1; - log::debug!(" -> allocated to any {:?}", preg_idx); - self.spillsets[self.bundles[bundle.index()].spillset.index()] - .reg_hint = alloc.as_reg().unwrap(); - return Ok(()); - } - AllocRegResult::Conflict(bundles) => { - log::debug!(" -> conflict with bundles {:?}", bundles); - - let first_conflict_point = - self.bundles[bundles[0].index()].ranges[0].range.from; - - let cost = self.maximum_spill_weight_in_bundle_set(&bundles); - - if lowest_cost_conflict_cost.is_none() - || cost < lowest_cost_conflict_cost.unwrap() - { - lowest_cost_conflict_cost = Some(cost); - lowest_cost_conflict_set = Some(bundles); - lowest_cost_conflict_point = first_conflict_point; - lowest_cost_conflict_reg = preg; - } - } - AllocRegResult::ConflictWithFixed(max_cost, point) => { - log::debug!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); - if lowest_cost_conflict_cost.is_none() - || max_cost < lowest_cost_conflict_cost.unwrap() - { - lowest_cost_conflict_cost = Some(max_cost); - lowest_cost_conflict_set = Some(smallvec![]); - lowest_cost_conflict_point = point; - lowest_cost_conflict_reg = preg; - } - } - AllocRegResult::ConflictHighCost => { - // Simply don't consider -- we already have - // a lower-cost conflict bundle option - // to evict. - continue; - } - } - } - - // Otherwise, we *require* a register, but didn't fit into - // any with current bundle assignments. Hence, we will need - // to either split or attempt to evict some bundles. Return - // the conflicting bundles to evict and retry. Empty list - // means nothing to try (due to fixed conflict) so we must - // split instead. - ( - lowest_cost_conflict_set.unwrap_or(smallvec![]), - lowest_cost_conflict_point, - lowest_cost_conflict_reg, - ) - } - + let (class, fixed_preg) = match req { + Requirement::Fixed(preg) => (preg.class(), Some(preg)), + Requirement::Register(class) => (class, None), Requirement::Stack(_) => { // If we must be on the stack, mark our spillset // as required immediately. @@ -3086,114 +2986,233 @@ impl<'a, F: Function> Env<'a, F> { return Ok(()); } - Requirement::Conflict => { - break; - } + Requirement::Conflict => unreachable!(), }; + // Scan all pregs, or the one fixed preg, and attempt to allocate. 
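// When `fixed_preg` is Some, RegTraversalIter yields exactly that one register
// and nothing else (see its `is_fixed`/`fixed` fields above), so the
// fixed-constraint case can share the same conflict-cost bookkeeping as the
// general register scan below.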
- log::debug!(" -> conflict set {:?}", conflicting_bundles); - log::debug!( - " -> first conflict {:?} with reg {:?}", - first_conflict_point, - first_conflict_reg - ); + let mut lowest_cost_evict_conflict_set: Option = None; + let mut lowest_cost_evict_conflict_cost: Option = None; - // If we have already tried evictions once before and are - // still unsuccessful, give up and move on to splitting as - // long as this is not a minimal bundle. - if attempts >= 2 && !self.minimal_bundle(bundle) { - break; - } + let mut lowest_cost_split_conflict_cost: Option = None; + let mut lowest_cost_split_conflict_point = ProgPoint::before(Inst::new(0)); + let mut lowest_cost_split_conflict_reg = PReg::invalid(); - let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; - split_at_point = std::cmp::max(first_conflict_point, bundle_start); - requeue_with_reg = first_conflict_reg; - - // Adjust `split_at_point` if it is within a deeper loop - // than the bundle start -- hoist it to just before the - // first loop header it encounters. - let bundle_start_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[bundle_start.inst().index()].index()]; - let split_at_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[split_at_point.inst().index()].index()]; - if split_at_depth > bundle_start_depth { - for block in (self.cfginfo.insn_block[bundle_start.inst().index()].index() + 1) - ..=self.cfginfo.insn_block[split_at_point.inst().index()].index() - { - if self.cfginfo.approx_loop_depth[block] > bundle_start_depth { - split_at_point = self.cfginfo.block_entry[block]; - break; + // Heuristic: start the scan for an available + // register at an offset influenced both by our + // location in the code and by the bundle we're + // considering. This has the effect of spreading + // demand more evenly across registers. 
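// A minimal standalone sketch of this probe-start heuristic (illustrative
// names only, not the allocator's actual code): offsetting by code position
// plus bundle index, modulo the class's register count, spreads initial probes
// across the register file instead of always starting at register 0.
fn probe_start(first_range_inst: usize, bundle_index: usize, num_regs: usize) -> usize {
    if num_regs == 0 {
        0
    } else {
        (first_range_inst + bundle_index) % num_regs
    }
}
// e.g. probe_start(37, 5, 8) == 2, so a bundle whose first range starts at
// inst 37 and whose index is 5 begins scanning at preferred-register slot 2.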
+ let scan_offset = self.ranges[self.bundles[bundle.index()].ranges[0].index.index()] + .range + .from + .inst() + .index() + + bundle.index(); + + self.stats.process_bundle_reg_probe_start_any += 1; + for preg in RegTraversalIter::new( + self.env, + class, + hint_reg, + PReg::invalid(), + scan_offset, + fixed_preg, + ) { + self.stats.process_bundle_reg_probes_any += 1; + let preg_idx = PRegIndex::new(preg.index()); + log::debug!("trying preg {:?}", preg_idx); + + let scan_limit_cost = match ( + lowest_cost_evict_conflict_cost, + lowest_cost_split_conflict_cost, + ) { + (Some(a), Some(b)) => Some(std::cmp::max(a, b)), + _ => None, + }; + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, scan_limit_cost) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_any += 1; + log::debug!(" -> allocated to any {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint = + alloc.as_reg().unwrap(); + return Ok(()); + } + AllocRegResult::Conflict(bundles) => { + log::debug!(" -> conflict with bundles {:?}", bundles); + + let first_conflict_point = + self.bundles[bundles[0].index()].ranges[0].range.from; + + let conflict_cost = self.maximum_spill_weight_in_bundle_set(&bundles); + + if lowest_cost_evict_conflict_cost.is_none() + || conflict_cost < lowest_cost_evict_conflict_cost.unwrap() + { + lowest_cost_evict_conflict_cost = Some(conflict_cost); + lowest_cost_evict_conflict_set = Some(bundles); + } + + let loop_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[first_conflict_point.inst().index()].index()]; + let move_cost = spill_weight_from_policy( + OperandPolicy::Reg, + loop_depth as usize, + /* is_def = */ true, + ); + if lowest_cost_split_conflict_cost.is_none() + || (conflict_cost + move_cost) + < lowest_cost_split_conflict_cost.unwrap() + { + lowest_cost_split_conflict_cost = Some(conflict_cost + move_cost); + lowest_cost_split_conflict_point = first_conflict_point; + lowest_cost_split_conflict_reg = preg; + } + } + AllocRegResult::ConflictWithFixed(max_cost, point) => { + log::debug!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); + + let loop_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[point.inst().index()].index()]; + let move_cost = spill_weight_from_policy( + OperandPolicy::Reg, + loop_depth as usize, + /* is_def = */ true, + ); + + if lowest_cost_split_conflict_cost.is_none() + || (max_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() + { + lowest_cost_split_conflict_cost = Some(max_cost + move_cost); + lowest_cost_split_conflict_point = point; + lowest_cost_split_conflict_reg = preg; + } + } + AllocRegResult::ConflictHighCost => { + // Simply don't consider -- we already have + // a lower-cost conflict bundle option + // to evict. + continue; } } } - // If we hit a fixed conflict, give up and move on to splitting. - if conflicting_bundles.is_empty() { - break; - } + // Otherwise, we *require* a register, but didn't fit into + // any with current bundle assignments. Hence, we will need + // to either split or attempt to evict some bundles. - // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill - // weight, then don't evict. 
- let max_spill_weight = self.maximum_spill_weight_in_bundle_set(&conflicting_bundles); log::debug!( - " -> max_spill_weight = {}; our spill weight {}", - max_spill_weight, - self.bundle_spill_weight(bundle) + " -> lowest cost evict: set {:?}, cost {:?}", + lowest_cost_evict_conflict_set, + lowest_cost_evict_conflict_cost, + ); + log::debug!( + " -> lowest cost split: cost {:?}, point {:?}, reg {:?}", + lowest_cost_split_conflict_cost, + lowest_cost_split_conflict_point, + lowest_cost_split_conflict_reg ); - if max_spill_weight >= self.bundle_spill_weight(bundle) { - log::debug!(" -> we're already the cheapest bundle to spill -- going to split"); - break; - } - // Evict all bundles in `conflicting bundles` and try again. - self.stats.evict_bundle_event += 1; - for &bundle in &conflicting_bundles { - log::debug!(" -> evicting {:?}", bundle); - self.evict_bundle(bundle); - self.stats.evict_bundle_count += 1; - } - } + // If we reach here, we *must* have an option either to split or evict. + assert!( + lowest_cost_split_conflict_cost.is_some() + || lowest_cost_evict_conflict_cost.is_some() + ); - // A minimal bundle cannot be split. - if self.minimal_bundle(bundle) { - if let Requirement::Register(class) = req { - // Check if this is a too-many-live-registers situation. - let range = self.bundles[bundle.index()].ranges[0].range; - let mut min_bundles_assigned = 0; - let mut fixed_assigned = 0; - let mut total_regs = 0; - for preg in self.env.preferred_regs_by_class[class as u8 as usize] - .iter() - .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) - { - if let Some(&lr) = self.pregs[preg.index()] - .allocations - .btree - .get(&LiveRangeKey::from_range(&range)) + // If our bundle's weight is less than or equal to(*) the + // evict cost, choose to split. Also pick splitting if + // we're on our second or more attempt as long as the + // bundle isn't minimal. Also pick splitting if the + // conflict set is empty, meaning a fixed conflict that + // can't be evicted. + // + // (*) the "equal to" part is very important: it prevents + // an infinite loop where two bundles with equal spill + // cost continually evict each other in an infinite + // allocation loop. In such a case, the first bundle in + // wins, and the other splits. + if (attempts >= 2 && !self.minimal_bundle(bundle)) + || lowest_cost_evict_conflict_cost.is_none() + || self.bundle_spill_weight(bundle) <= lowest_cost_evict_conflict_cost.unwrap() + { + log::debug!( + " -> deciding to split: our spill weight is {}", + self.bundle_spill_weight(bundle) + ); + let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; + let mut split_at_point = + std::cmp::max(lowest_cost_split_conflict_point, bundle_start); + let requeue_with_reg = lowest_cost_split_conflict_reg; + + // Adjust `split_at_point` if it is within a deeper loop + // than the bundle start -- hoist it to just before the + // first loop header it encounters. 
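// Illustrative case: if the bundle starts in a block at approx_loop_depth 1
// and the chosen conflict point sits in a block at depth 3, the walk below
// stops at the first block past the bundle start whose depth exceeds 1 and
// splits at that block's entry, so the resulting reload lands outside the
// inner loop body.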
+ let bundle_start_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[bundle_start.inst().index()].index()]; + let split_at_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[split_at_point.inst().index()].index()]; + if split_at_depth > bundle_start_depth { + for block in (self.cfginfo.insn_block[bundle_start.inst().index()].index() + 1) + ..=self.cfginfo.insn_block[split_at_point.inst().index()].index() { - if lr.is_valid() { - if self.minimal_bundle(self.ranges[lr.index()].bundle) { - min_bundles_assigned += 1; + if self.cfginfo.approx_loop_depth[block] > bundle_start_depth { + split_at_point = self.cfginfo.block_entry[block]; + break; + } + } + } + + // A minimal bundle cannot be split. + if self.minimal_bundle(bundle) { + if let Requirement::Register(class) = req { + // Check if this is a too-many-live-registers situation. + let range = self.bundles[bundle.index()].ranges[0].range; + let mut min_bundles_assigned = 0; + let mut fixed_assigned = 0; + let mut total_regs = 0; + for preg in self.env.preferred_regs_by_class[class as u8 as usize] + .iter() + .chain( + self.env.non_preferred_regs_by_class[class as u8 as usize].iter(), + ) + { + if let Some(&lr) = self.pregs[preg.index()] + .allocations + .btree + .get(&LiveRangeKey::from_range(&range)) + { + if lr.is_valid() { + if self.minimal_bundle(self.ranges[lr.index()].bundle) { + min_bundles_assigned += 1; + } + } else { + fixed_assigned += 1; + } } - } else { - fixed_assigned += 1; + total_regs += 1; + } + if min_bundles_assigned + fixed_assigned == total_regs { + return Err(RegAllocError::TooManyLiveRegs); } } - total_regs += 1; } - if min_bundles_assigned + fixed_assigned == total_regs { - return Err(RegAllocError::TooManyLiveRegs); + if self.minimal_bundle(bundle) { + self.dump_state(); } - } - } - if self.minimal_bundle(bundle) { - self.dump_state(); - } - assert!(!self.minimal_bundle(bundle)); + assert!(!self.minimal_bundle(bundle)); - self.split_and_requeue_bundle(bundle, split_at_point, requeue_with_reg); + self.split_and_requeue_bundle(bundle, split_at_point, requeue_with_reg); - Ok(()) + return Ok(()); + } else { + // Evict all bundles in `conflicting bundles` and try again. 
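// "Try again" here means the enclosing `loop` runs another iteration with
// `attempts` incremented; if eviction keeps failing, the `attempts >= 2` arm
// above eventually forces the split path for non-minimal bundles.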
+ self.stats.evict_bundle_event += 1; + for &bundle in &lowest_cost_evict_conflict_set.unwrap() { + log::debug!(" -> evicting {:?}", bundle); + self.evict_bundle(bundle); + self.stats.evict_bundle_count += 1; + } + } + } } fn try_allocating_regs_for_spilled_bundles(&mut self) { @@ -3217,6 +3236,7 @@ impl<'a, F: Function> Env<'a, F> { PReg::invalid(), PReg::invalid(), bundle.index(), + None, ) { log::debug!("trying bundle {:?} to preg {:?}", bundle, preg); let preg_idx = PRegIndex::new(preg.index()); From f49167e0fecc268d9491adc88cdb7e0b7fa09b30 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 28 May 2021 18:35:09 -0700 Subject: [PATCH 094/155] emit annotations at Info level, for easier selective perf-debugging --- src/ion/mod.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 76f719b4..e3fc838c 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1510,7 +1510,7 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::Before, ); - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + if self.annotations_enabled { self.annotate( ProgPoint::after(inst), format!( @@ -2105,7 +2105,7 @@ impl<'a, F: Function> Env<'a, F> { for entry in &list { self.ranges[entry.index.index()].bundle = to; - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + if self.annotations_enabled { self.annotate( entry.range.from, format!( @@ -2165,7 +2165,7 @@ impl<'a, F: Function> Env<'a, F> { last_range = Some(entry.range); if self.ranges[entry.index.index()].bundle == from { - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + if self.annotations_enabled { self.annotate( entry.range.from, format!( @@ -3586,7 +3586,7 @@ impl<'a, F: Function> Env<'a, F> { ); debug_assert!(alloc != Allocation::none()); - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + if self.annotations_enabled { self.annotate( range.from, format!( @@ -3767,8 +3767,7 @@ impl<'a, F: Function> Env<'a, F> { alloc, }); - if self.annotations_enabled && log::log_enabled!(log::Level::Debug) - { + if self.annotations_enabled { self.annotate( self.cfginfo.block_exit[block.index()], format!( @@ -4382,7 +4381,7 @@ impl<'a, F: Function> Env<'a, F> { self.stats.edits_count = self.edits.len(); // Add debug annotations. 
- if self.annotations_enabled && log::log_enabled!(log::Level::Debug) { + if self.annotations_enabled { for i in 0..self.edits.len() { let &(pos, _, ref edit) = &self.edits[i]; match edit { @@ -4496,7 +4495,7 @@ impl<'a, F: Function> Env<'a, F> { } fn annotate(&mut self, progpoint: ProgPoint, s: String) { - if log::log_enabled!(log::Level::Debug) { + if self.annotations_enabled { self.debug_annotations .entry(progpoint) .or_insert_with(|| vec![]) @@ -4505,10 +4504,10 @@ impl<'a, F: Function> Env<'a, F> { } fn dump_results(&self) { - log::debug!("=== REGALLOC RESULTS ==="); + log::info!("=== REGALLOC RESULTS ==="); for block in 0..self.func.blocks() { let block = Block::new(block); - log::debug!( + log::info!( "block{}: [succs {:?} preds {:?}]", block.index(), self.func @@ -4529,7 +4528,7 @@ impl<'a, F: Function> Env<'a, F> { .map(|v| &v[..]) .unwrap_or(&[]) { - log::debug!(" inst{}-pre: {}", inst.index(), annotation); + log::info!(" inst{}-pre: {}", inst.index(), annotation); } let ops = self .func @@ -4565,7 +4564,7 @@ impl<'a, F: Function> Env<'a, F> { } else { format!(" [clobber: {}]", clobbers.join(", ")) }; - log::debug!( + log::info!( " inst{}: {} {}{}", inst.index(), opname, @@ -4578,7 +4577,7 @@ impl<'a, F: Function> Env<'a, F> { .map(|v| &v[..]) .unwrap_or(&[]) { - log::debug!(" inst{}-post: {}", inst.index(), annotation); + log::info!(" inst{}-post: {}", inst.index(), annotation); } } } @@ -4597,7 +4596,7 @@ pub fn run( env.run()?; - if log::log_enabled!(log::Level::Debug) { + if enable_annotations { env.dump_results(); } From 44ca1893c3836353027dc32976f655747aa4f864 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Jun 2021 14:52:59 -0700 Subject: [PATCH 095/155] Fuzzbug fix: properly check for conflicting reqs before merging bundles (cached values are not computed yet) --- src/ion/mod.rs | 133 +++++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 66 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index e3fc838c..86715698 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2063,25 +2063,13 @@ impl<'a, F: Function> Env<'a, F> { } } - // There could be a requirements conflict only one of the both - // sides of the merge has at least one requirement that is not - // 'Reg' or 'Any'. (Note that we already checked that the - // RegClass is the same on both sides.) - if self.bundles[from.index()].cached_fixed() - || self.bundles[from.index()].cached_stack() - || self.bundles[to.index()].cached_fixed() - || self.bundles[to.index()].cached_stack() - { - let mut req = Requirement::Unknown; - for entry in ranges_from.iter().chain(ranges_to.iter()) { - for u in &self.ranges[entry.index.index()].uses { - req = req.merge(Requirement::from_operand(u.operand)); - if req == Requirement::Conflict { - log::debug!(" -> conflicting requirements; aborting merge"); - return false; - } - } - } + // Check for a requirements conflict. 
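// Hedged illustration of why this check matters, using simplified hypothetical
// types (the real Requirement and its merge live in this module): requirements
// combine pairwise, and any combination that no single location can satisfy
// collapses to a Conflict value, which aborts the merge.
#[derive(Clone, Copy, PartialEq, Debug)]
enum ReqSketch {
    Any,
    Reg,
    Stack,
    Fixed(u8),
    Conflict,
}
fn merge_sketch(a: ReqSketch, b: ReqSketch) -> ReqSketch {
    use ReqSketch::*;
    match (a, b) {
        (Any, x) | (x, Any) => x,
        (Reg, Reg) => Reg,
        (Stack, Stack) => Stack,
        (Fixed(p), Fixed(q)) if p == q => Fixed(p),
        (Fixed(p), Reg) | (Reg, Fixed(p)) => Fixed(p),
        // e.g. Reg vs Stack, or two different fixed registers:
        _ => Conflict,
    }
}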
+ let req = self + .compute_requirement(from) + .merge(self.compute_requirement(to)); + if req == Requirement::Conflict { + log::debug!(" -> conflicting requirements; aborting merge"); + return false; } log::debug!(" -> committing to merge"); @@ -2646,6 +2634,7 @@ impl<'a, F: Function> Env<'a, F> { fixed = true; } else { for u in &first_range_data.uses { + log::debug!(" -> use: {:?}", u); if let OperandPolicy::FixedReg(_) = u.operand.policy() { log::debug!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); fixed = true; @@ -2927,12 +2916,17 @@ impl<'a, F: Function> Env<'a, F> { fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { let mut req = Requirement::Unknown; + log::debug!("compute_requirement: {:?}", bundle); for entry in &self.bundles[bundle.index()].ranges { + log::debug!(" -> LR {:?}", entry.index); for u in &self.ranges[entry.index.index()].uses { + log::debug!(" -> use {:?}", u); let r = Requirement::from_operand(u.operand); req = req.merge(r); + log::debug!(" -> req {:?}", req); } } + log::debug!(" -> final: {:?}", req); req } @@ -2952,6 +2946,10 @@ impl<'a, F: Function> Env<'a, F> { if let Requirement::Conflict = req { // We have to split right away. + assert!( + !self.minimal_bundle(bundle), + "Minimal bundle with conflict!" + ); let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; self.split_and_requeue_bundle( bundle, @@ -3118,21 +3116,64 @@ impl<'a, F: Function> Env<'a, F> { || lowest_cost_evict_conflict_cost.is_some() ); + // Check that we haven't attempted more than once with a + // minimal bundle -- this would indicate a bug. We detect + // the "too-many-live-registers" case here and return an + // error cleanly, rather than panicking, because the + // regalloc.rs fuzzer depends on the register allocator to + // correctly reject impossible-to-allocate programs in + // order to discard invalid test cases. + if attempts >= 2 && self.minimal_bundle(bundle) { + if let Requirement::Register(class) = req { + // Check if this is a too-many-live-registers situation. + let range = self.bundles[bundle.index()].ranges[0].range; + let mut min_bundles_assigned = 0; + let mut fixed_assigned = 0; + let mut total_regs = 0; + for preg in self.env.preferred_regs_by_class[class as u8 as usize] + .iter() + .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) + { + if let Some(&lr) = self.pregs[preg.index()] + .allocations + .btree + .get(&LiveRangeKey::from_range(&range)) + { + if lr.is_valid() { + if self.minimal_bundle(self.ranges[lr.index()].bundle) { + min_bundles_assigned += 1; + } + } else { + fixed_assigned += 1; + } + } + total_regs += 1; + } + if min_bundles_assigned + fixed_assigned == total_regs { + return Err(RegAllocError::TooManyLiveRegs); + } + } + + panic!("Could not allocate minimal bundle, but the allocation problem should be possible to solve"); + } + // If our bundle's weight is less than or equal to(*) the // evict cost, choose to split. Also pick splitting if - // we're on our second or more attempt as long as the - // bundle isn't minimal. Also pick splitting if the - // conflict set is empty, meaning a fixed conflict that - // can't be evicted. + // we're on our second or more attempt and we didn't + // allocate. Also pick splitting if the conflict set is + // empty, meaning a fixed conflict that can't be evicted. // // (*) the "equal to" part is very important: it prevents // an infinite loop where two bundles with equal spill // cost continually evict each other in an infinite // allocation loop. 
In such a case, the first bundle in // wins, and the other splits. - if (attempts >= 2 && !self.minimal_bundle(bundle)) - || lowest_cost_evict_conflict_cost.is_none() - || self.bundle_spill_weight(bundle) <= lowest_cost_evict_conflict_cost.unwrap() + // + // Note that we don't split if the bundle is minimal. + if !self.minimal_bundle(bundle) + && (attempts >= 2 + || lowest_cost_evict_conflict_cost.is_none() + || self.bundle_spill_weight(bundle) <= lowest_cost_evict_conflict_cost.unwrap()) { log::debug!( " -> deciding to split: our spill weight is {}", @@ -3161,47 +3202,7 @@ impl<'a, F: Function> Env<'a, F> { } } - // A minimal bundle cannot be split. - if self.minimal_bundle(bundle) { - if let Requirement::Register(class) = req { - // Check if this is a too-many-live-registers situation. - let range = self.bundles[bundle.index()].ranges[0].range; - let mut min_bundles_assigned = 0; - let mut fixed_assigned = 0; - let mut total_regs = 0; - for preg in self.env.preferred_regs_by_class[class as u8 as usize] - .iter() - .chain( - self.env.non_preferred_regs_by_class[class as u8 as usize].iter(), - ) - { - if let Some(&lr) = self.pregs[preg.index()] - .allocations - .btree - .get(&LiveRangeKey::from_range(&range)) - { - if lr.is_valid() { - if self.minimal_bundle(self.ranges[lr.index()].bundle) { - min_bundles_assigned += 1; - } - } else { - fixed_assigned += 1; - } - } - total_regs += 1; - } - if min_bundles_assigned + fixed_assigned == total_regs { - return Err(RegAllocError::TooManyLiveRegs); - } - } - } - if self.minimal_bundle(bundle) { - self.dump_state(); - } - assert!(!self.minimal_bundle(bundle)); - self.split_and_requeue_bundle(bundle, split_at_point, requeue_with_reg); - return Ok(()); } else { // Evict all bundles in `conflicting bundles` and try again. From e49727dc75698f7a0c2904ed58f2cd5398eb8773 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Jun 2021 15:32:12 -0700 Subject: [PATCH 096/155] Fuzzbug fix: fix some weirdness with BTree iteration inner loop --- src/ion/mod.rs | 117 ++++++++++++++++++++++++++----------------------- 1 file changed, 61 insertions(+), 56 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 86715698..d12099f9 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2473,80 +2473,85 @@ impl<'a, F: Function> Env<'a, F> { // literally the range `range`. let bundle_ranges = &self.bundles[bundle.index()].ranges; let from_key = LiveRangeKey::from_range(&bundle_ranges.first().unwrap().range); - let to_key = LiveRangeKey::from_range(&bundle_ranges.last().unwrap().range); - assert!(from_key <= to_key); let mut preg_range_iter = self.pregs[reg.index()] .allocations .btree - .range(from_key..=to_key) + .range(from_key..) .peekable(); log::debug!( - "alloc map for {:?}: {:?}", + "alloc map for {:?} in range {:?}..: {:?}", reg, + from_key, self.pregs[reg.index()].allocations.btree ); - for entry in bundle_ranges { + 'ranges: for entry in bundle_ranges { log::debug!(" -> range LR {:?}: {:?}", entry.index, entry.range); let key = LiveRangeKey::from_range(&entry.range); - // Advance our BTree traversal until it is >= this bundle - // range (i.e., skip PReg allocations in the BTree that - // are completely before this bundle range). 
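// Shape of the rewritten scan below: the labeled 'ranges loop walks this
// bundle's ranges in order, while the nested 'alloc loop advances a single
// peekable cursor over the preg's committed ranges -- `continue 'alloc` skips
// entries that end before the current range, `break 'alloc` moves on to the
// next bundle range, and `break 'ranges` ends the probe once the cursor is
// exhausted.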
+ 'alloc: loop { + log::debug!(" -> PReg range {:?}", preg_range_iter.peek()); - while preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { - log::debug!( - "Skipping PReg range {:?}", - preg_range_iter.peek().unwrap().0 - ); - preg_range_iter.next(); - } + // Advance our BTree traversal until it is >= this bundle + // range (i.e., skip PReg allocations in the BTree that + // are completely before this bundle range). - // If there are no more PReg allocations, we're done! - if preg_range_iter.peek().is_none() { - log::debug!(" -> no more PReg allocations; so no conflict possible!"); - break; - } + if preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { + log::debug!( + "Skipping PReg range {:?}", + preg_range_iter.peek().unwrap().0 + ); + preg_range_iter.next(); + continue 'alloc; + } - // If the current PReg range is beyond this range, there is no conflict; continue. - if *preg_range_iter.peek().unwrap().0 > key { - log::debug!( - " -> next PReg allocation is at {:?}; moving to next VReg range", - preg_range_iter.peek().unwrap().0 - ); - continue; - } + // If there are no more PReg allocations, we're done! + if preg_range_iter.peek().is_none() { + log::debug!(" -> no more PReg allocations; so no conflict possible!"); + break 'ranges; + } - // Otherwise, there is a conflict. - let preg_key = *preg_range_iter.peek().unwrap().0; - assert_eq!(preg_key, key); // Assert that this range overlaps. - let preg_range = preg_range_iter.next().unwrap().1; - - log::debug!(" -> btree contains range {:?} that overlaps", preg_range); - if preg_range.is_valid() { - log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); - // range from an allocated bundle: find the bundle and add to - // conflicts list. - let conflict_bundle = self.ranges[preg_range.index()].bundle; - log::debug!(" -> conflict bundle {:?}", conflict_bundle); - if !conflicts.iter().any(|b| *b == conflict_bundle) { - conflicts.push(conflict_bundle); - max_conflict_weight = std::cmp::max( - max_conflict_weight, - self.bundles[conflict_bundle.index()].cached_spill_weight(), + // If the current PReg range is beyond this range, there is no conflict; continue. + if *preg_range_iter.peek().unwrap().0 > key { + log::debug!( + " -> next PReg allocation is at {:?}; moving to next VReg range", + preg_range_iter.peek().unwrap().0 ); - if max_allowable_cost.is_some() - && max_conflict_weight > max_allowable_cost.unwrap() - { - return AllocRegResult::ConflictHighCost; + break 'alloc; + } + + // Otherwise, there is a conflict. + let preg_key = *preg_range_iter.peek().unwrap().0; + assert_eq!(preg_key, key); // Assert that this range overlaps. + let preg_range = preg_range_iter.next().unwrap().1; + + log::debug!(" -> btree contains range {:?} that overlaps", preg_range); + if preg_range.is_valid() { + log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); + // range from an allocated bundle: find the bundle and add to + // conflicts list. 
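// (A valid index found here names an owning bundle that could be evicted; an
// invalid index marks a direct PReg reservation, e.g. from a clobber, which
// the `else` branch below reports as ConflictWithFixed. The max_allowable_cost
// check lets the caller abandon a probe that is already worse than its best
// alternative.)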
+ let conflict_bundle = self.ranges[preg_range.index()].bundle; + log::debug!(" -> conflict bundle {:?}", conflict_bundle); + if !conflicts.iter().any(|b| *b == conflict_bundle) { + conflicts.push(conflict_bundle); + max_conflict_weight = std::cmp::max( + max_conflict_weight, + self.bundles[conflict_bundle.index()].cached_spill_weight(), + ); + if max_allowable_cost.is_some() + && max_conflict_weight > max_allowable_cost.unwrap() + { + log::debug!(" -> reached high cost, retrying early"); + return AllocRegResult::ConflictHighCost; + } } + } else { + log::debug!(" -> conflict with fixed reservation"); + // range from a direct use of the PReg (due to clobber). + return AllocRegResult::ConflictWithFixed( + max_conflict_weight, + ProgPoint::from_index(preg_key.from), + ); } - } else { - log::debug!(" -> conflict with fixed reservation"); - // range from a direct use of the PReg (due to clobber). - return AllocRegResult::ConflictWithFixed( - max_conflict_weight, - ProgPoint::from_index(preg_key.from), - ); } } From 2614eac21ea2cb67f83d52bf4eb42d4af500fe34 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Jun 2021 16:31:33 -0700 Subject: [PATCH 097/155] fuzzbug fix: restore clean error exit required by regalloc.rs fuzzer on too-many-live-regs error --- src/ion/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index d12099f9..63ed6e18 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3128,7 +3128,9 @@ impl<'a, F: Function> Env<'a, F> { // regalloc.rs fuzzer depends on the register allocator to // correctly reject impossible-to-allocate programs in // order to discard invalid test cases. - if attempts >= 2 && self.minimal_bundle(bundle) { + if self.minimal_bundle(bundle) + && (attempts >= 2 || lowest_cost_evict_conflict_cost.is_none()) + { if let Requirement::Register(class) = req { // Check if this is a too-many-live-registers situation. let range = self.bundles[bundle.index()].ranges[0].range; From a2a770ec50a9436a9bd68d8d831ecdef8ad50da8 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Jun 2021 18:57:07 -0700 Subject: [PATCH 098/155] Fuzzbug fix --- src/ion/mod.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 63ed6e18..1ed8e3de 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3121,15 +3121,17 @@ impl<'a, F: Function> Env<'a, F> { || lowest_cost_evict_conflict_cost.is_some() ); - // Check that we haven't attempted more than once with a - // minimal bundle -- this would indicate a bug. We detect - // the "too-many-live-registers" case here and return an - // error cleanly, rather than panicking, because the - // regalloc.rs fuzzer depends on the register allocator to - // correctly reject impossible-to-allocate programs in - // order to discard invalid test cases. + let our_spill_weight = self.bundle_spill_weight(bundle); + + // We detect the "too-many-live-registers" case here and + // return an error cleanly, rather than panicking, because + // the regalloc.rs fuzzer depends on the register + // allocator to correctly reject impossible-to-allocate + // programs in order to discard invalid test cases. if self.minimal_bundle(bundle) - && (attempts >= 2 || lowest_cost_evict_conflict_cost.is_none()) + && (attempts >= 2 + || lowest_cost_evict_conflict_cost.is_none() + || lowest_cost_evict_conflict_cost.unwrap() >= our_spill_weight) { if let Requirement::Register(class) = req { // Check if this is a too-many-live-registers situation. 
@@ -3180,7 +3182,7 @@ impl<'a, F: Function> Env<'a, F> { if !self.minimal_bundle(bundle) && (attempts >= 2 || lowest_cost_evict_conflict_cost.is_none() - || self.bundle_spill_weight(bundle) <= lowest_cost_evict_conflict_cost.unwrap()) + || our_spill_weight <= lowest_cost_evict_conflict_cost.unwrap()) { log::debug!( " -> deciding to split: our spill weight is {}", From 2fe276ca043d024f98b57515b59d1fcf9a550f88 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Jun 2021 23:10:34 -0700 Subject: [PATCH 099/155] BTreeMap probe fix (fuzzbug): BTree does not interact nicely with LiveRangeKey definition of equality; need to probe with one-less-than start to get proper range --- src/ion/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 1ed8e3de..81564d52 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2472,7 +2472,10 @@ impl<'a, F: Function> Env<'a, F> { // any preg range that *overlaps* with range `range`, not // literally the range `range`. let bundle_ranges = &self.bundles[bundle.index()].ranges; - let from_key = LiveRangeKey::from_range(&bundle_ranges.first().unwrap().range); + let from_key = LiveRangeKey::from_range(&CodeRange { + from: bundle_ranges.first().unwrap().range.from, + to: bundle_ranges.first().unwrap().range.from, + }); let mut preg_range_iter = self.pregs[reg.index()] .allocations .btree @@ -3122,6 +3125,7 @@ impl<'a, F: Function> Env<'a, F> { ); let our_spill_weight = self.bundle_spill_weight(bundle); + log::debug!(" -> our spill weight: {}", our_spill_weight); // We detect the "too-many-live-registers" case here and // return an error cleanly, rather than panicking, because From dc2b0d1913695e2b37cdb8c52f41979b5c9a7a1f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Jun 2021 23:13:08 -0700 Subject: [PATCH 100/155] Add a perf idea to TODO list --- src/ion/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 81564d52..c16bc3d0 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -42,8 +42,11 @@ first preferred one), reload into it, spill out of it, and then pop old val - - Avoid rebuilding MachineEnv on every function allocation in - regalloc.rs shim + - Play more with commitment-map probing: linear scan through btree + (good for dense bundles, i.e., close ranges) vs. independent + lookup per range in bundle. Adapt based on distance? Do a fresh + range lookup if we skip N btree entries without advancing into + current bundle range? - Profile allocations */ From 6a0739b62a81bdeb724ecdedf0d104a4e496816f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 3 Jun 2021 00:18:27 -0700 Subject: [PATCH 101/155] Implement spill-bundle: move all empty ranges, and empty leading/trailing pieces surrounding split points, to a single spill bundle, in an attempt to avoid excessive movement --- src/ion/mod.rs | 232 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 210 insertions(+), 22 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index c16bc3d0..e6761750 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -32,6 +32,10 @@ scan in a single range while resolving moves; in-edge makes dirty. + - or: track "at most one" def-points: at a join, new def point + if at least one of the in-edges is a def point. Do this during + liveness by tracking ...? 
+ - Avoid requiring two scratch regs: - Require machine impl to be able to (i) push a reg, (ii) pop a reg; then generate a balanced pair of push/pop, using the stack @@ -2736,6 +2740,26 @@ impl<'a, F: Function> Env<'a, F> { } } + fn get_or_create_spill_bundle( + &mut self, + bundle: LiveBundleIndex, + create_if_absent: bool, + ) -> Option { + let ssidx = self.bundles[bundle.index()].spillset; + let idx = self.spillsets[ssidx.index()].spill_bundle; + if idx.is_valid() { + Some(idx) + } else if create_if_absent { + let idx = self.create_bundle(); + self.spillsets[ssidx.index()].spill_bundle = idx; + self.bundles[idx.index()].spillset = ssidx; + self.spilled_bundles.push(idx); + Some(idx) + } else { + None + } + } + fn split_and_requeue_bundle( &mut self, bundle: LiveBundleIndex, @@ -2913,16 +2937,165 @@ impl<'a, F: Function> Env<'a, F> { } self.bundles[new_bundle.index()].ranges = new_lr_list; - self.recompute_bundle_properties(bundle); - self.recompute_bundle_properties(new_bundle); - let prio = self.compute_bundle_prio(bundle); - let new_prio = self.compute_bundle_prio(new_bundle); - self.bundles[bundle.index()].prio = prio; - self.bundles[new_bundle.index()].prio = new_prio; - self.allocation_queue - .insert(bundle, prio as usize, reg_hint); - self.allocation_queue - .insert(new_bundle, new_prio as usize, reg_hint); + // Finally, handle moving LRs to the spill bundle when + // appropriate: If the first range in `new_bundle` or last + // range in `bundle` has "empty space" beyond the first or + // last use (respectively), trim it and put an empty LR into + // the spill bundle. (We are careful to treat the "starts at + // def" flag as an implicit first def even if no def-type Use + // is present.) + while let Some(entry) = self.bundles[bundle.index()].ranges.last().cloned() { + let end = entry.range.to; + let vreg = self.ranges[entry.index.index()].vreg; + let last_use = self.ranges[entry.index.index()].uses.last().map(|u| u.pos); + if last_use.is_none() { + let spill = self + .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) + .unwrap(); + log::debug!( + " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", + bundle, + entry.index, + spill + ); + self.bundles[spill.index()].ranges.push(entry); + self.bundles[bundle.index()].ranges.pop(); + self.ranges[entry.index.index()].bundle = spill; + continue; + } + let last_use = last_use.unwrap(); + let split = ProgPoint::before(last_use.inst().next()); + if split < end { + let spill = self + .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) + .unwrap(); + self.bundles[bundle.index()] + .ranges + .last_mut() + .unwrap() + .range + .to = split; + self.ranges[self.bundles[bundle.index()] + .ranges + .last() + .unwrap() + .index + .index()] + .range + .to = split; + let range = CodeRange { + from: split, + to: end, + }; + let empty_lr = self.create_liverange(range); + self.bundles[spill.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + self.ranges[empty_lr.index()].bundle = spill; + self.vregs[vreg.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + log::debug!( + " -> bundle {:?} range {:?}: last use implies split point {:?}", + bundle, + entry.index, + split + ); + log::debug!( + " -> moving trailing empty region to new spill bundle {:?} with new LR {:?}", + spill, + empty_lr + ); + } + break; + } + while let Some(entry) = self.bundles[new_bundle.index()].ranges.first().cloned() { + if 
self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef) { + break; + } + let start = entry.range.from; + let vreg = self.ranges[entry.index.index()].vreg; + let first_use = self.ranges[entry.index.index()].uses.first().map(|u| u.pos); + if first_use.is_none() { + let spill = self + .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) + .unwrap(); + log::debug!( + " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", + new_bundle, + entry.index, + spill + ); + self.bundles[spill.index()].ranges.push(entry); + self.bundles[new_bundle.index()].ranges.drain(..1); + self.ranges[entry.index.index()].bundle = spill; + continue; + } + let first_use = first_use.unwrap(); + let split = ProgPoint::before(first_use.inst()); + if split > start { + let spill = self + .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) + .unwrap(); + self.bundles[new_bundle.index()] + .ranges + .first_mut() + .unwrap() + .range + .from = split; + self.ranges[self.bundles[new_bundle.index()] + .ranges + .first() + .unwrap() + .index + .index()] + .range + .from = split; + let range = CodeRange { + from: start, + to: split, + }; + let empty_lr = self.create_liverange(range); + self.bundles[spill.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + self.ranges[empty_lr.index()].bundle = spill; + self.vregs[vreg.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + log::debug!( + " -> bundle {:?} range {:?}: first use implies split point {:?}", + bundle, + entry.index, + first_use, + ); + log::debug!( + " -> moving leading empty region to new spill bundle {:?} with new LR {:?}", + spill, + empty_lr + ); + } + break; + } + + if self.bundles[bundle.index()].ranges.len() > 0 { + self.recompute_bundle_properties(bundle); + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.allocation_queue + .insert(bundle, prio as usize, reg_hint); + } + if self.bundles[new_bundle.index()].ranges.len() > 0 { + self.recompute_bundle_properties(new_bundle); + let new_prio = self.compute_bundle_prio(new_bundle); + self.bundles[new_bundle.index()].prio = new_prio; + self.allocation_queue + .insert(new_bundle, new_prio as usize, reg_hint); + } } fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { @@ -2970,6 +3143,26 @@ impl<'a, F: Function> Env<'a, F> { return Ok(()); } + // If no requirement at all (because no uses), and *if* a + // spill bundle is already present, then move the LRs over to + // the spill bundle right away. + match req { + Requirement::Unknown | Requirement::Any(_) => { + if let Some(spill) = + self.get_or_create_spill_bundle(bundle, /* create_if_absent = */ false) + { + let mut list = + std::mem::replace(&mut self.bundles[bundle.index()].ranges, smallvec![]); + for entry in &list { + self.ranges[entry.index.index()].bundle = spill; + } + self.bundles[spill.index()].ranges.extend(list.drain(..)); + return Ok(()); + } + } + _ => {} + } + // Try to allocate! let mut attempts = 0; loop { @@ -2988,14 +3181,13 @@ impl<'a, F: Function> Env<'a, F> { } Requirement::Any(_) | Requirement::Unknown => { - // If a register is not *required*, spill now (we'll retry - // allocation on spilled bundles later). 
- log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); self.spilled_bundles.push(bundle); return Ok(()); } - Requirement::Conflict => unreachable!(), + Requirement::Conflict => { + unreachable!() + } }; // Scan all pregs, or the one fixed preg, and attempt to allocate. @@ -3238,6 +3430,7 @@ impl<'a, F: Function> Env<'a, F> { let bundle = self.spilled_bundles[i]; // don't borrow self let class = self.spillsets[self.bundles[bundle.index()].spillset.index()].class; + let hint = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; // This may be an empty-range bundle whose ranges are not // sorted; sort all range-lists again here. @@ -3247,14 +3440,9 @@ impl<'a, F: Function> Env<'a, F> { let mut success = false; self.stats.spill_bundle_reg_probes += 1; - for preg in RegTraversalIter::new( - self.env, - class, - PReg::invalid(), - PReg::invalid(), - bundle.index(), - None, - ) { + for preg in + RegTraversalIter::new(self.env, class, hint, PReg::invalid(), bundle.index(), None) + { log::debug!("trying bundle {:?} to preg {:?}", bundle, preg); let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = From 00e4240c93ff4fb0ce3510e1fd72ab57267a3512 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 3 Jun 2021 17:34:19 -0700 Subject: [PATCH 102/155] merge bundles much faster by just concatenating range-lists and unstable-sorting, rather than a merge-sort-like traversal. Rust stdlib sort is very optimized. clang.wasm 9.1s -> 6.8s now. --- src/ion/mod.rs | 136 ++++++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 51 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index e6761750..05280d42 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -264,6 +264,16 @@ impl LiveBundle { self.spill_weight_and_props & (1 << 29) != 0 } + #[inline(always)] + fn set_cached_fixed(&mut self) { + self.spill_weight_and_props |= 1 << 30; + } + + #[inline(always)] + fn set_cached_stack(&mut self) { + self.spill_weight_and_props |= 1 << 29; + } + #[inline(always)] fn cached_spill_weight(&self) -> u32 { self.spill_weight_and_props & ((1 << 29) - 1) @@ -2071,12 +2081,18 @@ impl<'a, F: Function> Env<'a, F> { } // Check for a requirements conflict. - let req = self - .compute_requirement(from) - .merge(self.compute_requirement(to)); - if req == Requirement::Conflict { - log::debug!(" -> conflicting requirements; aborting merge"); - return false; + if self.bundles[from.index()].cached_stack() + || self.bundles[from.index()].cached_fixed() + || self.bundles[to.index()].cached_stack() + || self.bundles[to.index()].cached_fixed() + { + let req = self + .compute_requirement(from) + .merge(self.compute_requirement(to)); + if req == Requirement::Conflict { + log::debug!(" -> conflicting requirements; aborting merge"); + return false; + } } log::debug!(" -> committing to merge"); @@ -2085,8 +2101,6 @@ impl<'a, F: Function> Env<'a, F> { // them! We do this with a merge-sort-like scan over both // lists, building a new range list and replacing the list on // `to` when we're done. - let mut idx_from = 0; - let mut idx_to = 0; if ranges_from.is_empty() { // `from` bundle is empty -- trivial merge. 
log::debug!(" -> from bundle{} is empty; trivial merge", from.index()); @@ -2115,63 +2129,57 @@ impl<'a, F: Function> Env<'a, F> { } self.bundles[to.index()].ranges = list; + if self.bundles[from.index()].cached_stack() { + self.bundles[to.index()].set_cached_stack(); + } + if self.bundles[from.index()].cached_fixed() { + self.bundles[to.index()].set_cached_fixed(); + } + return true; } - // Two non-empty lists of LiveRanges: traverse both simultaneously and - // merge ranges into `merged`. - let mut merged: LiveRangeList = smallvec![]; log::debug!( "merging: ranges_from = {:?} ranges_to = {:?}", ranges_from, ranges_to ); - while idx_from < ranges_from.len() || idx_to < ranges_to.len() { - if idx_from < ranges_from.len() && idx_to < ranges_to.len() { - if ranges_from[idx_from].range.from <= ranges_to[idx_to].range.from { - self.ranges[ranges_from[idx_from].index.index()].bundle = to; - merged.push(ranges_from[idx_from]); - idx_from += 1; - } else { - merged.push(ranges_to[idx_to]); - idx_to += 1; - } - } else if idx_from < ranges_from.len() { - for entry in &ranges_from[idx_from..] { - self.ranges[entry.index.index()].bundle = to; - } - merged.extend_from_slice(&ranges_from[idx_from..]); - break; - } else { - assert!(idx_to < ranges_to.len()); - merged.extend_from_slice(&ranges_to[idx_to..]); - break; - } + + // Two non-empty lists of LiveRanges: concatenate and + // sort. This is faster than a mergesort-like merge into a new + // list, empirically. + let from_list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]); + for entry in &from_list { + self.ranges[entry.index.index()].bundle = to; } + self.bundles[to.index()] + .ranges + .extend_from_slice(&from_list[..]); + self.bundles[to.index()] + .ranges + .sort_unstable_by_key(|entry| entry.range.from); - #[cfg(debug_assertions)] - { - log::debug!("merging: merged = {:?}", merged); + if self.annotations_enabled { + log::debug!("merging: merged = {:?}", self.bundles[to.index()].ranges); let mut last_range = None; - for entry in &merged { + for i in 0..self.bundles[to.index()].ranges.len() { + let entry = self.bundles[to.index()].ranges[i]; if last_range.is_some() { assert!(last_range.unwrap() < entry.range); } last_range = Some(entry.range); if self.ranges[entry.index.index()].bundle == from { - if self.annotations_enabled { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); - } + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), + ); } log::debug!( @@ -2182,9 +2190,6 @@ impl<'a, F: Function> Env<'a, F> { } } - self.bundles[to.index()].ranges = merged; - self.bundles[from.index()].ranges.clear(); - if self.bundles[from.index()].spillset != self.bundles[to.index()].spillset { let from_vregs = std::mem::replace( &mut self.spillsets[self.bundles[from.index()].spillset.index()].vregs, @@ -2198,6 +2203,13 @@ impl<'a, F: Function> Env<'a, F> { } } + if self.bundles[from.index()].cached_stack() { + self.bundles[to.index()].set_cached_stack(); + } + if self.bundles[from.index()].cached_fixed() { + self.bundles[to.index()].set_cached_fixed(); + } + true } @@ -2239,6 +2251,28 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[entry.index.index()].bundle = bundle; } + let mut fixed = false; + let mut stack = false; + for 
entry in &self.bundles[bundle.index()].ranges { + for u in &self.ranges[entry.index.index()].uses { + if let OperandPolicy::FixedReg(_) = u.operand.policy() { + fixed = true; + } + if let OperandPolicy::Stack = u.operand.policy() { + stack = true; + } + if fixed && stack { + break; + } + } + } + if fixed { + self.bundles[bundle.index()].set_cached_fixed(); + } + if stack { + self.bundles[bundle.index()].set_cached_stack(); + } + // Create a spillslot for this bundle. let ssidx = SpillSetIndex::new(self.spillsets.len()); let reg = self.vreg_regs[vreg.index()]; From 5560499b802a90e06d99b7c2a39eb5871da91fe2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 3 Jun 2021 18:17:28 -0700 Subject: [PATCH 103/155] Adaptive commitment-map scanning: re-probe from root if we skip too many entries in linear BTree scan --- src/ion/mod.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 05280d42..97ea63d4 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -46,12 +46,6 @@ first preferred one), reload into it, spill out of it, and then pop old val - - Play more with commitment-map probing: linear scan through btree - (good for dense bundles, i.e., close ranges) vs. independent - lookup per range in bundle. Adapt based on distance? Do a fresh - range lookup if we skip N btree entries without advancing into - current bundle range? - - Profile allocations */ @@ -2532,6 +2526,7 @@ impl<'a, F: Function> Env<'a, F> { log::debug!(" -> range LR {:?}: {:?}", entry.index, entry.range); let key = LiveRangeKey::from_range(&entry.range); + let mut skips = 0; 'alloc: loop { log::debug!(" -> PReg range {:?}", preg_range_iter.peek()); @@ -2545,8 +2540,23 @@ impl<'a, F: Function> Env<'a, F> { preg_range_iter.peek().unwrap().0 ); preg_range_iter.next(); + skips += 1; + if skips >= 16 { + let from_pos = entry.range.from; + let from_key = LiveRangeKey::from_range(&CodeRange { + from: from_pos, + to: from_pos, + }); + preg_range_iter = self.pregs[reg.index()] + .allocations + .btree + .range(from_key..) + .peekable(); + skips = 0; + } continue 'alloc; } + skips = 0; // If there are no more PReg allocations, we're done! if preg_range_iter.peek().is_none() { From 30f42a8717b231bb0f57f71105a5ab83837ba658 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 3 Jun 2021 23:48:33 -0700 Subject: [PATCH 104/155] Fix fuzzbug: properly detect too-many-live-regs condition on fuzzing input. Must be careful in how we probe the BTree when we have multiple "equal" (overlapping) keys. --- src/ion/mod.rs | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 97ea63d4..fd3c07bc 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -451,6 +451,14 @@ impl LiveRangeKey { to: range.to.to_index(), } } + + #[inline(always)] + fn to_range(&self) -> CodeRange { + CodeRange { + from: ProgPoint::from_index(self.from), + to: ProgPoint::from_index(self.to), + } + } } impl std::cmp::PartialEq for LiveRangeKey { @@ -2761,7 +2769,7 @@ impl<'a, F: Function> Env<'a, F> { ); } - fn minimal_bundle(&mut self, bundle: LiveBundleIndex) -> bool { + fn minimal_bundle(&self, bundle: LiveBundleIndex) -> bool { self.bundles[bundle.index()].cached_minimal() } @@ -3379,6 +3387,7 @@ impl<'a, F: Function> Env<'a, F> { if let Requirement::Register(class) = req { // Check if this is a too-many-live-registers situation. 
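// A self-contained sketch (illustration only, not part of the patch) of the
// idea behind the "Adaptive commitment-map scanning" commit above: scan a
// sorted map linearly while looking for the next key of interest, but if too
// many entries are skipped without reaching it, re-probe from the root -- an
// O(log n) lookup -- instead of continuing the linear walk. The u32 keys here
// stand in for LiveRangeKey program points.
use std::collections::BTreeMap;

fn first_key_at_or_after(map: &BTreeMap<u32, u32>, key: u32) -> Option<u32> {
    let mut iter = map.range(..).peekable();
    let mut skips = 0;
    while let Some((&k, _)) = iter.peek() {
        if k >= key {
            return Some(k);
        }
        iter.next();
        skips += 1;
        if skips >= 16 {
            // Too many linear skips: a fresh probe from the root is cheaper.
            iter = map.range(key..).peekable();
            skips = 0;
        }
    }
    None
}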
let range = self.bundles[bundle.index()].ranges[0].range; + log::debug!("checking for too many live regs"); let mut min_bundles_assigned = 0; let mut fixed_assigned = 0; let mut total_regs = 0; @@ -3386,22 +3395,40 @@ impl<'a, F: Function> Env<'a, F> { .iter() .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) { - if let Some(&lr) = self.pregs[preg.index()] - .allocations - .btree - .get(&LiveRangeKey::from_range(&range)) - { + log::debug!(" -> PR {:?}", preg); + let start = LiveRangeKey::from_range(&CodeRange { + from: range.from.prev(), + to: range.from.prev(), + }); + for (key, lr) in self.pregs[preg.index()].allocations.btree.range(start..) { + let preg_range = key.to_range(); + if preg_range.to <= range.from { + continue; + } + if preg_range.from >= range.to { + break; + } if lr.is_valid() { if self.minimal_bundle(self.ranges[lr.index()].bundle) { + log::debug!(" -> min bundle {:?}", lr); min_bundles_assigned += 1; + } else { + log::debug!(" -> non-min bundle {:?}", lr); } } else { + log::debug!(" -> fixed bundle"); fixed_assigned += 1; } } total_regs += 1; } - if min_bundles_assigned + fixed_assigned == total_regs { + log::debug!( + " -> total {}, fixed {}, min {}", + total_regs, + fixed_assigned, + min_bundles_assigned + ); + if min_bundles_assigned + fixed_assigned >= total_regs { return Err(RegAllocError::TooManyLiveRegs); } } From 0eaa0fde0657e8df330bc06f95b0bd0b01ceb8ac Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 5 Jun 2021 14:47:55 -0700 Subject: [PATCH 105/155] Fix to checker: analyze all blocks, even if out-state of entry block is empty --- src/checker.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index e3afb9f4..4adecb3d 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -613,10 +613,16 @@ impl<'a, F: Function> Checker<'a, F> { /// Perform the dataflow analysis to compute checker state at each BB entry. 
fn analyze(&mut self) { let mut queue = VecDeque::new(); - queue.push_back(self.f.entry_block()); + let mut queue_set = HashSet::new(); + for block in 0..self.f.blocks() { + let block = Block::new(block); + queue.push_back(block); + queue_set.insert(block); + } while !queue.is_empty() { let block = queue.pop_front().unwrap(); + queue_set.remove(&block); let mut state = self.bb_in.get(&block).cloned().unwrap(); debug!("analyze: block {} has state {:?}", block.index(), state); for inst in self.bb_insts.get(&block).unwrap() { @@ -637,7 +643,10 @@ impl<'a, F: Function> Checker<'a, F> { new_state ); self.bb_in.insert(succ, new_state); - queue.push_back(succ); + if !queue_set.contains(&succ) { + queue.push_back(succ); + queue_set.insert(succ); + } } } } From 2be7bdbc2271a5f39a8eea17e7257bc4602e8491 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 7 Jun 2021 12:27:58 -0700 Subject: [PATCH 106/155] Split-at-first-conflict: first conflict is first of (start of our range), (start of conflict range), not just the latter; otherwise we have a too-early split sometimes --- src/ion/mod.rs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index fd3c07bc..5be54068 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -619,7 +619,7 @@ impl Requirement { #[derive(Clone, Debug, PartialEq, Eq)] enum AllocRegResult { Allocated(Allocation), - Conflict(LiveBundleVec), + Conflict(LiveBundleVec, ProgPoint), ConflictWithFixed(u32, ProgPoint), ConflictHighCost, } @@ -2530,6 +2530,8 @@ impl<'a, F: Function> Env<'a, F> { from_key, self.pregs[reg.index()].allocations.btree ); + let mut first_conflict: Option = None; + 'ranges: for entry in bundle_ranges { log::debug!(" -> range LR {:?}: {:?}", entry.index, entry.range); let key = LiveRangeKey::from_range(&entry.range); @@ -2606,6 +2608,13 @@ impl<'a, F: Function> Env<'a, F> { return AllocRegResult::ConflictHighCost; } } + + if first_conflict.is_none() { + first_conflict = Some(ProgPoint::from_index(std::cmp::max( + preg_key.from, + key.from, + ))); + } } else { log::debug!(" -> conflict with fixed reservation"); // range from a direct use of the PReg (due to clobber). @@ -2618,7 +2627,7 @@ impl<'a, F: Function> Env<'a, F> { } if conflicts.len() > 0 { - return AllocRegResult::Conflict(conflicts); + return AllocRegResult::Conflict(conflicts, first_conflict.unwrap()); } // We can allocate! Add our ranges to the preg's BTree. @@ -3290,11 +3299,12 @@ impl<'a, F: Function> Env<'a, F> { alloc.as_reg().unwrap(); return Ok(()); } - AllocRegResult::Conflict(bundles) => { - log::debug!(" -> conflict with bundles {:?}", bundles); - - let first_conflict_point = - self.bundles[bundles[0].index()].ranges[0].range.from; + AllocRegResult::Conflict(bundles, first_conflict_point) => { + log::debug!( + " -> conflict with bundles {:?}, first conflict at {:?}", + bundles, + first_conflict_point + ); let conflict_cost = self.maximum_spill_weight_in_bundle_set(&bundles); From c6bcd3c94115580631a62e94bcc86e3e3c384962 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 7 Jun 2021 21:15:32 -0700 Subject: [PATCH 107/155] WIP: redundant-move elimination. 
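The essence of the approach, in a deliberately simplified sketch (the names and
types below are illustrative stand-ins, not the structures introduced by this
patch): track, per register, which spill slot it currently holds a clean copy
of; a spill back into that same slot, or a reload from it into that same
register, can then be elided. Any other write to the register or the slot must
invalidate the tracked pairing.

use std::collections::HashMap;

enum MoveKind {
    Spill { reg: u8, slot: u32 },  // register -> stack
    Reload { slot: u32, reg: u8 }, // stack -> register
}

/// Returns true if the move is redundant and can be elided, updating the
/// "register holds a clean copy of slot" map as a side effect.
fn process_move(known_copy: &mut HashMap<u8, u32>, mv: MoveKind) -> bool {
    match mv {
        MoveKind::Spill { reg, slot } | MoveKind::Reload { slot, reg } => {
            if known_copy.get(&reg) == Some(&slot) {
                // The register and the slot already hold the same value.
                true
            } else {
                // After this move they agree; remember that.
                known_copy.insert(reg, slot);
                false
            }
        }
    }
}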
--- src/checker.rs | 31 +++++++-- src/ion/mod.rs | 177 ++++++++++++++++++++++++++++++++++++++----------- src/lib.rs | 15 ++++- 3 files changed, 177 insertions(+), 46 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index 4adecb3d..fa638cc6 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -394,6 +394,11 @@ impl CheckerState { .insert(*alloc, CheckerValue::Reg(*vreg, reftyped)); } } + &CheckerInst::DefAlloc { alloc, vreg } => { + let reftyped = checker.reftyped_vregs.contains(&vreg); + self.allocations + .insert(alloc, CheckerValue::Reg(vreg, reftyped)); + } &CheckerInst::Safepoint { ref slots, .. } => { for (alloc, value) in &mut self.allocations { if let CheckerValue::Reg(_, true) = *value { @@ -475,6 +480,11 @@ pub(crate) enum CheckerInst { allocs: Vec, }, + /// Define an allocation's contents. Like BlockParams but for one + /// allocation. Used sometimes when moves are elided but ownership + /// of a value is logically transferred to a new vreg. + DefAlloc { alloc: Allocation, vreg: VReg }, + /// A safepoint, with the given SpillSlots specified as containing /// reftyped values. All other reftyped values become invalid. Safepoint { inst: Inst, slots: Vec }, @@ -583,17 +593,23 @@ impl<'a, F: Function> Checker<'a, F> { } debug!("checker: adding edit {:?} at pos {:?}", edit, pos); match edit { - &Edit::Move { from, to, .. } => { + &Edit::Move { from, to, to_vreg } => { self.bb_insts .get_mut(&block) .unwrap() .push(CheckerInst::Move { into: to, from }); + if let Some(vreg) = to_vreg { + self.bb_insts + .get_mut(&block) + .unwrap() + .push(CheckerInst::DefAlloc { alloc: to, vreg }); + } } - &Edit::DefAlloc { .. } => { - unimplemented!(concat!( - "DefAlloc is used only when dealing with pinned vregs, ", - "which are only used by regalloc.rs shim; use checker at that level!" - )); + &Edit::DefAlloc { alloc, vreg } => { + self.bb_insts + .get_mut(&block) + .unwrap() + .push(CheckerInst::DefAlloc { alloc, vreg }); } &Edit::BlockParams { ref vregs, @@ -732,6 +748,9 @@ impl<'a, F: Function> Checker<'a, F> { } debug!(" blockparams: {}", args.join(", ")); } + &CheckerInst::DefAlloc { alloc, vreg } => { + debug!(" defalloc: {}:{}", vreg, alloc); + } &CheckerInst::Safepoint { ref slots, .. } => { let mut slotargs = vec![]; for &slot in slots { diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 5be54068..34c026ab 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -3849,8 +3849,6 @@ impl<'a, F: Function> Env<'a, F> { None }; - let mut clean_spillslot: Option = None; - // For each range in each vreg, insert moves or // half-moves. We also scan over `blockparam_ins` and // `blockparam_outs`, which are sorted by (block, vreg), @@ -3925,19 +3923,9 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef); debug_assert!(prev_alloc != Allocation::none()); - // If this is a stack-to-reg move, track that the reg is a clean copy of a spillslot. - if prev_alloc.is_stack() && alloc.is_reg() { - clean_spillslot = Some(prev_alloc.as_stack().unwrap()); - } - // If this is a reg-to-stack move, elide it if the spillslot is still clean. 
- let skip_spill = prev_alloc.is_reg() - && alloc.is_stack() - && clean_spillslot == alloc.as_stack(); - if prev_range.to == range.from && !self.is_start_of_block(range.from) && !first_is_def - && !skip_spill { log::debug!( "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", @@ -3958,33 +3946,6 @@ impl<'a, F: Function> Env<'a, F> { } } - // If this range either spans any block boundary, or - // has any mods/defs, then the spillslot (if any) that - // its value came from is no longer 'clean'. - if clean_spillslot.is_some() { - if self.cfginfo.insn_block[range.from.inst().index()] - != self.cfginfo.insn_block[range.to.prev().inst().index()] - || range.from - == self.cfginfo.block_entry - [self.cfginfo.insn_block[range.from.inst().index()].index()] - { - clean_spillslot = None; - } else if self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef) - { - clean_spillslot = None; - } else { - for u in &self.ranges[entry.index.index()].uses { - match u.operand.kind() { - OperandKind::Def | OperandKind::Mod => { - clean_spillslot = None; - break; - } - _ => {} - } - } - } - } - // The block-to-block edge-move logic is not // applicable to pinned vregs, which are always in one // PReg (so never need moves within their own vreg @@ -4540,6 +4501,132 @@ impl<'a, F: Function> Env<'a, F> { let mut i = 0; self.inserted_moves .sort_unstable_by_key(|m| (m.pos.to_index(), m.prio)); + + // Simple redundant-spill/reload removal: track which + // stackslot each preg is a clean copy of, if any, and remove + // spills and reloads that are unnecessary. + let mut preg_state: [(Block, SpillSlot); PReg::MAX_INDEX] = + [(Block::new(0), SpillSlot::invalid()); PReg::MAX_INDEX]; + + fn process_move_maybe_elide( + preg_state: &mut [(Block, SpillSlot); PReg::MAX_INDEX], + block: Block, + from: Allocation, + to: Allocation, + ) -> bool { + if from.is_reg() && to.is_reg() { + let from = from.as_reg().unwrap().hw_enc(); + let to = to.as_reg().unwrap().hw_enc(); + if preg_state[to] == preg_state[from] + && preg_state[from].1.is_valid() + && preg_state[from].0 == block + { + true + } else { + preg_state[to] = preg_state[from]; + false + } + } else if from.is_stack() && to.is_stack() { + let from = from.as_stack().unwrap(); + let to = to.as_stack().unwrap(); + update_copies_of_slot(preg_state, to, from); + false + } else if from.is_reg() && to.is_stack() { + // Reg-to-stack spill: determine if value in slot is + // still up-to-date, and skip the store if so. + let from = from.as_reg().unwrap().hw_enc(); + let to = to.as_stack().unwrap(); + if preg_state[from] == (block, to) { + // Redundant spill: elide. + true + } else { + // Don't elide; but this register now has a clean + // copy of the slot (because its value is now + // being written to the slot). However, no other + // register that previously had a clean copy of + // this slot does anymore. + update_copies_of_slot(preg_state, to, SpillSlot::invalid()); + preg_state[from] = (block, to); + false + } + } else { + // Stack-to-reg reload: determine if reg is already an + // up-to-date copy of value in slot, and skip reload + // if so. Otherwise, don't skip reload, and update + // metadata to indicate reg is now up-to-date. 
+ let from = from.as_stack().unwrap(); + let to = to.as_reg().unwrap().hw_enc(); + if preg_state[to] == (block, from) { + true + } else { + preg_state[to] = (block, from); + false + } + } + } + fn update_copies_of_slot( + preg_state: &mut [(Block, SpillSlot); PReg::MAX_INDEX], + slot: SpillSlot, + updated: SpillSlot, + ) { + for entry in preg_state.iter_mut() { + if entry.1 == slot { + entry.1 = updated; + } + } + } + fn clear_preg_state<'a, F: Function>( + this: &Env<'a, F>, + preg_state: &mut [(Block, SpillSlot); PReg::MAX_INDEX], + from: ProgPoint, + to: ProgPoint, + ) { + let start_inst = if from.pos() == InstPosition::Before { + from.inst() + } else { + from.inst().next() + }; + let end_inst = if to.pos() == InstPosition::Before { + to.inst() + } else { + to.inst().next() + }; + for inst in start_inst.index()..end_inst.index() { + let inst = Inst::new(inst); + + if this.func.is_safepoint(inst) { + for entry in preg_state.iter_mut() { + entry.1 = SpillSlot::invalid(); + } + continue; + } + + for (i, op) in this.func.inst_operands(inst).iter().enumerate() { + match op.kind() { + OperandKind::Def | OperandKind::Use => { + let alloc = this.get_alloc(inst, i); + if let Some(preg) = alloc.as_reg() { + preg_state[preg.hw_enc()].1 = SpillSlot::invalid(); + } else if let Some(stack) = alloc.as_stack() { + update_copies_of_slot(preg_state, stack, SpillSlot::invalid()); + } + } + _ => {} + } + } + for reg in this.func.inst_clobbers(inst) { + preg_state[reg.hw_enc()].1 = SpillSlot::invalid(); + } + } + if to.pos() == InstPosition::Before && this.func.is_safepoint(to.inst()) { + for entry in preg_state.iter_mut() { + entry.1 = SpillSlot::invalid(); + } + } + } + + let mut last_pos = ProgPoint::before(Inst::new(0)); + while i < self.inserted_moves.len() { let start = i; let pos = self.inserted_moves[i].pos; @@ -4552,6 +4639,11 @@ impl<'a, F: Function> Env<'a, F> { } let moves = &self.inserted_moves[start..i]; + let block = self.cfginfo.insn_block[pos.inst().index()]; + + clear_preg_state(self, &mut preg_state, last_pos, pos); + last_pos = pos; + // Gather all the moves with Int class and Float class // separately. These cannot interact, so it is safe to // have two separate ParallelMove instances. They need to @@ -4616,6 +4708,13 @@ impl<'a, F: Function> Env<'a, F> { for (src, dst, to_vreg) in resolved { log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); + if process_move_maybe_elide(&mut preg_state, block, src, dst) { + log::debug!(" -> eliding (redundant)!"); + if let Some(vreg) = to_vreg { + self.add_edit(pos, prio, Edit::DefAlloc { alloc: dst, vreg }); + } + continue; + } self.add_edit( pos, prio, diff --git a/src/lib.rs b/src/lib.rs index c96004f1..7941ebe7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,7 @@ pub struct PReg(u8, RegClass); impl PReg { pub const MAX_BITS: usize = 5; pub const MAX: usize = (1 << Self::MAX_BITS) - 1; - pub const MAX_INDEX: usize = 2 * Self::MAX; // including RegClass bit + pub const MAX_INDEX: usize = 1 << (Self::MAX_BITS + 1); // including RegClass bit /// Create a new PReg. The `hw_enc` range is 6 bits. 
#[inline(always)] @@ -188,6 +188,19 @@ impl SpillSlot { pub fn plus(self, offset: usize) -> Self { SpillSlot::new(self.index() + offset, self.class()) } + + #[inline(always)] + pub fn invalid() -> Self { + SpillSlot(0xffff_ffff) + } + #[inline(always)] + pub fn is_invalid(self) -> bool { + self == Self::invalid() + } + #[inline(always)] + pub fn is_valid(self) -> bool { + self != Self::invalid() + } } impl std::fmt::Display for SpillSlot { From 940bc40fae56791ae6507d7cdb755ef1c87f3c22 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 8 Jun 2021 00:08:28 -0700 Subject: [PATCH 108/155] Redundant move eliminator. --- src/ion/mod.rs | 299 +++++++++++++++++++++++++++++-------------------- 1 file changed, 178 insertions(+), 121 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 34c026ab..d1b3c63a 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -809,6 +809,135 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { } } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum RedundantMoveState { + Copy(Allocation, Option), + Orig(VReg), + None, +} +#[derive(Clone, Debug, Default)] +struct RedundantMoveEliminator { + allocs: HashMap, + reverse_allocs: HashMap>, +} +#[derive(Copy, Clone, Debug)] +struct RedundantMoveAction { + elide: bool, + def_alloc: Option<(Allocation, VReg)>, +} + +impl RedundantMoveEliminator { + fn process_move( + &mut self, + from: Allocation, + to: Allocation, + to_vreg: Option, + ) -> RedundantMoveAction { + // Look up the src and dest. + let from_state = self + .allocs + .get(&from) + .map(|&p| p) + .unwrap_or(RedundantMoveState::None); + let to_state = self + .allocs + .get(&to) + .map(|&p| p) + .unwrap_or(RedundantMoveState::None); + + log::debug!( + " -> redundant move tracker: from {} to {} to_vreg {:?}", + from, + to, + to_vreg + ); + log::debug!( + " -> from_state {:?} to_state {:?}", + from_state, + to_state + ); + + if from == to && to_vreg.is_some() { + self.clear_alloc(to); + self.allocs + .insert(to, RedundantMoveState::Orig(to_vreg.unwrap())); + return RedundantMoveAction { + elide: true, + def_alloc: Some((to, to_vreg.unwrap())), + }; + } + + let src_vreg = match from_state { + RedundantMoveState::Copy(_, opt_r) => opt_r, + RedundantMoveState::Orig(r) => Some(r), + _ => None, + }; + log::debug!(" -> src_vreg {:?}", src_vreg); + let dst_vreg = to_vreg.or(src_vreg); + log::debug!(" -> dst_vreg {:?}", dst_vreg); + let existing_dst_vreg = match to_state { + RedundantMoveState::Copy(_, opt_r) => opt_r, + RedundantMoveState::Orig(r) => Some(r), + _ => None, + }; + log::debug!(" -> existing_dst_vreg {:?}", existing_dst_vreg); + + let elide = match (from_state, to_state) { + (_, RedundantMoveState::Copy(orig_alloc, _)) if orig_alloc == from => true, + (RedundantMoveState::Copy(new_alloc, _), _) if new_alloc == to => true, + _ => false, + }; + log::debug!(" -> elide {}", elide); + + let def_alloc = if dst_vreg != existing_dst_vreg && dst_vreg.is_some() && elide { + Some((to, dst_vreg.unwrap())) + } else { + None + }; + log::debug!(" -> def_alloc {:?}", def_alloc); + + // Invalidate all existing copies of `to` if `to` actually changed value. + if !elide { + self.clear_alloc(to); + } + + // Set up forward and reverse mapping. Don't track stack-to-stack copies. 
+ if from.is_reg() || to.is_reg() { + self.allocs + .insert(to, RedundantMoveState::Copy(from, dst_vreg)); + log::debug!( + " -> create mapping {} -> {:?}", + to, + RedundantMoveState::Copy(from, dst_vreg) + ); + self.reverse_allocs + .entry(from) + .or_insert_with(|| smallvec![]) + .push(to); + } + + RedundantMoveAction { elide, def_alloc } + } + + fn clear(&mut self) { + log::debug!(" redundant move eliminator cleared"); + self.allocs.clear(); + self.reverse_allocs.clear(); + } + + fn clear_alloc(&mut self, alloc: Allocation) { + log::debug!(" redundant move eliminator: clear {:?}", alloc); + if let Some(ref mut existing_copies) = self.reverse_allocs.get_mut(&alloc) { + for to_inval in existing_copies.iter() { + log::debug!(" -> clear existing copy: {:?}", to_inval); + self.allocs.remove(to_inval); + } + existing_copies.clear(); + } + self.allocs.remove(&alloc); + } +} + impl<'a, F: Function> Env<'a, F> { pub(crate) fn new( func: &'a F, @@ -4502,85 +4631,30 @@ impl<'a, F: Function> Env<'a, F> { self.inserted_moves .sort_unstable_by_key(|m| (m.pos.to_index(), m.prio)); - // Simple redundant-spill/reload removal: track which - // stackslot each preg is a clean copy of, if any, and remove - // spills and reloads that are unnecessary. - let mut preg_state: [(Block, SpillSlot); PReg::MAX_INDEX] = - [(Block::new(0), SpillSlot::invalid()); PReg::MAX_INDEX]; - - fn process_move_maybe_elide( - preg_state: &mut [(Block, SpillSlot); PReg::MAX_INDEX], - block: Block, - from: Allocation, - to: Allocation, - ) -> bool { - if from.is_reg() && to.is_reg() { - let from = from.as_reg().unwrap().hw_enc(); - let to = to.as_reg().unwrap().hw_enc(); - if preg_state[to] == preg_state[from] - && preg_state[from].1.is_valid() - && preg_state[from].0 == block - { - true - } else { - preg_state[to] = preg_state[from]; - false - } - } else if from.is_stack() && to.is_stack() { - let from = from.as_stack().unwrap(); - let to = to.as_stack().unwrap(); - update_copies_of_slot(preg_state, to, from); - false - } else if from.is_reg() && to.is_stack() { - // Reg-to-stack spill: determine if value in slot is - // still up-to-date, and skip the store if so. - let from = from.as_reg().unwrap().hw_enc(); - let to = to.as_stack().unwrap(); - if preg_state[from] == (block, to) { - // Redundant spill: elide. - true - } else { - // Don't elide; but this register now has a clean - // copy of the slot (because its value is now - // being written to the slot). However, no other - // register that previously had a clean copy of - // this slot does anymore. - update_copies_of_slot(preg_state, to, SpillSlot::invalid()); - preg_state[from] = (block, to); - false - } - } else { - // Stack-to-reg reload: determine if reg is already an - // up-to-date copy of value in slot, and skip reload - // if so. Otherwise, don't skip reload, and update - // metadata to indicate reg is now up-to-date. - let from = from.as_stack().unwrap(); - let to = to.as_reg().unwrap().hw_enc(); - if preg_state[to] == (block, from) { - true - } else { - preg_state[to] = (block, from); - false - } - } - } - fn update_copies_of_slot( - preg_state: &mut [(Block, SpillSlot); PReg::MAX_INDEX], - slot: SpillSlot, - updated: SpillSlot, - ) { - for entry in preg_state.iter_mut() { - if entry.1 == slot { - entry.1 = updated; - } - } - } - fn clear_preg_state<'a, F: Function>( + // Redundant-move elimination state tracker. 
+ let mut redundant_moves = RedundantMoveEliminator::default(); + + fn redundant_move_process_side_effects<'a, F: Function>( this: &Env<'a, F>, - preg_state: &mut [(Block, SpillSlot); PReg::MAX_INDEX], + redundant_moves: &mut RedundantMoveEliminator, from: ProgPoint, to: ProgPoint, ) { + // If any safepoints in range, clear and return. + // Also, if we cross a block boundary, clear and return. + if this.cfginfo.insn_block[from.inst().index()] + != this.cfginfo.insn_block[to.inst().index()] + { + redundant_moves.clear(); + return; + } + for inst in from.inst().index()..=to.inst().index() { + if this.func.is_safepoint(Inst::new(inst)) { + redundant_moves.clear(); + return; + } + } + let start_inst = if from.pos() == InstPosition::Before { from.inst() } else { @@ -4593,34 +4667,17 @@ impl<'a, F: Function> Env<'a, F> { }; for inst in start_inst.index()..end_inst.index() { let inst = Inst::new(inst); - - if this.func.is_safepoint(inst) { - for entry in preg_state.iter_mut() { - entry.1 = SpillSlot::invalid(); - } - continue; - } - for (i, op) in this.func.inst_operands(inst).iter().enumerate() { match op.kind() { OperandKind::Def | OperandKind::Use => { let alloc = this.get_alloc(inst, i); - if let Some(preg) = alloc.as_reg() { - preg_state[preg.hw_enc()].1 = SpillSlot::invalid(); - } else if let Some(stack) = alloc.as_stack() { - update_copies_of_slot(preg_state, stack, SpillSlot::invalid()); - } + redundant_moves.clear_alloc(alloc); } _ => {} } } for reg in this.func.inst_clobbers(inst) { - preg_state[reg.hw_enc()].1 = SpillSlot::invalid(); - } - } - if to.pos() == InstPosition::Before && this.func.is_safepoint(to.inst()) { - for entry in preg_state.iter_mut() { - entry.1 = SpillSlot::invalid(); + redundant_moves.clear_alloc(Allocation::reg(*reg)); } } } @@ -4639,9 +4696,7 @@ impl<'a, F: Function> Env<'a, F> { } let moves = &self.inserted_moves[start..i]; - let block = self.cfginfo.insn_block[pos.inst().index()]; - - clear_preg_state(self, &mut preg_state, last_pos, pos); + redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos); last_pos = pos; // Gather all the moves with Int class and Float class @@ -4676,15 +4731,11 @@ impl<'a, F: Function> Env<'a, F> { } for m in &self_moves { - self.add_edit( - pos, - prio, - Edit::Move { - from: m.from_alloc, - to: m.to_alloc, - to_vreg: m.to_vreg, - }, - ); + let action = redundant_moves.process_move(m.from_alloc, m.to_alloc, m.to_vreg); + assert!(action.elide); + if let Some((alloc, vreg)) = action.def_alloc { + self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); + } } for &(regclass, moves) in @@ -4708,22 +4759,28 @@ impl<'a, F: Function> Env<'a, F> { for (src, dst, to_vreg) in resolved { log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); - if process_move_maybe_elide(&mut preg_state, block, src, dst) { - log::debug!(" -> eliding (redundant)!"); - if let Some(vreg) = to_vreg { - self.add_edit(pos, prio, Edit::DefAlloc { alloc: dst, vreg }); - } - continue; + let action = redundant_moves.process_move(src, dst, to_vreg); + if !action.elide { + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: dst, + to_vreg, + }, + ); + } else { + log::debug!(" -> redundant move elided"); + } + if let Some((alloc, vreg)) = action.def_alloc { + log::debug!( + " -> converted to DefAlloc: alloc {} vreg {}", + alloc, + vreg + ); + self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); } - self.add_edit( - pos, - prio, - Edit::Move { - from: src, - to: dst, - to_vreg, - }, - ); } } } From 
0f270e5bcc908cb1348de2c9dd530df1d91b0f76 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 8 Jun 2021 01:14:10 -0700 Subject: [PATCH 109/155] WIP. --- src/ion/mod.rs | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index d1b3c63a..020811c2 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1548,21 +1548,19 @@ impl<'a, F: Function> Env<'a, F> { // after, it is known as the preg // again. This is used by the // checker. - self.add_edit( + self.insert_move( ProgPoint::before(inst), InsertMovePrio::MultiFixedReg, - Edit::DefAlloc { - alloc: Allocation::reg(preg), - vreg: dst.vreg(), - }, + Allocation::reg(preg), + Allocation::reg(preg), + Some(dst.vreg()), ); - self.add_edit( + self.insert_move( ProgPoint::after(inst), InsertMovePrio::Regular, - Edit::DefAlloc { - alloc: Allocation::reg(preg), - vreg: src.vreg(), - }, + Allocation::reg(preg), + Allocation::reg(preg), + Some(src.vreg()), ); } else { if inst > self.cfginfo.block_entry[block.index()].inst() { @@ -1586,13 +1584,12 @@ impl<'a, F: Function> Env<'a, F> { // preg is now known as that vreg, // not the preg. This is used by // the checker. - self.add_edit( + self.insert_move( ProgPoint::after(inst), InsertMovePrio::Regular, - Edit::DefAlloc { - alloc: Allocation::reg(preg), - vreg: dst.vreg(), - }, + Allocation::reg(preg), + Allocation::reg(preg), + Some(dst.vreg()), ); } } else { @@ -1616,13 +1613,12 @@ impl<'a, F: Function> Env<'a, F> { // given preg is now known as that // preg, not the vreg. This is // used by the checker. - self.add_edit( + self.insert_move( ProgPoint::after(inst), InsertMovePrio::Regular, - Edit::DefAlloc { - alloc: Allocation::reg(preg), - vreg: dst.vreg(), - }, + Allocation::reg(preg), + Allocation::reg(preg), + Some(dst.vreg()), ); } // Otherwise, if dead, no need to create From e33790d8e728bb3c89c9c6e2584e1fb5e1f5785e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 8 Jun 2021 01:30:29 -0700 Subject: [PATCH 110/155] do not remove redundant move if we don't have local (within-basic-block) vreg tags, as this might throw off the checker --- src/ion/mod.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 020811c2..ad2e0a49 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -883,6 +883,9 @@ impl RedundantMoveEliminator { log::debug!(" -> existing_dst_vreg {:?}", existing_dst_vreg); let elide = match (from_state, to_state) { + // can't elide if we don't know the implications for the + // checker. 
+ _ if src_vreg.is_none() && dst_vreg.is_none() => false, (_, RedundantMoveState::Copy(orig_alloc, _)) if orig_alloc == from => true, (RedundantMoveState::Copy(new_alloc, _), _) if new_alloc == to => true, _ => false, @@ -930,6 +933,14 @@ impl RedundantMoveEliminator { if let Some(ref mut existing_copies) = self.reverse_allocs.get_mut(&alloc) { for to_inval in existing_copies.iter() { log::debug!(" -> clear existing copy: {:?}", to_inval); + if let Some(val) = self.allocs.get_mut(to_inval) { + match val { + RedundantMoveState::Copy(_, Some(vreg)) => { + *val = RedundantMoveState::Orig(*vreg); + } + _ => *val = RedundantMoveState::None, + } + } self.allocs.remove(to_inval); } existing_copies.clear(); From f898b8dcbde7cea5559c1c749c44752858234240 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 9 Jun 2021 20:58:54 -0700 Subject: [PATCH 111/155] Some fuzzbug fixes --- src/ion/mod.rs | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index ad2e0a49..2f863267 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -644,6 +644,7 @@ enum InsertMovePrio { InEdgeMoves, BlockParam, Regular, + PostRegular, MultiFixedReg, ReusedInput, OutEdgeMoves, @@ -892,7 +893,7 @@ impl RedundantMoveEliminator { }; log::debug!(" -> elide {}", elide); - let def_alloc = if dst_vreg != existing_dst_vreg && dst_vreg.is_some() && elide { + let def_alloc = if dst_vreg != existing_dst_vreg && dst_vreg.is_some() { Some((to, dst_vreg.unwrap())) } else { None @@ -1560,15 +1561,15 @@ impl<'a, F: Function> Env<'a, F> { // again. This is used by the // checker. self.insert_move( - ProgPoint::before(inst), - InsertMovePrio::MultiFixedReg, + ProgPoint::after(inst), + InsertMovePrio::Regular, Allocation::reg(preg), Allocation::reg(preg), Some(dst.vreg()), ); self.insert_move( - ProgPoint::after(inst), - InsertMovePrio::Regular, + ProgPoint::before(inst.next()), + InsertMovePrio::MultiFixedReg, Allocation::reg(preg), Allocation::reg(preg), Some(src.vreg()), @@ -1597,7 +1598,7 @@ impl<'a, F: Function> Env<'a, F> { // the checker. self.insert_move( ProgPoint::after(inst), - InsertMovePrio::Regular, + InsertMovePrio::BlockParam, Allocation::reg(preg), Allocation::reg(preg), Some(dst.vreg()), @@ -1625,8 +1626,8 @@ impl<'a, F: Function> Env<'a, F> { // preg, not the vreg. This is // used by the checker. 
self.insert_move( - ProgPoint::after(inst), - InsertMovePrio::Regular, + ProgPoint::before(inst.next()), + InsertMovePrio::PostRegular, Allocation::reg(preg), Allocation::reg(preg), Some(dst.vreg()), @@ -4077,7 +4078,7 @@ impl<'a, F: Function> Env<'a, F> { InsertMovePrio::Regular, prev_alloc, alloc, - Some(self.vreg_regs[vreg.index()]), + None, ); } } @@ -4737,14 +4738,6 @@ impl<'a, F: Function> Env<'a, F> { } } - for m in &self_moves { - let action = redundant_moves.process_move(m.from_alloc, m.to_alloc, m.to_vreg); - assert!(action.elide); - if let Some((alloc, vreg)) = action.def_alloc { - self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); - } - } - for &(regclass, moves) in &[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)] { @@ -4790,6 +4783,23 @@ impl<'a, F: Function> Env<'a, F> { } } } + + for m in &self_moves { + log::debug!( + "self move at pos {:?} prio {:?}: {} -> {} to_vreg {:?}", + pos, + prio, + m.from_alloc, + m.to_alloc, + m.to_vreg + ); + let action = redundant_moves.process_move(m.from_alloc, m.to_alloc, m.to_vreg); + assert!(action.elide); + if let Some((alloc, vreg)) = action.def_alloc { + log::debug!(" -> DefAlloc: alloc {} vreg {}", alloc, vreg); + self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); + } + } } // Add edits to describe blockparam locations too. This is From 2851ac80c788de9f01229d936d22832b8d19935b Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 9 Jun 2021 23:03:16 -0700 Subject: [PATCH 112/155] Working redundant-move elimination --- src/ion/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 2f863267..61664ff8 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -4677,7 +4677,7 @@ impl<'a, F: Function> Env<'a, F> { let inst = Inst::new(inst); for (i, op) in this.func.inst_operands(inst).iter().enumerate() { match op.kind() { - OperandKind::Def | OperandKind::Use => { + OperandKind::Def | OperandKind::Mod => { let alloc = this.get_alloc(inst, i); redundant_moves.clear_alloc(alloc); } From 4ba7b2f57e2ecbcf458034d8d7d8f6eab333b5fe Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 9 Jun 2021 23:29:48 -0700 Subject: [PATCH 113/155] Improve redundant-move elimination: don't skip the case where we don't hvae vreg metadata within a BB. Instead, keep the checker happy by feeding more metadata to it on every move. --- src/ion/mod.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 61664ff8..1b4afbfc 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -884,9 +884,6 @@ impl RedundantMoveEliminator { log::debug!(" -> existing_dst_vreg {:?}", existing_dst_vreg); let elide = match (from_state, to_state) { - // can't elide if we don't know the implications for the - // checker. 
- _ if src_vreg.is_none() && dst_vreg.is_none() => false, (_, RedundantMoveState::Copy(orig_alloc, _)) if orig_alloc == from => true, (RedundantMoveState::Copy(new_alloc, _), _) if new_alloc == to => true, _ => false, @@ -4078,7 +4075,7 @@ impl<'a, F: Function> Env<'a, F> { InsertMovePrio::Regular, prev_alloc, alloc, - None, + Some(self.vreg_regs[vreg.index()]), ); } } @@ -4478,7 +4475,13 @@ impl<'a, F: Function> Env<'a, F> { if last == Some(dest.alloc) { continue; } - self.insert_move(insertion_point, prio, src.alloc, dest.alloc, None); + self.insert_move( + insertion_point, + prio, + src.alloc, + dest.alloc, + Some(self.vreg_regs[dest.to_vreg().index()]), + ); last = Some(dest.alloc); } } @@ -4582,12 +4585,13 @@ impl<'a, F: Function> Env<'a, F> { ); } } + let input_operand = self.func.inst_operands(inst)[input_idx]; self.insert_move( ProgPoint::before(inst), InsertMovePrio::ReusedInput, input_alloc, output_alloc, - None, + Some(input_operand.vreg()), ); self.set_alloc(inst, input_idx, output_alloc); } From fcbf384d74dacad88303d013649daa0efdb9fb44 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 9 Jun 2021 23:53:54 -0700 Subject: [PATCH 114/155] Use hashset to avoid linear scan in conflict-bundle-set deduplication --- src/ion/mod.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 1b4afbfc..b702c3b8 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -818,8 +818,8 @@ enum RedundantMoveState { } #[derive(Clone, Debug, Default)] struct RedundantMoveEliminator { - allocs: HashMap, - reverse_allocs: HashMap>, + allocs: FxHashMap, + reverse_allocs: FxHashMap>, } #[derive(Copy, Clone, Debug)] struct RedundantMoveAction { @@ -2634,6 +2634,7 @@ impl<'a, F: Function> Env<'a, F> { ) -> AllocRegResult { log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); let mut conflicts = smallvec![]; + let mut conflict_set = FxHashSet::default(); let mut max_conflict_weight = 0; // Traverse the BTreeMap in order by requesting the whole // range spanned by the bundle and iterating over that @@ -2729,8 +2730,9 @@ impl<'a, F: Function> Env<'a, F> { // conflicts list. let conflict_bundle = self.ranges[preg_range.index()].bundle; log::debug!(" -> conflict bundle {:?}", conflict_bundle); - if !conflicts.iter().any(|b| *b == conflict_bundle) { + if !conflict_set.contains(&conflict_bundle) { conflicts.push(conflict_bundle); + conflict_set.insert(conflict_bundle); max_conflict_weight = std::cmp::max( max_conflict_weight, self.bundles[conflict_bundle.index()].cached_spill_weight(), From 09b2dd4e7367b31acc9cd7652efb739d046f12e9 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 10 Jun 2021 17:34:04 -0700 Subject: [PATCH 115/155] TODO list update --- src/ion/mod.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index b702c3b8..7325abac 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -22,20 +22,6 @@ /* Performance and code-quality ideas: - - Reduced spilling when spillslot is still "clean": - - Track 'dirty' status of reg and elide spill when not dirty. - - This is slightly tricky: fixpoint problem, across edges. - - We can simplify by assuming spillslot is dirty if value came - in on BB edge; only clean if we reload in same block we spill - in. - - As a slightly better variation on this, track dirty during - scan in a single range while resolving moves; in-edge makes - dirty. 
- - - or: track "at most one" def-points: at a join, new def point - if at least one of the in-edges is a def point. Do this during - liveness by tracking ...? - - Avoid requiring two scratch regs: - Require machine impl to be able to (i) push a reg, (ii) pop a reg; then generate a balanced pair of push/pop, using the stack @@ -46,6 +32,10 @@ first preferred one), reload into it, spill out of it, and then pop old val + - Better hinting: collect N regs associated with one spillslot? + Collect pointers to other "connected" spillslots (via moves) to + allow move to be elided if possible? + - Profile allocations */ From 1bd1248cb546ddead755ed7763484d9548af33c9 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 10 Jun 2021 22:36:02 -0700 Subject: [PATCH 116/155] Avoid stack-to-stack moves by allocating an extra spillslot and re-using the scratch reg instead. --- src/fuzzing/func.rs | 2 +- src/ion/mod.rs | 149 +++++++++++++++++++++++++++++++++++--------- src/lib.rs | 9 +-- 3 files changed, 123 insertions(+), 37 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 258c0b1d..d89df600 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -146,7 +146,7 @@ impl Function for Func { self.num_vregs } - fn spillslot_size(&self, regclass: RegClass, _: VReg) -> usize { + fn spillslot_size(&self, regclass: RegClass) -> usize { match regclass { RegClass::Int => 1, RegClass::Float => 2, diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 7325abac..33c584db 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -351,6 +351,8 @@ struct Env<'a, F: Function> { spillslots: Vec, slots_by_size: Vec, + extra_spillslot: Vec>, + // Program moves: these are moves in the provided program that we // handle with our internal machinery, in order to avoid the // overhead of ordinary operand processing. We expect the client @@ -399,7 +401,6 @@ struct Env<'a, F: Function> { struct SpillSlotData { ranges: LiveRangeSet, class: RegClass, - size: u32, alloc: Allocation, next_spillslot: SpillSlotIndex, } @@ -969,6 +970,8 @@ impl<'a, F: Function> Env<'a, F> { spillslots: vec![], slots_by_size: vec![], + extra_spillslot: vec![None, None], + prog_move_srcs: Vec::with_capacity(n / 2), prog_move_dsts: Vec::with_capacity(n / 2), prog_move_merges: Vec::with_capacity(n / 2), @@ -2402,7 +2405,7 @@ impl<'a, F: Function> Env<'a, F> { // Create a spillslot for this bundle. let ssidx = SpillSetIndex::new(self.spillsets.len()); let reg = self.vreg_regs[vreg.index()]; - let size = self.func.spillslot_size(reg.class(), reg) as u8; + let size = self.func.spillslot_size(reg.class()) as u8; self.spillsets.push(SpillSet { vregs: smallvec![vreg], slot: SpillSlotIndex::invalid(), @@ -3791,7 +3794,6 @@ impl<'a, F: Function> Env<'a, F> { self.spillslots.push(SpillSlotData { ranges: LiveRangeSet::new(), next_spillslot: next, - size: size as u32, alloc: Allocation::none(), class: self.spillsets[spillset.index()].class, }); @@ -3805,24 +3807,29 @@ impl<'a, F: Function> Env<'a, F> { } // Assign actual slot indices to spillslots. - let mut offset: u32 = 0; - for data in &mut self.spillslots { - // Align up to `size`. 
- debug_assert!(data.size.is_power_of_two()); - offset = (offset + data.size - 1) & !(data.size - 1); - let slot = if self.func.multi_spillslot_named_by_last_slot() { - offset + data.size - 1 - } else { - offset - }; - data.alloc = Allocation::stack(SpillSlot::new(slot as usize, data.class)); - offset += data.size; + for i in 0..self.spillslots.len() { + self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].class); } - self.num_spillslots = offset; log::debug!("spillslot allocator done"); } + fn allocate_spillslot(&mut self, class: RegClass) -> Allocation { + let size = self.func.spillslot_size(class) as u32; + let mut offset = self.num_spillslots; + // Align up to `size`. + debug_assert!(size.is_power_of_two()); + offset = (offset + size - 1) & !(size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + size - 1 + } else { + offset + }; + offset += size; + self.num_spillslots = offset; + Allocation::stack(SpillSlot::new(slot as usize, class)) + } + fn is_start_of_block(&self, pos: ProgPoint) -> bool { let block = self.cfginfo.insn_block[pos.inst().index()]; pos == self.cfginfo.block_entry[block.index()] @@ -4740,9 +4747,8 @@ impl<'a, F: Function> Env<'a, F> { // All moves in `moves` semantically happen in // parallel. Let's resolve these to a sequence of moves // that can be done one at a time. - let mut parallel_moves = ParallelMoves::new(Allocation::reg( - self.env.scratch_by_class[regclass as u8 as usize], - )); + let scratch = self.env.scratch_by_class[regclass as u8 as usize]; + let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch)); log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); for m in moves { if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() { @@ -4753,19 +4759,106 @@ impl<'a, F: Function> Env<'a, F> { let resolved = parallel_moves.resolve(); + // If (i) the scratch register is used, and (ii) a + // stack-to-stack move exists, then we need to + // allocate an additional scratch spillslot to which + // we can temporarily spill the scratch reg when we + // lower the stack-to-stack move to a + // stack-to-scratch-to-stack sequence. 
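// Illustrative sketch (exposition only, not part of the patch) of the lowering
// described in the comment above: a stack-to-stack move is routed through the
// scratch register, and if the scratch register already carries a live value
// from this parallel-move sequence, it is first saved to the extra spillslot
// and restored afterward. `Loc` is a simplified stand-in for `Allocation`.
#[derive(Copy, Clone, Debug)]
enum Loc {
    Reg(u8),
    Stack(u32),
}

fn lower_stack_to_stack(
    from: Loc,
    to: Loc,
    scratch: Loc,
    scratch_holds_live_value: bool,
    extra_slot: Loc,
) -> Vec<(Loc, Loc)> {
    let mut moves = Vec::new();
    if scratch_holds_live_value {
        // Save the scratch register's current value...
        moves.push((scratch, extra_slot));
    }
    // ...route the stack-to-stack move through the scratch register...
    moves.push((from, scratch));
    moves.push((scratch, to));
    if scratch_holds_live_value {
        // ...and restore the scratch register afterward.
        moves.push((extra_slot, scratch));
    }
    moves
}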
+ let scratch_used = resolved.iter().any(|&(src, dst, _)| { + src == Allocation::reg(scratch) || dst == Allocation::reg(scratch) + }); + let stack_stack_move = resolved + .iter() + .any(|&(src, dst, _)| src.is_stack() && dst.is_stack()); + let extra_slot = if scratch_used && stack_stack_move { + if self.extra_spillslot[regclass as u8 as usize].is_none() { + let slot = self.allocate_spillslot(regclass); + self.extra_spillslot[regclass as u8 as usize] = Some(slot); + } + self.extra_spillslot[regclass as u8 as usize] + } else { + None + }; + + let mut scratch_used_yet = false; for (src, dst, to_vreg) in resolved { log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); let action = redundant_moves.process_move(src, dst, to_vreg); if !action.elide { - self.add_edit( - pos, - prio, - Edit::Move { - from: src, - to: dst, - to_vreg, - }, - ); + if dst == Allocation::reg(scratch) { + scratch_used_yet = true; + } + if src.is_stack() && dst.is_stack() { + if !scratch_used_yet { + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: Allocation::reg(scratch), + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: dst, + to_vreg, + }, + ); + } else { + assert!(extra_slot.is_some()); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: extra_slot.unwrap(), + to_vreg: None, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: Allocation::reg(scratch), + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: dst, + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: extra_slot.unwrap(), + to: Allocation::reg(scratch), + to_vreg: None, + }, + ); + } + } else { + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: dst, + to_vreg, + }, + ); + } } else { log::debug!(" -> redundant move elided"); } diff --git a/src/lib.rs b/src/lib.rs index 7941ebe7..7d55624c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -797,16 +797,9 @@ pub trait Function { /// 128-bit vector value will require two slots. The regalloc will always /// align on this size. /// - /// This passes the associated virtual register to the client as well, - /// because the way in which we spill a real register may depend on the - /// value that we are using it for. E.g., if a machine has V128 registers - /// but we also use them for F32 and F64 values, we may use a different - /// store-slot size and smaller-operand store/load instructions for an F64 - /// than for a true V128. - /// /// (This trait method's design and doc text derives from /// regalloc.rs' trait of the same name.) - fn spillslot_size(&self, regclass: RegClass, for_vreg: VReg) -> usize; + fn spillslot_size(&self, regclass: RegClass) -> usize; /// When providing a spillslot number for a multi-slot spillslot, /// do we provide the first or the last? This is usually related From ea814225a2cb9762f0b431a572f4c84c1fce3c38 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 10 Jun 2021 23:34:18 -0700 Subject: [PATCH 117/155] Update TODO list --- src/ion/mod.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 33c584db..8c120471 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -22,16 +22,6 @@ /* Performance and code-quality ideas: - - Avoid requiring two scratch regs: - - Require machine impl to be able to (i) push a reg, (ii) pop a - reg; then generate a balanced pair of push/pop, using the stack - slot as the scratch. 
- - on Cranelift side, take care to generate virtual-SP - adjustments! - - For a spillslot->spillslot move, push a fixed reg (say the - first preferred one), reload into it, spill out of it, and then - pop old val - - Better hinting: collect N regs associated with one spillslot? Collect pointers to other "connected" spillslots (via moves) to allow move to be elided if possible? From a686d5a513e5d2f347c1039a3f607a1220b70d91 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 11 Jun 2021 13:21:50 -0700 Subject: [PATCH 118/155] Always recompute prio when recomputing bundle properties; otherwise is zero on new bundles and causes spill weight to be zeroed --- src/ion/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 8c120471..a309d27d 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -2823,6 +2823,8 @@ impl<'a, F: Function> Env<'a, F> { let first_range = bundledata.ranges[0].index; let first_range_data = &self.ranges[first_range.index()]; + self.bundles[bundle.index()].prio = self.compute_bundle_prio(bundle); + if first_range_data.vreg.is_invalid() { log::debug!(" -> no vreg; minimal and fixed"); minimal = true; @@ -3264,17 +3266,15 @@ impl<'a, F: Function> Env<'a, F> { if self.bundles[bundle.index()].ranges.len() > 0 { self.recompute_bundle_properties(bundle); - let prio = self.compute_bundle_prio(bundle); - self.bundles[bundle.index()].prio = prio; + let prio = self.bundles[bundle.index()].prio; self.allocation_queue .insert(bundle, prio as usize, reg_hint); } if self.bundles[new_bundle.index()].ranges.len() > 0 { self.recompute_bundle_properties(new_bundle); - let new_prio = self.compute_bundle_prio(new_bundle); - self.bundles[new_bundle.index()].prio = new_prio; + let prio = self.bundles[new_bundle.index()].prio; self.allocation_queue - .insert(new_bundle, new_prio as usize, reg_hint); + .insert(new_bundle, prio as usize, reg_hint); } } From 6ec6207717b73ffcb5672c1490b854a35d5d02d1 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 18 Jun 2021 13:59:12 -0700 Subject: [PATCH 119/155] Add design document. --- README.md | 138 +---- doc/DESIGN.md | 1625 +++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 +- 3 files changed, 1635 insertions(+), 130 deletions(-) create mode 100644 doc/DESIGN.md diff --git a/README.md b/README.md index c187fe91..a160ed3f 100644 --- a/README.md +++ b/README.md @@ -1,139 +1,19 @@ ## regalloc2: another register allocator -This is a register allocator that started life as, and is about 75% +This is a register allocator that started life as, and is about 50% still, a port of IonMonkey's backtracking register allocator to -Rust. The data structures and invariants have been simplified a little -bit, and the interfaces made a little more generic and reusable. In -addition, it contains substantial amounts of testing infrastructure +Rust. In many regards, it has been generalized, optimized, and +improved since the initial port, and now supports both SSA and non-SSA +use-cases. + +In addition, it contains substantial amounts of testing infrastructure (fuzzing harnesses and checkers) that does not exist in the original IonMonkey allocator. 
-### Design Overview - -TODO - -- SSA with blockparams - -- Operands with constraints, and clobbers, and reused regs; contrast - with regalloc.rs approach of vregs and pregs and many moves that get - coalesced/elided - -### Differences from IonMonkey Backtracking Allocator - -There are a number of differences between the [IonMonkey -allocator](https://searchfox.org/mozilla-central/source/js/src/jit/BacktrackingAllocator.cpp) -and this one: - -* Most significantly, there are [fuzz/fuzz_targets/](many different - fuzz targets) that exercise the allocator, including a full symbolic - checker (`ion_checker` target) based on the [symbolic checker in - regalloc.rs](https://cfallin.org/blog/2021/03/15/cranelift-isel-3/) - and, e.g., a targetted fuzzer for the parallel move-resolution - algorithm (`moves`) and the SSA generator used for generating cases - for the other fuzz targets (`ssagen`). - -* The data-structure invariants are simplified. While the IonMonkey - allocator allowed for LiveRanges and Bundles to overlap in certain - cases, this allocator sticks to a strict invariant: ranges do not - overlap in bundles, and bundles do not overlap. There are other - examples too: e.g., the definition of minimal bundles is very simple - and does not depend on scanning the code at all. In general, we - should be able to state simple invariants and see by inspection (as - well as fuzzing -- see above) that they hold. - -* Many of the algorithms in the IonMonkey allocator are built with - helper functions that do linear scans. These "small quadratic" loops - are likely not a huge issue in practice, but nevertheless have the - potential to be in corner cases. As much as possible, all work in - this allocator is done in linear scans. For example, bundle - splitting is done in a single compound scan over a bundle, ranges in - the bundle, and a sorted list of split-points. - -* There are novel schemes for solving certain interesting design - challenges. One example: in IonMonkey, liveranges are connected - across blocks by, when reaching one end of a control-flow edge in a - scan, doing a lookup of the allocation at the other end. This is in - principle a linear lookup (so quadratic overall). We instead - generate a list of "half-moves", keyed on the edge and from/to - vregs, with each holding one of the allocations. By sorting and then - scanning this list, we can generate all edge moves in one linear - scan. There are a number of other examples of simplifications: for - example, we handle multiple conflicting - physical-register-constrained uses of a vreg in a single instruction - by recording a copy to do in a side-table, then removing constraints - for the core regalloc. Ion instead has to tweak its definition of - minimal bundles and create two liveranges that overlap (!) to - represent the two uses. - -* Using block parameters rather than phi-nodes significantly - simplifies handling of inter-block data movement. IonMonkey had to - special-case phis in many ways because they are actually quite - weird: their uses happen semantically in other blocks, and their - defs happen in parallel at the top of the block. Block parameters - naturally and explicitly reprsent these semantics in a direct way. - -* The allocator supports irreducible control flow and arbitrary block - ordering (its only CFG requirement is that critical edges are - split). 
It handles loops during live-range computation in a way that - is similar in spirit to IonMonkey's allocator -- in a single pass, - when we discover a loop, we just mark the whole loop as a liverange - for values live at the top of the loop -- but we find the loop body - without the fixpoint workqueue loop that IonMonkey uses, instead - doing a single linear scan for backedges and finding the minimal - extent that covers all intermingled loops. In order to support - arbitrary block order and irreducible control flow, we relax the - invariant that the first liverange for a vreg always starts at its - def; instead, the def can happen anywhere, and a liverange may - overapproximate. It turns out this is not too hard to handle and is - a more robust invariant. (It also means that non-SSA code *may* not - be too hard to adapt to, though I haven't seriously thought about - this.) - -### Rough Performance Comparison with Regalloc.rs - -The allocator has not yet been wired up to a suitable compiler backend -(such as Cranelift) to perform a true apples-to-apples compile-time -and runtime comparison. However, we can get some idea of compile speed -by running suitable test cases through the allocator and measuring -*throughput*: that is, instructions per second for which registers are -allocated. - -To do so, I measured the `qsort2` benchmark in -[regalloc.rs](https://github.com/bytecodealliance/regalloc.rs), -register-allocated with default options in that crate's backtracking -allocator, using the Criterion benchmark framework to measure ~620K -instructions per second: - - -```plain -benches/0 time: [365.68 us 367.36 us 369.04 us] - thrpt: [617.82 Kelem/s 620.65 Kelem/s 623.49 Kelem/s] -``` - -I then measured three different fuzztest-SSA-generator test cases in -this allocator, `regalloc2`, measuring between 1.1M and 2.3M -instructions per second (closer to the former for larger functions): - -```plain -==== 459 instructions -benches/0 time: [377.91 us 378.09 us 378.27 us] - thrpt: [1.2134 Melem/s 1.2140 Melem/s 1.2146 Melem/s] - -==== 225 instructions -benches/1 time: [202.03 us 202.14 us 202.27 us] - thrpt: [1.1124 Melem/s 1.1131 Melem/s 1.1137 Melem/s] - -==== 21 instructions -benches/2 time: [9.5605 us 9.5655 us 9.5702 us] - thrpt: [2.1943 Melem/s 2.1954 Melem/s 2.1965 Melem/s] -``` - -Though not apples-to-apples (SSA vs. non-SSA, completely different -code only with similar length), this is at least some evidence that -`regalloc2` is likely to lead to at least a compile-time improvement -when used in e.g. Cranelift. +See the [design overview](doc/DESIGN.md) for (much!) more detail on +how the allocator works. -### License +## License Unless otherwise specified, code in this crate is licensed under the Apache 2.0 License with LLVM Exception. This license text can be found in the file diff --git a/doc/DESIGN.md b/doc/DESIGN.md new file mode 100644 index 00000000..c55887ac --- /dev/null +++ b/doc/DESIGN.md @@ -0,0 +1,1625 @@ +# regalloc2 Design Overview + +This document describes the basic architecture of the regalloc2 +register allocator. It describes the externally-visible interface +(input CFG, instructions, operands, with their invariants; meaning of +various parts of the output); core data structures; and the allocation +pipeline, or series of algorithms that compute an allocation. It ends +with a description of future work and expectations, as well as an +appendix that notes design influences and similarities to the +IonMonkey backtracking allocator. 
+
+# API, Input IR and Invariants
+
+The toplevel API to regalloc2 consists of a single entry point `run()`
+that takes a register environment, which specifies all physical
+registers, and the input program. The function returns either an error
+or an `Output` struct that provides allocations for each operand and a
+list of additional instructions (moves, loads, stores) to insert.
+
+## Register Environment
+
+The allocator takes a `MachineEnv` which specifies, for each of the
+two register classes `Int` and `Float`, a list of `PReg`s by index. A
+`PReg` is nothing more than the class and index within the class; the
+allocator does not need to know anything more.
+
+The `MachineEnv` provides a list of preferred and non-preferred
+physical registers per class. Any register not on either list will not
+be allocated. Usually, registers that do not need to be saved in the
+prologue if used (i.e., caller-save registers) are given in the
+"preferred" list. The environment also provides exactly one scratch
+register per class. This register must not be in the preferred or
+non-preferred lists, and is used whenever a set of moves that need to
+occur logically in parallel have a cycle (for a simple example,
+consider a swap `r0, r1 := r1, r0`).
+
+With some more work, we could potentially remove the need for the
+scratch register by requiring support for an additional edit type from
+the client ("swap"), but we have not pursued this.
+
+## CFG and Instructions
+
+The allocator operates on an input program that is in a standard CFG
+representation: the function body is a list of basic blocks, and each
+block has a list of instructions and zero or more successors. The
+allocator also requires the client to provide predecessors for each
+block, and these must be consistent with the successor
+lists.
+
+Instructions are opaque to the allocator except for a few important
+bits: (1) `is_ret` (is a return instruction); (2) `is_branch` (is a
+branch instruction); (3) `is_call` (is a call instruction, for
+heuristic purposes only), (4) `is_move` (is a move between registers),
+and (5) a list of Operands, covered below. Every block must end in a
+return or branch.
+
+Both instructions and blocks are named by indices in contiguous index
+spaces. A block's instructions must be a contiguous range of
+instruction indices, and block i's first instruction must come
+immediately after block i-1's last instruction.
+
+The CFG must have *no critical edges*. A critical edge is an edge from
+block A to block B such that A has more than one successor *and* B has
+more than one predecessor. For this definition, the entry block has an
+implicit predecessor, and any block that ends in a return has an
+implicit successor.
+
+Note that there are *no* requirements related to the ordering of
+blocks, and there is no requirement that the control flow be
+reducible. Some *heuristics* used by the allocator will perform better
+if the code is reducible and ordered in reverse postorder (RPO),
+however: in particular, (1) this interacts better with the
+contiguous-range-of-instruction-indices live range representation that
+we use, and (2) the "approximate loop depth" metric will actually be
+exact if both these conditions are met.
+
+## Operands and VRegs
+
+Every instruction operates on values by way of `Operand`s. An operand
+consists of the following fields:
+
+- VReg, or virtual register. *Every* operand mentions a virtual
+  register, even if it is constrained to a single physical register in
+  practice. 
This is because we track liveranges uniformly by vreg. + +- Policy, or "constraint". Every reference to a vreg can apply some + constraint to the vreg at that point in the program. Valid policies are: + + - Any location; + - Any register of the vreg's class; + - Any stack slot; + - A particular fixed physical register; or + - For a def (output), a *reuse* of an input register. + +- The "kind" of reference to this vreg: Def, Use, Mod. A def + (definition) writes to the vreg, and disregards any possible earlier + value. A mod (modify) reads the current value then writes a new + one. A use simply reads the vreg's value. + +- The position: before or after the instruction. + - Note that to have a def (output) register available in a way that + does not conflict with inputs, the def should be placed at the + "before" position. Similarly, to have a use (input) register + available in a way that does not conflict with outputs, the use + should be placed at the "after" position. + +This operand-specification design allows for SSA and non-SSA code (see +section below for details). + +VRegs, or virtual registers, are specified by an index and a register +class (Float or Int). The classes are not given separately; they are +encoded on every mention of the vreg. (In a sense, the class is an +extra index bit, or part of the register name.) The input function +trait does require the client to provide the exact vreg count, +however. + +Implementation note: both vregs and operands are bit-packed into +u32s. This is essential for memory-efficiency. As a result of the +operand bit-packing in particular (including the policy constraints!), +the allocator supports up to 2^20 (1M) vregs per function, and 2^5 +(32) physical registers per class. Later we will also see a limit of +2^20 (1M) instructions per function. These limits are considered +sufficient for the anticipated use-cases (e.g., compiling Wasm, which +also has function-size implementation limits); for larger functions, +it is likely better to use a simpler register allocator in any case. + +## Reuses and Two-Address ISAs + +Some instruction sets primarily have instructions that name only two +registers for a binary operator, rather than three: both registers are +inputs, and the result is placed in one of the registers, clobbering +its original value. The most well-known modern example is x86. It is +thus imperative that we support this pattern well in the register +allocator. + +This instruction-set design is somewhat at odds with an SSA +representation, where a value cannot be redefined. Even in non-SSA +code, it is awkward to overwrite a vreg that may need to be used again +later. + +Thus, the allocator supports a useful fiction of sorts: the +instruction can be described as if it has three register mentions -- +two inputs and a separate output -- and neither input will be +clobbered. The output, however, is special: its register-placement +policy is "reuse input i" (where i == 0 or 1). The allocator +guarantees that the register assignment for that input and the output +will be the same, so the instruction can use that register as its +"modifies" operand. If the input is needed again later, the allocator +will take care of the necessary copying. + +We will see below how the allocator makes this work by doing some +preprocessing so that the core allocation algorithms do not need to +worry about this constraint. 
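+
+To make the reused-input fiction concrete, here is a minimal sketch,
+using stand-in types rather than the crate's actual bit-packed
+`Operand`/`VReg` definitions, of how a two-address `v2 = add v0, v1`
+might be described with three operand mentions:
+
+```rust
+// Stand-in types for illustration only; the real Operand and VReg are
+// bit-packed u32s with more fields (class, position, etc.).
+#[derive(Clone, Copy, Debug)]
+struct VReg(u32);
+
+#[derive(Clone, Copy, Debug)]
+enum Policy {
+    AnyReg,            // any register of the vreg's class
+    ReuseInput(usize), // def must share the given input's register
+}
+
+#[derive(Clone, Copy, Debug)]
+enum Kind { Use, Def }
+
+#[derive(Clone, Copy, Debug)]
+struct Operand { vreg: VReg, kind: Kind, policy: Policy }
+
+// `v2 = add v0, v1` on a two-address ISA: neither input is clobbered as
+// far as the program is concerned; the allocator gives v2 the same
+// register as v0 and, if v0 is needed again later, inserts the
+// necessary copy itself.
+fn add_operands(v0: VReg, v1: VReg, v2: VReg) -> [Operand; 3] {
+    [
+        Operand { vreg: v0, kind: Kind::Use, policy: Policy::AnyReg },
+        Operand { vreg: v1, kind: Kind::Use, policy: Policy::AnyReg },
+        Operand { vreg: v2, kind: Kind::Def, policy: Policy::ReuseInput(0) },
+    ]
+}
+```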
+
+Note that some non-SSA clients, such as Cranelift using the
+regalloc.rs-to-regalloc2 compatibility shim, will instead generate
+their own copies (copying to the output vreg first) and then use "mod"
+operand kinds, which allow the output vreg to be both read and
+written. regalloc2 works hard to make this as efficient as the
+reused-input scheme by treating moves specially (see below).
+
+## SSA
+
+regalloc2 was originally designed to take an SSA IR as input, where
+the usual definitions apply: every vreg is defined exactly once, and
+every vreg use is dominated by its one def. (Using blockparams means
+that we do not need additional conditions for phi-nodes.)
+
+The allocator then evolved to support non-SSA inputs as well. As a
+result, the input is maximally flexible right now: it does not check
+for and enforce, nor try to take advantage of, the single-def
+rule. However, blockparams are still available.
+
+In the future, we hope to change this, however, once compilation of
+non-SSA inputs is no longer needed. Specifically, if we can migrate
+Cranelift to the native regalloc2 API rather than the regalloc.rs
+compatibility shim, we will be able to remove "mod" operand kinds,
+assume (and verify) single defs, and take advantage of this when
+reasoning about various algorithms in the allocator.
+
+## Block Parameters
+
+Every block can have *block parameters*, and a branch to a block with
+block parameters must provide values for those parameters via
+operands. When a branch has more than one successor, it provides
+separate operands for each possible successor. These block parameters
+are equivalent to phi-nodes; we chose this representation because they
+are in many ways a more consistent representation of SSA.
+
+To see why we believe block parameters are a slightly nicer design
+choice than use of phi nodes, consider: phis are special
+pseudoinstructions that must come first in a block, are all defined in
+parallel, and whose uses occur on the edge of a particular
+predecessor. All of these facts complicate any analysis that scans
+instructions and reasons about uses and defs. It is much closer to the
+truth to actually put those uses *in* the predecessor, on the branch,
+and put all the defs at the top of the block as a separate kind of
+def. The tradeoff is that a vreg's def now has two possibilities --
+ordinary instruction def or blockparam def -- but this is fairly
+reasonable to handle.
+
+## Non-SSA
+
+As mentioned, regalloc2 supports non-SSA inputs as well. No special
+flag is needed to place the allocator in this mode or disable SSA
+verification. However, we hope to eventually remove this functionality
+when it is no longer needed.
+
+## Program Moves
+
+As an especially useful feature for non-SSA IR, regalloc2 supports
+special handling of "move" instructions: it will try to merge the
+input and output allocations to elide the move altogether.
+
+It turns out that moves are used frequently in the non-SSA input that
+we observe from Cranelift via the regalloc.rs compatibility shim. They
+are used in three different ways:
+
+- Moves to or from physical registers, used to implement ABI details
+  or place values in particular registers required by certain
+  instructions.
+- Moves between vregs on program edges, as lowered from phi/blockparam
+  dataflow in the higher-level SSA IR (CLIF).
+- Moves just prior to two-address-form instructions that modify an
+  input to form an output: the input is moved to the output vreg to
+  avoid clobbering the input. 
+ +Note that, strictly speaking, special handling of program moves is +redundant because each of these kinds of uses has an equivalent in the +"native" regalloc2 API: + +- Moves to/from physical registers can become operand constraints, + either on a particular instruction that requires/produces the values + in certain registers (e.g., a call or ret with args/results in regs, + or a special instruction with fixed register args), or on a ghost + instruction at the top of function that defs vregs for all in-reg + args. + +- Moves between vregs as a lowering of blockparams/phi nodes can be + replaced with use of regalloc2's native blockparam support. + +- Moves prior to two-address-form instructions can be replaced with + the reused-input mechanism. + +Thus, eventually, special handling of program moves should be +removed. However, it is very important for performance at the moment. + +## Output + +The allocator produces two main data structures as output: an array of +`Allocation`s and a list of edits. Some other data, such as stackmap +slot info, is also provided. + +### Allocations + +The allocator provides an array of `Allocation` values, one per +`Operand`. Each `Allocation` has a kind and an index. The kind may +indicate that this is a physical register or a stack slot, and the +index gives the respective register or slot. All allocations will +conform to the constraints given, and will faithfully preserve the +dataflow of the input program. + +### Inserted Moves + +In order to implement the necessary movement of data between +allocations, the allocator needs to insert moves at various program +points. + +The list of inserted moves contains tuples that name a program point +and an "edit". The edit is either a move, from one `Allocation` to +another, or else a kind of metadata used by the checker to know which +VReg is live in a given allocation at any particular time. The latter +sort of edit can be ignored by a backend that is just interested in +generating machine code. + +Note that the allocator will never generate a move from one stackslot +directly to another, by design. Instead, if it needs to do so, it will +make use of the scratch register. (Sometimes such a move occurs when +the scratch register is already holding a value, e.g. to resolve a +cycle of moves; in this case, it will allocate another spillslot and +spill the original scratch value around the move.) + +Thus, the single "edit" type can become either a register-to-register +move, a load from a stackslot into a register, or a store from a +register into a stackslot. + +# Data Structures + +We now review the data structures that regalloc2 uses to track its +state. + +## Program-Derived Alloc-Invariant Data + +There are a number of data structures that are computed in a +deterministic way from the input program and then subsequently used +only as read-only data during the core allocation procedure. + +### Livein/Liveout Bitsets + +The livein and liveout bitsets (`liveins` and `liveouts` on the `Env`) +are allocated one per basic block and record, per block, which vregs +are live entering and leaving that block. They are computed using a +standard backward iterative dataflow analysis and are exact; they do +not over-approximate (this turns out to be important for performance, +and is also necessary for correctness in the case of stackmaps). 
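+
+For reference, here is a minimal sketch of that backward liveness
+fixpoint, using plain hash sets of vreg indices as a stand-in for the
+chunked `BitVec`s the allocator actually uses:
+
+```rust
+use std::collections::HashSet;
+
+// Per-block inputs: `uses[b]` holds vregs read in block b before any
+// redefinition, `defs[b]` holds vregs written in b, and `succs[b]` is
+// b's successor list. Returns (liveins, liveouts), one set per block.
+fn compute_liveness(
+    uses: &[HashSet<u32>],
+    defs: &[HashSet<u32>],
+    succs: &[Vec<usize>],
+) -> (Vec<HashSet<u32>>, Vec<HashSet<u32>>) {
+    let n = uses.len();
+    let mut livein = vec![HashSet::new(); n];
+    let mut liveout = vec![HashSet::new(); n];
+    let mut changed = true;
+    while changed {
+        changed = false;
+        // Reverse order converges faster; any order reaches the same fixpoint.
+        for b in (0..n).rev() {
+            let mut out = HashSet::new();
+            for &s in &succs[b] {
+                out.extend(livein[s].iter().copied());
+            }
+            // livein = uses ∪ (liveout \ defs)
+            let mut inn: HashSet<u32> = uses[b].clone();
+            inn.extend(out.difference(&defs[b]).copied());
+            if out != liveout[b] || inn != livein[b] {
+                liveout[b] = out;
+                livein[b] = inn;
+                changed = true;
+            }
+        }
+    }
+    (livein, liveout)
+}
+```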
+ +### Blockparam Lists: Source-Side and Dest-Side + +The initialization stage scans the input program and produces two +lists that represent blockparam flows from branches to destination +blocks: `blockparam_ins` and `blockparam_outs`. + +These two lists are the first instance we will see of a recurring +pattern: the lists contain tuples that are carefully ordered in a way +such that their sort-order is meaningful. "Build a list lazily then +sort" is a common idiom: it batches the O(n log n) cost into one +operation that the stdlib has aggressively optimized, it provides +dense storage, and it allows for a scan in a certain order that often +lines up with a scan over the program. + +In this particular case, we will build lists of (vreg, block) points +that are meaningful either at the start or end of a block, so that +later, when we scan over a particular vreg's allocations in block +order, we can generate another list of allocations. One side (the +"outs") also contains enough information that it can line up with the +other side (the "ins") in a later sort. + +To make this work, `blockparam_ins` contains a list of (to-vreg, +to-block, from-block) tuples, and has an entry for every blockparam of +every block. Note that we can compute this without actually observing +from-blocks; we only need to iterate over `block_preds` at any given +block. + +Then, `blockparam_outs` contains a list of (from-vreg, from-block, +to-block, to-vreg), and has an entry for every parameter on every +branch that ends a block. There is exactly one "out" tuple for every +"in" tuple. As mentioned above, we will later scan over both to +generate moves. + +### Program-Move Lists: Source-Side and Dest-Side + +Similar to blockparams, we handle moves specially. In fact, we ingest +all moves in the input program into a set of lists -- "move sources" +and "move dests", analogous to the "ins" and "outs" blockparam lists +described above -- and then completely ignore the moves in the program +thereafter. The semantics of the API are such that all program moves +will be recreated with regalloc-inserted edits, and should not still +be emitted after regalloc. This may seem inefficient, but in fact it +allows for better code because it integrates program-moves with the +move resolution that handles other forms of vreg movement. We +previously took the simpler approach of handling program-moves as +opaque instructions with a source and dest, and we found that there +were many redundant move-chains (A->B, B->C) that are eliminated when +everything is handled centrally. + +We also construct a `prog_move_merges` list of live-range index pairs +to attempt to merge when we reach that stage of allocation. + +## Core Allocation State: Ranges, Uses, Bundles, VRegs, PRegs + +We now come to the core data structures: live-ranges, bundles, virtual +registers and their state, and physical registers and their state. + +First we must define a `ProgPoint` precisely: a `ProgPoint` is an +instruction index and a `Before` or `After` suffix. We pack the +before/after suffix into the LSB of a `u32`, so a `ProgPoint` can be +incremented and compared as a simple integer. + +A live-range is a contiguous range of program points (half-open, +i.e. including `from` and excluding `to`) for which a particular vreg +is live with a value. + +A live-range contains a list of uses. 
Each use contains four parts:
+the Operand word (directly copied, so there is no need to dereference
+it); the ProgPoint at which the use occurs; the operand slot on that
+instruction, if any, that the operand comes from; and the use's
+"weight". (It's possible to have "ghost uses" that do not derive from
+any slot on the instruction.) These four parts are packed into three
+`u32`s: the slot can fit in 8 bits, and the weight in 16.
+
+The live-range carries its program-point range, uses, vreg index,
+bundle index (see below), and some metadata: spill weight and
+flags. The spill weight is the sum of weights of each use. The flags
+set currently carries one flag only: whether the live-range starts at
+a Def-kind operand. (This is equivalent to whether the range consumes
+a value at its start or not.)
+
+Uses are owned only by live-ranges and have no separate identity, but
+live-ranges live in a toplevel array and are known by `LiveRangeIndex`
+values throughout the allocator. New live-ranges can be created
+(e.g. during splitting); old ones are not cleaned up, but rather, all
+state is bulk-freed at the end.
+
+Live-ranges are aggregated into "bundles". A bundle is a collection of
+ranges that does not overlap. Each bundle carries: a list (inline
+SmallVec) of (range, live-range index) tuples, an allocation (starts
+as "none"), a "spillset" (more below), and some metadata, including a
+spill weight (sum of ranges' weights), a priority (sum of ranges'
+lengths), and three property flags: "minimal", "contains fixed
+constraints", "contains stack constraints".
+
+VRegs also contain their lists of live-ranges, in the same form as a
+bundle does (inline SmallVec that has inline (from, to) range bounds
+and range indices).
+
+There are two important overlap invariants: (i) no liveranges within a
+bundle overlap, and (ii) no liveranges within a vreg overlap. These
+are extremely important and we rely on them implicitly in many places.
+
+The live-range lists in bundles and vregs, and use-lists in ranges,
+have various sorting invariants as well. These invariants differ
+according to the phase of the allocator's computation. First, during
+live-range construction, live-ranges are placed into vregs in reverse
+order (because the computation is a reverse scan) and uses into ranges
+in reverse order; these are sorted into forward order at the end of
+live-range computation. When bundles are first constructed, their
+range lists are sorted, and they remain so for the rest of allocation,
+as we need for interference testing. However, as ranges are created
+and split, sortedness of vreg ranges is *not* maintained; they are
+sorted once more, in bulk, when allocation is done and we start to
+resolve moves.
+
+Finally, we have physical registers. The main data associated with
+each is the allocation map. This map is a standard BTree, indexed by
+ranges (`from` and `to` ProgPoints) and yielding a LiveRange for each
+location range. The ranges have a custom comparison operator defined
+that compares equal for any overlap.
+
+This comparison operator allows us to determine whether a range is
+free, i.e. has no overlap with a particular range, in one probe -- the
+btree will not contain a match. However, it makes iteration over *all*
+overlapping ranges somewhat tricky to get right. Notably, Rust's
+BTreeMap does not guarantee that the lookup result will be the *first*
+equal key, if multiple keys are equal to the probe key. 
Thus, when we +want to enumerate all overlapping ranges, we probe with a range that +consists of the single program point *before* the start of the actual +query range, using the API that returns an iterator over a range in +the BTree, and then iterate through the resulting iterator to gather +all overlapping ranges (which will be contiguous). + +## Spill Bundles + +It is worth describing "spill bundles" separately. Every spillset (see +below; a group of bundles that originated from one bundle) optionally +points to a single bundle that we designate the "spill bundle" for +that spillset. Contrary to the name, this bundle is not +unconditionally spilled. Rather, one can see it as a sort of fallback: +it is where liveranges go when we give up on processing them via the +normal backtracking loop, and will only process them once more in the +"second-chance" stage. + +The spill bundle acquires liveranges in two ways. First, as we split +bundles, we will trim the split pieces in certain ways so that some +liveranges are immediately placed in the spill bundle. Intuitively, +the "empty" regions that just carry a value, but do not satisfy any +operands, should be in the spill bundle: it is better to have a single +consistent location for the value than to move it between lots of +different split pieces without using it, as moves carry a cost. + +Second, the spill bundle acquires the liveranges of a bundle that has +no requirement to be in a register when that bundle is processed, but +only if the spill bundle already exists. In other words, we won't +create a second-chance spill bundle just for a liverange with an "Any" +use; but if it was already forced into existence by splitting and +trimming, then we might as well use it. + +Note that unlike other bundles, a spill bundle's liverange list +remains unsorted until we do the second-chance allocation. This allows +quick appends of more liveranges. + +## Allocation Queue + +The allocation queue is simply a priority queue (built with a binary +max-heap) of (prio, bundle-index) tuples. + +## Spillsets and Spillslots + +Every bundle contains a reference to a spillset. Spillsets are used to +assign spillslots near the end of allocation, but before then, they +are also a convenient place to store information that is common among +*all bundles* that share the spillset. In particular, spillsets are +initially assigned 1-to-1 to bundles after all bundle-merging is +complete; so spillsets represent in some sense the "original bundles", +and as splitting commences, the smaller bundle-pieces continue to +refer to their original spillsets. + +We stash some useful information on the spillset because of this: a +register hint, used to create some "stickiness" between pieces of an +original bundle that are assigned separately after splitting; the +spill bundle; the common register class of all vregs in this bundle; +the vregs whose liveranges are contained in this bundle; and then some +information actually used if this is spilled to the stack (`required` +indicates actual stack use; `size` is the spillslot count; `slot` is +the actual stack slot). + +Spill *sets* are later allocated to spill *slots*. Multiple spillsets +can be assigned to one spillslot; the only constraint is that +spillsets assigned to a spillslot must not overlap. When we look up +the allocation for a bundle, if the bundle is not given a specific +allocation (its `alloc` field is `Allocation::none()`), this means it +is spilled, and we traverse to the spillset then spillslot. 
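+
+A minimal sketch of this fallback lookup, with illustrative field and
+type names rather than the allocator's exact definitions:
+
+```rust
+// Stand-in types: a bundle either carries a concrete allocation or
+// falls back to its spillset's assigned spillslot.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+enum Allocation {
+    None,
+    Reg(u8),
+    Stack(u32),
+}
+
+struct Bundle {
+    alloc: Allocation,
+    spillset: usize,
+}
+
+struct SpillSet {
+    slot: u32, // assigned during spillslot allocation if `required`
+}
+
+fn bundle_allocation(bundle: &Bundle, spillsets: &[SpillSet]) -> Allocation {
+    if bundle.alloc != Allocation::None {
+        // The bundle was assigned a register (or an explicit stack slot).
+        bundle.alloc
+    } else {
+        // Spilled: traverse bundle -> spillset -> spillslot.
+        Allocation::Stack(spillsets[bundle.spillset].slot)
+    }
+}
+```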
+ +## Other: Fixups, Stats, Debug Annotations + +There are a few fixup lists that we will cover in more detail +later. Of particular note is the "multi-fixed-reg fixup list": this +handles instructions that constrain the same input vreg to multiple, +different, fixed registers for different operands at the same program +point. The only way to satisfy such a set of constraints is to +decouple all but one of the inputs (make them no longer refer to the +vreg) and then later insert copies from the first fixed use of the +vreg to the other fixed regs. + +The `Env` also carries a statistics structure with counters that are +incremented, which can be useful for evaluating the effects of +changes; and a "debug annotations" hashmap from program point to +arbitrary strings that is filled out with various useful diagnostic +information if enabled, so that an annotated view of the program with +its liveranges, bundle assignments, inserted moves, merge and split +decisions, etc. can be viewed. + +# Allocation Pipeline + +We now describe the pipeline that computes register allocations. + +## Live-range Construction + +The first step in performing allocation is to analyze the input +program to understand its dataflow: that is, the ranges during which +virtual registers must be assigned to physical registers. Computing +these ranges is what allows us to do better than a trivial "every vreg +lives in a different location, always" allocation. + +We compute precise liveness first using an iterative dataflow +algorithm with BitVecs. (See below for our sparse chunked BitVec +description.) This produces the `liveins` and `liveouts` vectors of +BitVecs per block. + +We then perform a single pass over blocks in reverse order, and scan +instructions in each block in reverse order. Why reverse order? We +must see instructions within a block in reverse to properly compute +liveness (a value is live backward from an use to a def). Because we +want to keep liveranges in-order as we build them, to enable +coalescing, we visit blocks in reverse order as well, so overall this +is simply a scan over the whole instruction index space in reverse +order. + +For each block, we perform a scan with the following state: + +- A liveness bitvec, initialized at the start from `liveouts`. +- A vector of live-range indices, with one entry per vreg, initially + "invalid" (this vector is allocated once and reused at each block). +- In-progress list of live-range indices per vreg in the vreg state, + in *reverse* order (we will reverse it when we're done). + +A vreg is live at the current point in the scan if its bit is set in +the bitvec; its entry in the vreg-to-liverange vec may be stale, but +if the bit is not set, we ignore it. + +We initially create a liverange for all vregs that are live out of the +block, spanning the whole block. We will trim this below if it is +locally def'd and does not pass through the block. + +For each instruction, we process its effects on the scan state: + +- For all clobbers (which logically happen at the end of the + instruction), add a single-program-point liverange to each clobbered + preg. + +- If not a move: + - for each program point [after, before]: + - for each operand at this point(\*): + - if a def or mod: + - if not currently live, this is a dead def; create an empty + LR. + - if a def: + - set the start of the LR for this vreg to this point. + - set as dead. + - if a use: + - create LR if not live, with start at beginning of block. 
+ +- Else, if a move: + - simple case (no pinned vregs): + - add to `prog_move` data structures, and update LRs as above. + - effective point for the use is *after* the move, and for the mod + is *before* the *next* instruction. Why not more conventional + use-before, def-after? Because this allows the move to happen in + parallel with other moves that the move-resolution inserts + (between split fragments of a vreg); these moves always happen + at the gaps between instructions. We place it after, not before, + because before may land at a block-start and interfere with edge + moves, while after is always a "normal" gap (a move cannot end a + block). + - otherwise: see below (pinned vregs). + + +(\*) an instruction operand's effective point is adjusted in a few +cases. If the instruction is a branch, its uses (which are +blockparams) are extended to the "after" point. If there is a reused +input, all *other* inputs are extended to "after": this ensures proper +interference (as we explain more below). + +We then treat blockparams as defs at the end of the scan (beginning of +the block), and create the "ins" tuples. (The uses for the other side +of the edge are already handled as normal uses on a branch +instruction.) + +### Optimization: Pinned VRegs and Moves + +In order to efficiently handle the translation from the regalloc.rs +API, which uses named RealRegs that are distinct from VirtualRegs +rather than operand constraints, we need to implement a few +optimizations. The translation layer translates RealRegs as particular +vregs at the regalloc2 layer, because we need to track their liveness +properly. Handling these as "normal" vregs, with massive bundles of +many liveranges throughout the function, turns out to be a very +inefficient solution. So we mark them as "pinned" with a hook in the +RA2 API. Semantically, this means they are always assigned to a +particular preg whenever mentioned in an operand (but *NOT* between +those points; it is possible for a pinned vreg to move all about +registers and stackslots as long as it eventually makes it back to its +home preg in time for its next use). + +This has a few implications during liverange construction. First, when +we see an operand that mentions a pinned vreg, we translate this to an +operand constraint that names a fixed preg. Later, when we build +bundles, we will not create a bundle for the pinned vreg; instead we +will transfer its liveranges directly as unmoveable reservations in +pregs' allocation maps. Finally, we need to handle moves specially. + +With the caveat that "this is a massive hack and I am very very +sorry", here is how it works. A move between two pinned vregs is easy: +we add that to the inserted-moves list right away because we know the +Allocation on both sides. A move from a pinned vreg to a normal vreg +is the first interesting case. In this case, we (i) create a ghost def +with a fixed-register policy on the normal vreg, doing the other +liverange-maintenance bits as above, and (ii) adjust the liveranges on +the pinned vreg (so the preg) in a particular way. If the preg is live +flowing downward, then this move implies a copy, because the normal +vreg and the pinned vreg are both used in the future and cannot +overlap. But we cannot keep the preg continuously live, because at +exactly one program point, the normal vreg is pinned to it. So we cut +the downward-flowing liverange just *after* the normal vreg's +fixed-reg ghost def. 
Then, whether it is live downward or not,
+we create an upward-flowing liverange on the pinned vreg that ends just
+*before* the ghost def.
+
+The move-from-normal-to-pinned case is similar. First, we create a
+ghost use on the normal vreg that pins its value at this program point
+to the fixed preg. Then, if the preg is live flowing downward, we trim
+its downward liverange to start just after the fixed use.
+
+There are also some tricky metadata-maintenance records that we emit
+so that the checker can keep this all straight.
+
+The outcome of this hack, together with the operand-constraint
+translation on normal uses/defs/mods on pinned vregs, is that we
+essentially are translating regalloc.rs's means of referring to real
+registers to regalloc2's preferred abstractions by doing a bit of
+reverse-engineering. It is not perfect, but it works. Still, we hope
+to rip it all out once we get rid of the need for the compatibility
+shim.
+
+### Handling Reused Inputs
+
+Reused inputs are also handled a bit specially. We have already
+described how we essentially translate the idiom so that the output's
+allocation is used for input and output, and there is a move just
+before the instruction that copies the actual input (which will not be
+clobbered) to the output. Together with an attempt to merge the
+bundles for the two, to elide the move if possible, this works
+perfectly well as long as we ignore all of the other inputs.
+
+But we can't do that: we have to ensure that other inputs' allocations
+are correct too. Note that using the output's allocation as the input
+is actually potentially incorrect if the output is at the After point
+and the input is at the Before: the output might share a register with
+one of the *other* (normal, non-reused) inputs if that input's vreg
+were dead afterward. This will mean that we clobber the other input.
+
+So, to get the interference right, we *extend* all other (non-reused)
+inputs of an instruction with a reused input to the After point. This
+ensures that the other inputs are *not* clobbered by the slightly
+premature use of the output register.
+
+The source has a link to a comment in IonMonkey that implies that it
+uses a similar solution to this problem, though it's not entirely
+clear.
+
+(This odd dance, like many of the others above and below, is "written
+in fuzzbug failures", so to speak. It's not entirely obvious until one
+sees the corner case where it's necessary!)
+
+## Bundle Merging
+
+Once we have built the liverange lists for every vreg, we can reverse
+these lists (recall, they were built in strict reverse order) and
+initially assign one bundle per (non-pinned) vreg. We then try to
+merge bundles together as long as we find pairs of bundles that do not
+overlap and that (heuristically) make sense to merge.
+
+Note that this is the only point in the allocation pipeline where
+bundles get larger. We initially merge as large as we dare (but not
+too large, because then we'll just cause lots of conflicts and
+splitting later), and then try out assignments, backtrack via
+eviction, and split continuously to chip away at the problem until we
+have a working set of allocation assignments.
+
+We attempt to merge three kinds of bundle pairs: reused-input to
+corresponding output; across program moves; and across blockparam
+assignments.
+
+To merge two bundles, we traverse over both their sorted liverange
+lists at once, checking for overlaps. 
Note that we can do this without
+pointer-chasing to the liverange data; the (from, to) range is in the
+liverange list itself.
+
+We also check whether the merged bundle would have conflicting
+requirements (see below for more on requirements). We do a coarse
+check first, checking 1-bit flags that indicate whether either bundle
+has any fixed-reg constraints or stack-only constraints. If so, we
+need to do a detailed check by actually computing merged requirements
+on both sides, merging, and checking for Conflict (the lattice bottom
+value). If no conflict, we merge.
+
+A performance note: merging is extremely performance-sensitive, and it
+turns out that a mergesort-like merge of the liverange lists is too
+expensive, partly because it requires allocating a separate result
+vector (in-place merge in mergesort is infamously complex). Instead,
+we simply append one vector onto the end of the other and invoke
+Rust's builtin sort. We could special-case "one bundle is completely
+before the other", but we currently don't do that (performance idea!).
+
+Once all bundles are merged as far as they will go, we compute cached
+bundle properties (priorities and weights) and enqueue them on the
+priority queue for allocation.
+
+## Recurring: Bundle Property Computation
+
+The core allocation loop is a recurring iteration of the following: we
+take the highest-priority bundle from the allocation queue; we compute
+its requirements; we try to find it a register according to those
+requirements; if no fit, we either evict some other bundle(s) from
+their allocations and try again, or we split the bundle and put the
+parts back on the queue. We record all the information we need to make
+the evict-or-split decision (and where to split) *during* the physical
+register allocation-map scans, so we don't need to go back again to
+compute that.
+
+Termination is nontrivial to see, because of eviction. How do we
+guarantee we don't get into an infinite loop where two bundles fight
+over a register forever? In fact, this can easily happen if there is a
+bug; we fixed many fuzzbugs like this, and we have a check for
+"infinite loop" based on an upper bound on iterations. But if the
+allocator is correct, it should never happen.
+
+Termination is guaranteed because (i) bundles always get smaller, (ii)
+eviction only occurs when a bundle is *strictly* higher weight (not
+higher-or-equal), and (iii) once a bundle gets down to its "minimal"
+size, it has an extremely high weight that is guaranteed to evict any
+non-minimal bundle. A minimal bundle is one that covers only one
+instruction. As long as the input program does not have impossible
+constraints that require more than one vreg to exist in one preg, an
+allocation problem of all minimal bundles will always have a solution.
+
+## Bundle Processing
+
+Let's now talk about what happens when we take a bundle off the
+allocation queue. The three basic outcomes are: allocate; split and
+requeue; or evict and try again immediately (and eventually allocate
+or split/requeue).
+
+### Properties: Weight, Priority, and Requirements
+
+To process a bundle, we have to compute a few properties. In fact we
+will have already computed a few of these beforehand, but we describe
+them all here.
+
+- Priority: a bundle's priority determines the order in which it is
+  considered for allocation. RA2 defines it as the sum of the lengths (in
+  instruction index space) of each liverange. 
This causes the
+  allocator to consider larger bundles first, when the allocation maps
+  are generally more free; they can always be evicted and split later.
+
+- Weight: a bundle's weight indicates how important (in terms of
+  runtime) its uses/register mentions are. In an approximate sense,
+  inner loop bodies create higher-weight uses. Fixed register
+  constraints add some weight, and defs add some weight. Finally,
+  weight is divided by priority, so a very large bundle that happens
+  to have a few important uses does not uniformly exert its weight
+  across its entire range. This has the effect of causing bundles to
+  be more important (more likely to evict others) the more they are
+  split.
+
+- Requirement: a bundle's requirement is a value in a lattice that we
+  have defined, where top is "Unknown" and bottom is
+  "Conflict". Between these two, we have: any register (of a class);
+  any stackslot (of a class); a particular register. "Any register"
+  can degrade to "a particular register", but any other pair of
+  different requirements meets to Conflict. Requirements are derived
+  from the operand constraints for all uses in all liveranges in a
+  bundle, and then merged with the lattice meet-function.
+
+Once we have the Requirement for a bundle, we can decide what to do.
+
+### No-Register-Required Cases
+
+If the requirement indicates that no register is needed (`Unknown` or
+`Any`), *and* if the spill bundle already exists for this bundle's
+spillset, then we move all the liveranges over to the spill bundle, as
+described above.
+
+If the requirement indicates that the stack is needed explicitly
+(e.g., for a safepoint), we set our spillset as "required" (this will
+cause it to allocate a spillslot) and return; because the bundle has
+no other allocation set, it will look to the spillset's spillslot by
+default.
+
+If the requirement indicates a conflict, we immediately split and
+requeue the split pieces. This split is a special one: rather than
+split in a way informed by conflicts (see below), we unconditionally
+split off the first use. This is a heuristic and we could in theory do
+better by finding the source of the conflict; but in practice this
+works well enough. Note that a bundle can reach this stage with a
+conflicting requirement only if the original liverange had conflicting
+uses (e.g., a liverange from a def in a register to a use on stack, or
+a liverange between two different fixed-reg-constrained operands); our
+bundle merging logic explicitly avoids merging two bundles if it would
+create a conflict.
+
+### Allocation-Map Probing
+
+If we did not immediately dispose of the bundle as described above,
+then we *can* use a register (either `Any`, which accepts a register
+as one of several options, or `Reg`, which must have one, or `Fixed`,
+which must have a particular one).
+
+We determine the list of physical registers whose allocation maps we
+will probe, and in what order. If a particular fixed register is
+required, we probe only that register. Otherwise, we probe all
+registers in the required class.
+
+The order in which we probe, if we are not constrained to a single
+register, is carefully chosen. First, if there is a hint register from
+the spillset (this is set by the last allocation into a register of
+any other bundle in this spillset), we probe that. Then, we probe all
+preferred registers; then all non-preferred registers. 
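+
+A small sketch of how the candidate probe list might be assembled
+(names are hypothetical; the offset-based rotation of the preferred
+and non-preferred lists is what the next paragraph describes):
+
+```rust
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+struct PReg(u8); // stand-in for the real bit-packed PReg
+
+fn probe_order(
+    fixed: Option<PReg>,    // set if the requirement names a fixed register
+    hint: Option<PReg>,     // last register used by this spillset, if any
+    preferred: &[PReg],     // e.g. caller-save registers
+    non_preferred: &[PReg], // e.g. callee-save registers
+    offset: usize,          // heuristic: bundle index + first range's inst index
+) -> Vec<PReg> {
+    if let Some(preg) = fixed {
+        // A fixed constraint allows probing only that one register.
+        return vec![preg];
+    }
+    let mut order = Vec::new();
+    order.extend(hint); // spillset hint first, if present
+    // Walk each list starting at `offset`, wrapping around once.
+    for &list in [preferred, non_preferred].iter() {
+        for i in 0..list.len() {
+            order.push(list[(offset + i) % list.len()]);
+        }
+    }
+    order
+}
+```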
+
+For each of the preferred and non-preferred register lists, we probe
+in an *offset* manner: we start at some index partway through the
+list, determined by some heuristic number that is random and
+well-distributed. (In practice, we use the sum of the bundle index and
+the instruction index of the start of the first range in the bundle.)
+We then march through the list and wrap around, stopping before we hit
+our starting point again.
+
+The purpose of this offset is to distribute the contention and speed
+up the allocation process. In the common case where there are enough
+registers to hold values without spilling (for small functions), we
+are more likely to choose a free register right away if we throw the
+dart at random than if we start *every* probe at register 0, in
+order. This has a large allocation performance impact in practice.
+
+For each register in probe order, we probe the allocation map, and
+gather, simultaneously, several results: (i) whether the entire range
+is free; (ii) if not, the list of all conflicting bundles, *and* the
+highest weight among those bundles; (iii) if not, the *first* conflict
+point.
+
+We do this by iterating over all liveranges in the preg's btree that
+overlap with each range in the current bundle. This iteration is
+somewhat subtle due to multiple "equal" keys (see above where we
+describe the use of the btree). It is also adaptive for performance
+reasons: it initially obtains an iterator into the btree corresponding
+to the start of the first range in the bundle, and concurrently
+iterates through both the btree and the bundle. However, if there is a
+large gap in the bundle, this might require skipping many irrelevant
+entries in the btree. So, if we skip too many entries (heuristically,
+16, right now), we do another lookup from scratch in the btree for the
+start of the next range in the bundle. This balances between the two
+cases: dense bundle, where O(1) iteration through the btree is faster,
+and sparse bundle, where O(log n) lookup for each entry is better.
+
+### Decision: Allocate, Evict, or Split
+
+First, the "allocate" case is easy: if, during our register probe
+loop, we find a physical register whose allocations do not overlap
+this bundle, then we allocate this register; done!
+
+If not, then we need to decide whether to evict some conflicting
+bundles and retry, or to split the current bundle into smaller pieces
+that may have better luck.
+
+A bit about our split strategy first: contrary to the IonMonkey
+allocator which inspired much of our design, we do *not* have a list
+of split strategies that split one bundle into many pieces at
+once. Instead, each iteration of the allocation loop splits at most
+*once*. This simplifies the splitting code greatly, but also turns out
+to be a nice heuristic: we split at the point that the bundle first
+encounters a conflict for a particular preg assignment, then we hint
+that preg for the first (pre-conflict) piece when we retry. In this
+way, we always make forward progress -- one piece of the bundle is
+always allocated -- and splits are informed by the actual situation at
+hand, rather than best guesses. Also note that while this may appear
+at first to be a greedy algorithm, it still allows backtracking: the
+first half of the split bundle, which we *can* now assign to a preg,
+does not necessarily remain on that preg forever (it can still be
+evicted later). It is just a split that is known to make at least one
+part of the allocation problem solvable. 
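+
+The information gathered per probed register can be summarized roughly
+as follows (a hypothetical summary struct, not the allocator's actual
+bookkeeping), and it feeds directly into the split-versus-evict
+decision described next:
+
+```rust
+type LiveBundleIndex = u32; // stand-in index type
+type ProgPointRepr = u32;   // stand-in: instruction index plus before/after bit
+
+// Result of probing one preg's allocation map against a bundle: either
+// the whole bundle fits, or we learn which bundles conflict (and the
+// weight of the heaviest one) and where the first conflict occurs.
+struct ProbeResult {
+    entirely_free: bool,
+    conflicting_bundles: Vec<LiveBundleIndex>,
+    max_conflict_weight: u32,
+    first_conflict_point: Option<ProgPointRepr>,
+}
+```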
+ +To determine whether to split or evict, we track our best options: as +we probe, we track the "lowest cost eviction option", which is a set +of bundles and the maximum weight in that set of bundles. We also +track the "lowest cost split option", which is the cost (more below), +the point at which to split, and the register for this option. + +For each register we probe, if there is a conflict but none of the +conflicts are fixed allocations, we receive a list of bundles that +conflicted, and also separately, the first conflicting program +point. We update the lowest-cost eviction option if the cost (max +weight) of the conflicting bundles is less than the current best. We +update the lowest-cost split option if the cost is less as well, +according to the following definition of cost: a split's cost is the +cost of its move, as defined by the weight of a normal def operand at +the split program point, plus the cost of all bundles beyond the split +point (which will still be conflicts even after the split). + +If there is a conflict with a fixed allocation, then eviction is not +an option, but we can still compute the candidate split point and cost +in the same way as above. + +Finally, as an optimization, we pass in the current best cost to the +btree probe inner loop; if, while probing, we have already exceeded +the best cost, we stop early (this improves allocation time without +affecting the result). + +Once we have the best cost for split and evict options, we split if +(i) the bundle is not already a minimal bundle, and (ii) we've already +evicted once in this toplevel iteration without success, or the weight +of the current bundle is less than the eviction cost. We then requeue +*both* resulting halves of the bundle with the preg that resulted in +this option as the register hint. Otherwise, we evict all conflicting +bundles and try again. + +Note that the split cost does not actually play into the above (split +vs. evict) decision; it is only used to choose *which* split is +best. This is equivalent to saying: we never evict if the current +bundle is less important than the evicted bundles, even if the split +is more expensive still. This is important for forward progress, and +the case where the split would be even more expensive should be very +very rare (it would have to come from a costly move in the middle of +an inner loop). + +### How to Split + +The actual split procedure is fairly simple. We are given a bundle and +a split-point. We create a new bundle to take on the second half +("rest") of the original. We find the point in the liverange list that +corresponds to the split, and distribute appropriately. If the +split-point lands in the middle of a liverange, then we split that +liverange as well. + +In the case that a new liverange is created, we add the liverange to +the corresponding vreg liverange list as well. Note that, as described +above, the vreg's liverange list is unsorted while splitting is +occurring (because we do not need to traverse it or do any lookups +during this phase); so we just append. + +The splitting code also supports a "minimal split", in which it simply +peels off the first use. This is used to ensure forward progress when +a bundle has conflicting requirements within it (see above). + +#### Spill Bundle and Splitting + +Once a split occurs, however, it turns out that we can improve results +by doing a little cleanup. Once we distribute a bundle's liveranges +across two half-bundles, we postprocess by trimming a bit. 
+ +In particular, if we see that the "loose ends" around the split point +extend beyond uses, we will create and move ranges to a spill +bundle. That is: if the last liverange in the first-half bundle +extends beyond its last use, we trim that part off into an empty (no +uses) liverange and place that liverange in the spill +bundle. Likewise, if the first liverange in the second-half bundle +starts before its first use, we trim that part off into an empty +liverange and place it in the spill bundle. + +This is, empirically, an improvement: it reduces register contention +and makes splitting more effective. The intuition is twofold: (i) it +is better to put all of the "flow-through" parts of a vreg's liveness +into one bundle that is never split, and can be spilled to the stack +if needed, to avoid unnecessary moves; and (ii) if contention is high +enough to cause splitting, it is more likely there will be an actual +stack spill, and if this is the case, it is better to do the store +just after the last use and reload just before the first use of the +respective bundles. + +Unfortunately, this heuristic choice does interact somewhat poorly +with program moves: moves between two normal (non-pinned) vregs do not +create ghost uses or defs, and so these points of the ranges can be +spilled, turning a normal register move into a move from or to the +stack. However, empirically, we have found that adding such ghost +uses/defs actually regresses some cases as well, because it pulls +values back into registers when we could have had a stack-to-stack +move (that might even be a no-op if the same spillset); overall, it +seems better to trim. It also improves allocation performance by +reducing contention in the registers during the core loop (before +second-chance allocation). + +## Second-Chance Allocation: Spilled Bundles + +Once the main allocation loop terminates, when all bundles have either +been allocated or punted to the "spilled bundles" list, we do +second-chance allocation. This is a simpler loop that never evicts and +never splits. Instead, each bundle gets one second chance, in which it +can probe pregs and attempt to allocate. If it fails, it will actually +live on the stack. + +This is correct because we are careful to only place bundles on the +spilled-bundles list that are *allowed* to live on the +stack. Specifically, only the canonical spill bundles (which will +contain only empty ranges) and other bundles that have an "any" or +"unknown" requirement are placed here (but *not* "stack" requirements; +those *must* be on the stack, so do not undergo second-chance +allocation). + +At the end of this process, we have marked spillsets as required +whenever at least one bundle in the spillset actually requires a stack +slot. We can then allocate slots to the spillsets. + +## Spillslot Allocation + +We must allocate space on the stack, denoted by an abstract index +space, to each spillset that requires it, and for the liveranges in +which it requires it. + +To facilitate this, we keep a btree per spillslot in the same way we +do per preg. We will allocate spillsets to slots in a way that avoids +interference. + +Note that we actually overapproximate the required ranges for each +spillset in order to improve the behavior of a later phase (redundant +move elimination). 
Specifically, when we allocate a slot for a +spillset, we reserve that slot for *all* of the liveranges of *every* +vreg that is assigned to that spillset (due to merging rules that +initially merge one-vreg bundles into final merged bundles, there will +be no overlaps here). In other words, we rule out interleaving of +completely different values in the same slot, though bundle merging +does mean that potentially many (non-interfering) vregs may share +it. This provides the important property that if a vreg has been +reloaded, but not modified, its spillslot *still contains the +up-to-date value* (because the slot is reserved for all liveranges of +the vreg). This enables us to avoid another store to the spillslot +later if there is another spilled range. + +We perform probing in a way that is somewhat different than for +registers, because the spillslot space is conceptually infinite. We +can thus optimize for slightly better allocation performance by giving +up and allocating a new slot at any time. + +For each size class, we keep a linked list of slots. When we need to +allocate a spillset to a slot, we traverse down the list and try a +fixed number of slots. If we find one that fits the spillset's ranges, +we allocate, and we remove the slot from its current place in the list +and append to the end. In this way, it is deprioritized from probing +"for a while", which tends to reduce contention. This is a simple way +to round-robin between slots. If we don't find one that fits after a +fixed number of probes, we allocate a new slot. + +And with that, we have valid allocations for all vregs for all points +that they are live! Now we just need to modify the program to reify +these choices. + +## Allocation Assignment + +The first step in reifying the allocation is to iterate through all +mentions of a vreg and fill in the resulting `Allocation` array with +the appropriate allocations. We do this by simply traversing +liveranges per vreg, looking up the allocation by observing the bundle +(and spillset if no specific allocation for the bundle), and for each +use, filling in the slot according to the saved progpoint/slot info in +the use data. + +## Move Generation + +The more difficult half of the reification step is generating the +*moves* that will put the values in the right spots. + +There are two sources of moves that we must generate. The first are +moves between different ranges of the same vreg, as the split pieces +of that vreg's original bundle may have been assigned to different +locations. The second are moves that result from move semantics in the +input program: either assignments from blockparam args on branches to +the target block's params, or program move instructions. (Recall that +we reify program moves in a unified way with all other moves, so the +client should not generate any machine code for their original moves +in the pre-allocation program.) + +Moves are tricky to handle efficiently because they join two +potentially very different locations in the program (in the case of +control-flow-edge moves). In order to avoid the need for random +lookups, which are a cache-locality nightmare even if we have O(log n) +lookups, we instead take a scan-sort-scan approach. + +First, we scan over each vreg's liveranges, find the allocation for +each, and for each move that comes *to* or *from* this liverange, +generate a "half-move". 
The key idea is that we generate a record for +each "side" of the move, and these records are keyed in a way that +after a sort, the "from" and "to" ends will be consecutive. We can +sort the list of halfmoves once (this is expensive, but not as +expensive as many separate pointer-chasing lookups), then scan it +again to actually generate the move instructions. + +To enable the sort to work, half-moves are sorted by a key that is +equivalent to the tuple (from-block, to-block, to-vreg, kind), where +`kind` is "source" or "dest". For each key, the payload is an +allocation. The fields in this tuple are carefully chosen: we know all +of them at every location we generate a halfmove, without expensive +lookups, and sorting by this key will make the source and all dests +(there can be more than one) contiguous in the final order. + +Half-moves are generated for several situations. First, at the start +of every block covered by a liverange, we can generate "dest" +half-moves for blockparams, and at the end of every block covered by a +liverange, we can generate "source" half-moves for blockparam args on +branches. Incidentally, this is the reason that `blockparam_ins` and +`blockparam_outs` are sorted tuple-lists whose tuples begin with +(vreg, block, ...): this is the order in which we do the toplevel scan +over allocations. + +Second, at every block edge, if the vreg is live in any pred (at +block-start) or succ (at block-end), we generate a half-move to +transfer the vreg to its own location in the connected block. + +This completes the "edge-moves". We sort the half-move array and then +have all of the alloc-to-alloc pairs on a given (from-block, to-block) +edge. + +There are also two kinds of moves that happen within blocks. First, +when a live-range ends and another begins for the same vreg in the +same block (i.e., a split in the middle of a block), we know both +sides of the move immediately (because it is the same vreg and we can +look up the adjacent allocation easily), and we can generate that +move. + +Second, program moves occur within blocks. Here we need to do a +similar thing as for block-edge half-moves, but keyed on program point +instead. This is why the `prog_move_srcs` and `prog_move_dsts` arrays +are initially sorted by their (vreg, inst) keys: we can directly fill +in their allocation slots during our main scan. Note that when sorted +this way, the source and dest for a given move instruction will be at +different indices. After the main scan, we *re-sort* the arrays by +just the instruction, so the two sides of a move line up at the same +index; we can then traverse both arrays, zipped together, and generate +moves. + +Finally, we generate moves to fix up multi-fixed-reg-constraint +situations, and make reused inputs work, as described earlier. + +## Move Resolution + +During this whole discussion, we have described "generating moves", +but we have not said what that meant. Note that in many cases, there +are several moves at a particular program point that semantically +happen *in parallel*. For example, if multiple vregs change +allocations between two instructions, all of those moves happen as +part of one parallel permutation. Similarly, blockparams have +parallel-assignment semantics. We thus enqueue all the moves that we +generate at program points and resolve them into lists of sequential +moves that can actually be lowered to move instructions in the machine +code. + +First, a word on *move priorities*. 
There are different kinds of moves
+that are generated between instructions, and we have to ensure that
+some happen before others, i.e., *not* in parallel. For example, a
+vreg might change allocation (due to a split) before an instruction,
+then be copied to an output register for an output with a reused-input
+policy. The latter move must happen *after* the vreg has been moved
+into its location for this instruction.
+
+To enable this, we define "move priorities", which are a logical
+extension of program points (i.e., they are sub-points) that enable
+finer-grained ordering of moves. We currently have the following
+priorities:
+
+- In-edge moves, to place edge-moves before the first instruction in a
+  block.
+- Block-param metadata, used for the checker only.
+- Regular, used for vreg movement between allocations.
+- Post-regular, used for checker metadata related to pinned-vreg moves.
+- Multi-fixed-reg, used for moves that handle the
+  single-vreg-in-multiple-fixed-pregs constraint case.
+- Reused-input, used for implementing outputs with reused-input policies.
+- Out-edge moves, to place edge-moves after the last instruction
+  (prior to the branch) in a block.
+
+Every move is statically given one of these priorities by the code
+that generates it.
+
+We collect moves with (prog-point, prio) keys, and we sort by those
+keys. We then have, for each such key, a list of moves that
+semantically happen in parallel.
+
+We then resolve those moves using a parallel-move resolver, as we now
+describe.
+
+### Parallel-Move Resolver
+
+The fundamental issue that arises when resolving parallel moves to
+sequential moves is *overlap*: some of the moves may overwrite
+registers that other moves use as sources. We must carefully order
+moves so that this does not clobber values incorrectly.
+
+We first check if such overlap occurs. If it does not (this is
+actually the most common case), the list of parallel moves can be
+emitted as sequential moves directly. Done!
+
+Otherwise, we have to order the moves carefully. Furthermore, if there
+is a *cycle* anywhere among the moves, we will need a scratch
+register. (Consider, e.g., t0 := t1 and t1 := t0 in parallel: with
+only move instructions and no direct "exchange" instruction, we cannot
+reify this without a third register.)
+
+We first compute a mapping from each move instruction to the move
+instruction, if any, that it must precede. Note that there can be only
+one such move for a given move, because each destination can be
+written only once; so a move might be constrained only before the one
+move that overwrites its source. (This will be important in a bit!)
+
+Our task is now to find an ordering of moves that respects these
+dependencies. To do so, we perform a depth-first search on the graph
+induced by the dependencies, which will generate a list of sequential
+moves in reverse order. We keep a stack of moves; we start with any
+move that has not been visited yet; in each iteration, if the
+top-of-stack has no out-edge to another move (does not need to come
+before any others), then push it to a result vector, followed by all
+others on the stack (in popped order). If it does have an out-edge and
+the target is already visited and not on the stack anymore (so already
+emitted), likewise, emit this move and the rest on the stack. If it
+has an out-edge to a move not yet visited, push it on the stack and
+continue. Otherwise, if the out-edge goes to a move currently on the
+stack, we have found a cycle.
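+As a concrete preview of the cycle case before we walk through it,
+consider the smallest possible cycle, a two-location swap. The snippet
+below is purely illustrative: it models locations as array slots
+rather than the allocator's real allocation type.
+
+```rust
+// Parallel intent: loc[0] <- loc[1] and loc[1] <- loc[0] (a swap).
+// Sequentialized with a scratch location, as described below: the
+// first move's destination is redirected to the scratch, and an extra
+// move from the scratch to the first move's original destination is
+// appended.
+fn main() {
+    let mut loc = [10, 20];
+    let scratch = loc[1]; // first move, destination redirected to scratch
+    loc[1] = loc[0];      // second move, unchanged
+    loc[0] = scratch;     // extra move: scratch -> first move's dest
+    assert_eq!(loc, [20, 10]); // the parallel swap semantics hold
+}
+```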
In this case, we emit the moves on the stack with +a modification: the first move writes to a scratch register, and we +emit an additional move that moves from the scratch to the first +move's dest. This breaks the cycle. + +The astute reader may notice that this sounds like a canonical +application of Tarjan's algorithm for finding SCCs (strongly-connected +components). Why don't we have the full complexity of that algorithm? +In particular, *why* can we emit the cycle *right away* once we find +it, rather than ensuring that we have gotten all of the SCC first? + +The answer is that because there is only *one* out-edge at most (a +move can only require preceding *one* other move), all SCCs must be +simple cycles. This means that once we have found a cycle, no other +nodes (moves) can be part of the SCC, because every node's single +out-edge is already accounted for. This is what allows us to avoid a +fully general SCC algorithm. + +Once the list of moves in-reverse has been constructed, we reverse it +and return. + +Note that this "move resolver" is fuzzed separately with a simple +symbolic move simulator (the `moves` fuzz-target). + +### Stack-to-Stack Moves + +There is one potentially difficult situation that could arise from the +move-resolution logic so far: if a vreg moves from one spillslot to +another, this implies a memory-to-memory move, which most machine +architectures cannot handle natively. It would be much nicer if we +could ensure within the regalloc that this never occurs. + +This is in fact possible to do in a postprocessing step. We iterate +through the sequential moves, tracking whether the scratch register is +in use (has been written). When we see a stack-to-stack move: (i) if +the scratch register is not in use, generate a stack-to-scratch move +and scratch-to-stack move; otherwise, (ii) if the scratch register is +in use, allocate an "extra spillslot" if one has not already been +allocated, move the scratch reg to that, do the above stack-to-scratch +/ scratch-to-stack sequence, then reload the scratch reg from the +extra spillslot. + +## Redundant-Move Elimination + +As a final step before returning the list of program edits to the +client, we perform one optimization: redundant-move elimination. + +To understand the need for this, consider what will occur when a vreg +is (i) defined once, (ii) used many times, and (iii) spilled multiple +times between some of the uses: with the design described above, we +will move the value from the preg to the stack after every segment of +uses, and then reload it when the next use occurs. However, only the +first spill is actually needed; as we noted above, we allocate +spillslots so that the slot that corresponded to the vreg at the first +spill will always be reserved for that vreg as long as it is live. If +no other defs or mods occur, the value in the slot can be reloaded, +and need not be written back every time. + +This inefficiency is a result of our invariant that a vreg lives in +exactly one place at a time, and these locations are joined by +moves. This is a simple and effective design to use for most of the +allocation pipeline, but falls flat here. It is especially inefficient +when the unnecessary spill occurs in an inner loop. (E.g.: value +defined at top of function is spilled, then used once in the middle of +an inner loop body.) + +The opposite case can also sometimes occur, though it is rarer: a +value is loaded into a register, spilled, and then reloaded into the +same register. 
This can happen when hinting is successful at getting +several segments of a vreg to use the same preg, but splitting has +trimmed part of the liverange between uses and put it in the spill +bundle, and the spill bundle did not get a reg. + +In order to resolve this inefficiency, we implement a general +redundant-move elimination pass. This pass tracks, for every +allocation (reg or spillslot), whether it is a copy of another +allocation. This state is invalidated whenever either that allocation +or the allocation of which it is a copy is overwritten. When we see a +move instruction, if the destination is already a copy of the source, +we elide the move. (There are some additional complexities to preserve +checker metadata which we do not describe here.) + +Note that this could, in principle, be done as a fixpoint analysis +over the CFG; it must be, if we try to preserve state across +blocks. This is because a location is only a copy of another if that +is true on every incoming edge. However, to avoid the cost and +complexity of doing such an analysis, we instead take the much simpler +approach of doing only an intra-block analysis. This turns out to be +sufficient to remove most redundant moves, especially in the common +case of a single use of an otherwise-spilled value. + +Note that we could do better *if* we accepted only SSA code, because +we would know that a value could not be redefined once written. We +should consider this again once we clean up and remove the non-SSA +support. + +# Future Plans + +## SSA-Only Cleanup + +When the major user (Cranelift via the regalloc.rs shim) migrates to +generate SSA code and native regalloc2 operands, there are many bits +of complexity we can remove, as noted throughout this +writeup. Briefly, we could (i) remove special handling of program +moves, (ii) remove the pinned-vreg hack, (iii) simplify redundant-move +elimination, (iv) remove special handling of "mod" operands, and (v) +probably simplify plenty of code given the invariant that a def always +starts a range. + +More importantly, we expect this change to result in potentially much +better allocation performance. The use of special pinned vregs and +moves to/from them instead of fixed-reg constraints, explicit moves +for every reused-input constraint, and already-sequentialized series +of move instructions on edges for phi nodes, are all expensive ways of +encoding regalloc2's native input primitives that have to be +reverse-engineered. Removing that translation layer would be +ideal. Also, allowing regalloc2 to handle phi-node (blockparam) +lowering in a way that is integrated with other moves will likely +generate better code than the way that program-move handling interacts +with Cranelift's manually lowered phi-moves at the moment. + +## Better Split Heuristics + +We have spent quite some effort trying to improve splitting behavior, +and it is now generally decent, but more work could be done here, +especially with regard to the interaction between splits and the loop +nest. + +## Native Debuginfo Output + +Cranelift currently computes value locations (in registers and +stack-slots) for detailed debuginfo with an expensive post-pass, after +regalloc is complete. This is because the existing register allocator +does not support returning this information directly. However, +providing such information by generating it while we scan over +liveranges in each vreg would be relatively simple, and has the +potential to be much faster and more reliable for Cranelift. 
We should
+investigate adding an interface for this to regalloc2 and using it.
+
+# Appendix: Comparison to IonMonkey Allocator
+
+There are a number of differences between the [IonMonkey
+allocator](https://searchfox.org/mozilla-central/source/js/src/jit/BacktrackingAllocator.cpp)
+and this one. While this allocator initially began as an attempt to
+clone IonMonkey's, it has drifted significantly as we optimized the
+design (especially after we built the regalloc.rs shim and had to
+adapt to its code style); it is easier at this point to name the
+similarities than the differences.
+
+* The core abstractions of "liverange", "bundle", "vreg", "preg", and
+  "operand" (with policies/constraints) are the same.
+
+* The overall allocator pipeline is the same, and the top-level
+  structure of each stage should look similar. Both allocators begin
+  by computing liveranges, then merging bundles, then handling bundles
+  and splitting/evicting as necessary, then doing second-chance
+  allocation, then reifying the decisions.
+
+* The cost functions are very similar, though the heuristics that make
+  decisions based on them are not.
+
+Several notable high-level differences are:
+
+* There are [many different fuzz targets](fuzz/fuzz_targets/) that
+  exercise the allocator, including a full symbolic checker
+  (`ion_checker` target) based on the [symbolic checker in
+  regalloc.rs](https://cfallin.org/blog/2021/03/15/cranelift-isel-3/)
+  and, e.g., a targeted fuzzer for the parallel move-resolution
+  algorithm (`moves`) and the SSA generator used for generating cases
+  for the other fuzz targets (`ssagen`).
+
+* The data-structure invariants are simplified. While the IonMonkey
+  allocator allowed for LiveRanges and Bundles to overlap in certain
+  cases, this allocator sticks to a strict invariant: ranges do not
+  overlap in bundles, and bundles do not overlap. There are other
+  examples too: e.g., the definition of minimal bundles is very simple
+  and does not depend on scanning the code at all. In general, we
+  should be able to state simple invariants and see by inspection (as
+  well as fuzzing -- see above) that they hold.
+
+* The data structures themselves are simplified. Where IonMonkey uses
+  linked lists in many places, this allocator stores simple inline
+  smallvecs of liveranges on bundles and vregs, and smallvecs of uses
+  on liveranges. We also (i) find a way to construct liveranges
+  in-order immediately, without any need for splicing, unlike
+  IonMonkey, and (ii) relax sorting invariants where possible to allow
+  for cheap append operations in many cases.
+
+* The splitting heuristics are significantly reworked. Whereas
+  IonMonkey has an all-at-once approach to splitting an entire bundle,
+  and has a list of complex heuristics to choose where to split, this
+  allocator does conflict-based splitting, and tries to decide whether
+  to split or evict and which split to take based on cost heuristics.
+
+* The liverange computation is exact, whereas IonMonkey approximates
+  using a single-pass algorithm that makes vregs live across entire
+  loop bodies. We have found that precise liveness improves allocation
+  performance and generated code quality, even though the liveness
+  itself is slightly more expensive to compute.
+
+* Many of the algorithms in the IonMonkey allocator are built with
+  helper functions that do linear scans. These "small quadratic" loops
+  are likely not a huge issue in practice, but nevertheless have the
+  potential to be in corner cases.
As much as possible, all work in
+  this allocator is done in linear scans.
+
+* There are novel schemes for solving certain interesting design
+  challenges. One example: in IonMonkey, liveranges are connected
+  across blocks by, when reaching one end of a control-flow edge in a
+  scan, doing a lookup of the allocation at the other end. This is in
+  principle a linear lookup (so quadratic overall). We instead
+  generate a list of "half-moves", keyed on the edge and from/to
+  vregs, with each holding one of the allocations. By sorting and then
+  scanning this list, we can generate all edge moves in one linear
+  scan. There are a number of other examples of simplifications: for
+  example, we handle multiple conflicting
+  physical-register-constrained uses of a vreg in a single instruction
+  by recording a copy to do in a side-table, then removing constraints
+  for the core regalloc. Ion instead has to tweak its definition of
+  minimal bundles and create two liveranges that overlap (!) to
+  represent the two uses.
+
+* Using block parameters rather than phi-nodes significantly
+  simplifies handling of inter-block data movement. IonMonkey had to
+  special-case phis in many ways because they are actually quite
+  weird: their uses happen semantically in other blocks, and their
+  defs happen in parallel at the top of the block. Block parameters
+  naturally and explicitly represent these semantics in a direct way.
+
+* The allocator supports irreducible control flow and arbitrary block
+  ordering (its only CFG requirement is that critical edges are
+  split).
+
+* The allocator supports non-SSA code, and has native support for
+  handling program moves specially.
+
+# Appendix: Performance-Tuning Lessons
+
+In the course of optimizing the allocator's performance, we found a
+number of general principles:
+
+* We got substantial performance speedups from using vectors rather
+  than linked lists everywhere. This is well-known, but nevertheless,
+  it took some thought to work out how to avoid the need for any
+  splicing, and it turns out that even when our design is slightly
+  less efficient asymptotically (e.g., append-and-re-sort rather than
+  linear-time merge of two sorted liverange lists when merging
+  bundles), it is faster.
+
+* We initially used a direct translation of IonMonkey's splay tree as
+  an allocation map for each PReg. This turned out to be significantly
+  (!) less efficient than Rust's built-in BTree data structures, for
+  the usual cache-efficiency vs. pointer-chasing reasons.
+
+* We initially used dense bitvecs, as IonMonkey does, for
+  livein/liveout bits. It turned out that a chunked sparse design (see
+  below) was much more efficient.
+
+* Precise liveness significantly improves performance because it
+  reduces the size of liveranges (i.e., interference), and probing
+  registers with liveranges is the most significant hot inner
+  loop. Paying a fraction of a percent runtime for the iterative
+  dataflow algorithm to get precise bitsets is more than worth it.
+
+* The randomized probing of registers was a huge win: as above, the
+  probing is very expensive, and reducing the average number of probes
+  it takes to find a free register is very important.
+
+* In general, single-pass algorithms and design of data structures to
+  enable them are important. For example, the half-move technique
+  avoids the need to do any O(log n) search at all, and is relatively
+  cache-efficient.
As another example, a side-effect of the precise
+  liveness was that we could then process operands within blocks in
+  actual instruction order (in reverse), which allowed us to simply
+  append liveranges to in-progress vreg liverange lists and then
+  reverse at the end. The expensive part is a single pass; only the
+  bitset computation is a fixpoint loop.
+
+* Sorts are better than always-sorted data structures (like btrees):
+  they amortize all the comparison and update cost to one phase, and
+  this phase is much more cache-friendly than a bunch of spread-out
+  updates.
+
+* Take care of basic data structures and their operator definitions!
+  We initially used the auto-derived comparator on ProgPoint, and let
+  ProgPoint be a normal struct (with a u32 inst index and a
+  Before/After enum). The comparator for this, used in many sorting
+  inner loops, was a compound thing with conditionals. Instead, pack
+  them in a u32 and do a simple compare (and save half the memory as
+  well). Likewise, the half-move key is a single value packed in a
+  u64; this is far more efficient than the tuple comparator on a
+  4-tuple, and the half-move sort (which can be a few percent or more
+  of total allocation time) became multiple times cheaper.
+
+# Appendix: Data Structure: Chunked Sparse BitVec
+
+We use a "chunked sparse bitvec" to store liveness information, which
+is just a set of VReg indices. The design is fairly simple: the
+toplevel is a HashMap from "chunk" to a `u64`, and each `u64`
+represents 64 contiguous indices.
+
+The intuition is that while the vreg sets are likely sparse overall,
+they will probably be dense within small regions of the index
+space. For example, in the Nth block in a function, the values that
+flow from block N-1 will largely be almost-contiguous vreg indices, if
+vregs are allocated in sequence down the function body. Or, at least,
+they will be some local vregs together with a few defined at the top
+of the function; two separate chunks will cover that.
+
+We tried a number of other designs as well. Initially we used a simple
+dense bitvec, but this was prohibitively expensive: O(n^2) space when
+the real need is closer to O(n) (i.e., a classic sparse matrix). We
+also tried a hybrid scheme that kept a list of indices when small and
+used either a bitvec or a hashset when large. This did not perform as
+well because (i) it was less memory-efficient (the chunking helps with
+this) and (ii) insertions are more expensive when they always require
+a full hashset/hashmap insert.
+
+# Appendix: Fuzzing
+
+We have five fuzz targets: `ssagen`, `domtree`, `moves`, `ion`, and
+`ion_checker`.
+
+## SSAGen
+
+The SSAGen target tests our SSA generator, which generates cases for
+the full allocator fuzz targets. The SSA generator is careful to
+always generate a valid CFG, with split critical edges, and valid SSA,
+so that we never have to throw out a test input before we reach the
+allocator itself. (An alternative fuzzing approach randomly generates
+programs and then throws out those that do not meet certain conditions
+before using them as legitimate testcases; this is much simpler, but
+less efficient.)
+
+To generate a valid CFG, with no unreachable blocks and with no
+critical edges, the generator (i) glues together units of either one
+or three blocks (A->B, A->C), forming either a straight-through
+section or a conditional.
These are glued together into a "spine", and +the conditionals (the "C" block), where they exist, are then linked to +a random target block chosen among the main blocks of these one- or +three-block units. The targets are chosen either randomly, for +potentially irreducible CFGs, or in a way that ensures proper nesting +of loop backedges, if a structured CFG is requested. + +SSA is generated by first choosing which vregs will be defined in each +block, and which will be defined as blockparams vs. instruction +defs. Instructions are then generated, with operands chosen among the +"available" vregs: those defined so far in the current block and all +of those in any other block that dominates this one. + +The SSAGen fuzz target runs the above code generator against an SSA +validator, and thus ensures that it will only generate valid SSA code. + +## Domtree + +The `domtree` fuzz target computes dominance using the algorithm that +we use elsewhere in our CFG analysis, and then walks a +randomly-generated path through the CFG. It checks that the dominance +definition ("a dom b if any path from entry to b must pass through a") +is consistent with this particular randomly-chosen path. + +## Moves + +The `moves` fuzz target tests the parallel move resolver. It generates +a random sequence of parallel moves, careful to ensure that each +destination is written only once. It then runs the parallel move +resolver, and then *abstractly interprets* the resulting sequential +series of moves, thus determining which inputs flow to which +outputs. This must match the original set of parallel moves. + +## Ion and Ion-checker + +The `ion` fuzz target runs the allocator over test programs generated +by SSAGen. It does not validate the output; it only tests that the +allocator runs to completion and does not panic. This was used mainly +during development, and is now less useful than the checker-based +target. + +The `ion_checker` fuzz target runs the allocator's result through a +symbolic checker, which is adapted from the one developed for +regalloc.rs (see [this blog +post](https://cfallin.org/blog/2021/01/22/cranelift-isel-2/) for more +details). This is the most useful fuzz target in the fuzzing suite, +and has found many bugs in development. diff --git a/src/lib.rs b/src/lib.rs index 7d55624c..3a6ecb45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ /* - * The fellowing license applies to this file, which derives many + * The following license applies to this file, which derives many * details (register and constraint definitions, for example) from the * files `BacktrackingAllocator.h`, `BacktrackingAllocator.cpp`, * `LIR.h`, and possibly definitions in other related files in From 6944bc473594c688cdd75f7d138f0005fbdb010e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 18 Jun 2021 15:24:11 -0700 Subject: [PATCH 120/155] Fix typo (thanks @bjorn3). Co-authored-by: bjorn3 --- doc/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/DESIGN.md b/doc/DESIGN.md index c55887ac..32b207d6 100644 --- a/doc/DESIGN.md +++ b/doc/DESIGN.md @@ -42,7 +42,7 @@ the client ("swap"), but we have not pursued this. The allocator operates on an input program that is in a standard CFG representation: the function body is a list of basic blocks, and each -block has a list of insructions and zero or more successors. The +block has a list of instructions and zero or more successors. 
The allocator also requires the client to provide predecessors for each block, and these must be consistent with the successor lists. From b36a563d69fe99e436cf59c22d921f6917a628c8 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 18 Jun 2021 16:51:08 -0700 Subject: [PATCH 121/155] Cleanup: split allocator implemntation into 11 files of more reasonable size. --- doc/TODO | 15 + src/ion/data_structures.rs | 531 ++++ src/ion/dump.rs | 141 + src/ion/liveranges.rs | 1190 +++++++++ src/ion/merge.rs | 439 ++++ src/ion/mod.rs | 5078 +----------------------------------- src/ion/moves.rs | 1167 +++++++++ src/ion/process.rs | 1057 ++++++++ src/ion/redundant_moves.rs | 142 + src/ion/reg_traversal.rs | 123 + src/ion/requirement.rs | 92 + src/ion/spill.rs | 218 ++ src/ion/stackmap.rs | 73 + 13 files changed, 5222 insertions(+), 5044 deletions(-) create mode 100644 doc/TODO create mode 100644 src/ion/data_structures.rs create mode 100644 src/ion/dump.rs create mode 100644 src/ion/liveranges.rs create mode 100644 src/ion/merge.rs create mode 100644 src/ion/moves.rs create mode 100644 src/ion/process.rs create mode 100644 src/ion/redundant_moves.rs create mode 100644 src/ion/reg_traversal.rs create mode 100644 src/ion/requirement.rs create mode 100644 src/ion/spill.rs create mode 100644 src/ion/stackmap.rs diff --git a/doc/TODO b/doc/TODO new file mode 100644 index 00000000..aa76ce82 --- /dev/null +++ b/doc/TODO @@ -0,0 +1,15 @@ +# Features + +- Rematerialization +- Stack-location constraints that place operands in user-defined stack + locations (distinct from SpillSlots) (e.g., stack args) + +# Performance + +- Investigate better register hinting +- Investigate more principled cost functions and split locations, + especially around loop nests + +# Cleanup + +- Remove support for non-SSA code once no longer necessary \ No newline at end of file diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs new file mode 100644 index 00000000..ce95f7a0 --- /dev/null +++ b/src/ion/data_structures.rs @@ -0,0 +1,531 @@ +/* + * The following license applies to this file, which was initially + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Since the initial port, the design has been substantially evolved + * and optimized. + */ + +//! Data structures for backtracking allocator. + +use crate::bitvec::BitVec; +use crate::cfg::CFGInfo; +use crate::index::ContainerComparator; +use crate::{ + define_index, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, PReg, ProgPoint, + RegClass, SpillSlot, VReg, +}; +use smallvec::SmallVec; +use std::cmp::Ordering; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::Debug; + +/// A range from `from` (inclusive) to `to` (exclusive). 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct CodeRange { + pub from: ProgPoint, + pub to: ProgPoint, +} + +impl CodeRange { + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.from == self.to + } + #[inline(always)] + pub fn contains(&self, other: &Self) -> bool { + other.from >= self.from && other.to <= self.to + } + #[inline(always)] + pub fn contains_point(&self, other: ProgPoint) -> bool { + other >= self.from && other < self.to + } + #[inline(always)] + pub fn overlaps(&self, other: &Self) -> bool { + other.to > self.from && other.from < self.to + } + #[inline(always)] + pub fn len(&self) -> usize { + self.to.inst().index() - self.from.inst().index() + } +} + +impl std::cmp::PartialOrd for CodeRange { + #[inline(always)] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl std::cmp::Ord for CodeRange { + #[inline(always)] + fn cmp(&self, other: &Self) -> Ordering { + if self.to <= other.from { + Ordering::Less + } else if self.from >= other.to { + Ordering::Greater + } else { + Ordering::Equal + } + } +} + +define_index!(LiveBundleIndex); +define_index!(LiveRangeIndex); +define_index!(SpillSetIndex); +define_index!(UseIndex); +define_index!(VRegIndex); +define_index!(PRegIndex); +define_index!(SpillSlotIndex); + +/// Used to carry small sets of bundles, e.g. for conflict sets. +pub type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>; + +#[derive(Clone, Copy, Debug)] +pub struct LiveRangeListEntry { + pub range: CodeRange, + pub index: LiveRangeIndex, +} + +pub type LiveRangeList = SmallVec<[LiveRangeListEntry; 4]>; +pub type UseList = SmallVec<[Use; 2]>; + +#[derive(Clone, Debug)] +pub struct LiveRange { + pub range: CodeRange, + + pub vreg: VRegIndex, + pub bundle: LiveBundleIndex, + pub uses_spill_weight_and_flags: u32, + + pub uses: UseList, + + pub merged_into: LiveRangeIndex, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u32)] +pub enum LiveRangeFlag { + StartsAtDef = 1, +} + +impl LiveRange { + #[inline(always)] + pub fn set_flag(&mut self, flag: LiveRangeFlag) { + self.uses_spill_weight_and_flags |= (flag as u32) << 29; + } + #[inline(always)] + pub fn clear_flag(&mut self, flag: LiveRangeFlag) { + self.uses_spill_weight_and_flags &= !((flag as u32) << 29); + } + #[inline(always)] + pub fn assign_flag(&mut self, flag: LiveRangeFlag, val: bool) { + let bit = if val { (flag as u32) << 29 } else { 0 }; + self.uses_spill_weight_and_flags &= 0xe000_0000; + self.uses_spill_weight_and_flags |= bit; + } + #[inline(always)] + pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { + self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0 + } + #[inline(always)] + pub fn flag_word(&self) -> u32 { + self.uses_spill_weight_and_flags & 0xe000_0000 + } + #[inline(always)] + pub fn merge_flags(&mut self, flag_word: u32) { + self.uses_spill_weight_and_flags |= flag_word; + } + #[inline(always)] + pub fn uses_spill_weight(&self) -> u32 { + self.uses_spill_weight_and_flags & 0x1fff_ffff + } + #[inline(always)] + pub fn set_uses_spill_weight(&mut self, weight: u32) { + assert!(weight < (1 << 29)); + self.uses_spill_weight_and_flags = + (self.uses_spill_weight_and_flags & 0xe000_0000) | weight; + } +} + +#[derive(Clone, Copy, Debug)] +pub struct Use { + pub operand: Operand, + pub pos: ProgPoint, + pub slot: u8, + pub weight: u16, +} + +impl Use { + #[inline(always)] + pub fn new(operand: Operand, pos: ProgPoint, slot: u8) -> Self { + Self { + operand, + pos, + slot, + // Weight is updated on insertion into LR. 
+ weight: 0, + } + } +} + +pub const SLOT_NONE: u8 = u8::MAX; + +#[derive(Clone, Debug)] +pub struct LiveBundle { + pub ranges: LiveRangeList, + pub spillset: SpillSetIndex, + pub allocation: Allocation, + pub prio: u32, // recomputed after every bulk update + pub spill_weight_and_props: u32, +} + +impl LiveBundle { + #[inline(always)] + pub fn set_cached_spill_weight_and_props( + &mut self, + spill_weight: u32, + minimal: bool, + fixed: bool, + stack: bool, + ) { + debug_assert!(spill_weight < ((1 << 29) - 1)); + self.spill_weight_and_props = spill_weight + | (if minimal { 1 << 31 } else { 0 }) + | (if fixed { 1 << 30 } else { 0 }) + | (if stack { 1 << 29 } else { 0 }); + } + + #[inline(always)] + pub fn cached_minimal(&self) -> bool { + self.spill_weight_and_props & (1 << 31) != 0 + } + + #[inline(always)] + pub fn cached_fixed(&self) -> bool { + self.spill_weight_and_props & (1 << 30) != 0 + } + + #[inline(always)] + pub fn cached_stack(&self) -> bool { + self.spill_weight_and_props & (1 << 29) != 0 + } + + #[inline(always)] + pub fn set_cached_fixed(&mut self) { + self.spill_weight_and_props |= 1 << 30; + } + + #[inline(always)] + pub fn set_cached_stack(&mut self) { + self.spill_weight_and_props |= 1 << 29; + } + + #[inline(always)] + pub fn cached_spill_weight(&self) -> u32 { + self.spill_weight_and_props & ((1 << 29) - 1) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct BundleProperties { + pub minimal: bool, + pub fixed: bool, +} + +#[derive(Clone, Debug)] +pub struct SpillSet { + pub vregs: SmallVec<[VRegIndex; 2]>, + pub slot: SpillSlotIndex, + pub reg_hint: PReg, + pub class: RegClass, + pub spill_bundle: LiveBundleIndex, + pub required: bool, + pub size: u8, +} + +#[derive(Clone, Debug)] +pub struct VRegData { + pub ranges: LiveRangeList, + pub blockparam: Block, + pub is_ref: bool, + pub is_pinned: bool, +} + +#[derive(Clone, Debug)] +pub struct PRegData { + pub reg: PReg, + pub allocations: LiveRangeSet, +} + +#[derive(Clone, Debug)] +pub struct Env<'a, F: Function> { + pub func: &'a F, + pub env: &'a MachineEnv, + pub cfginfo: CFGInfo, + pub liveins: Vec, + pub liveouts: Vec, + /// Blockparam outputs: from-vreg, (end of) from-block, (start of) + /// to-block, to-vreg. The field order is significant: these are sorted so + /// that a scan over vregs, then blocks in each range, can scan in + /// order through this (sorted) list and add allocs to the + /// half-move list. + pub blockparam_outs: Vec<(VRegIndex, Block, Block, VRegIndex)>, + /// Blockparam inputs: to-vreg, (start of) to-block, (end of) + /// from-block. As above for `blockparam_outs`, field order is + /// significant. + pub blockparam_ins: Vec<(VRegIndex, Block, Block)>, + /// Blockparam allocs: block, idx, vreg, alloc. Info to describe + /// blockparam locations at block entry, for metadata purposes + /// (e.g. for the checker). + pub blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>, + + pub ranges: Vec, + pub bundles: Vec, + pub spillsets: Vec, + pub vregs: Vec, + pub vreg_regs: Vec, + pub pregs: Vec, + pub allocation_queue: PrioQueue, + pub clobbers: Vec, // Sorted list of insts with clobbers. + pub safepoints: Vec, // Sorted list of safepoint insts. + pub safepoints_per_vreg: HashMap>, + + pub spilled_bundles: Vec, + pub spillslots: Vec, + pub slots_by_size: Vec, + + pub extra_spillslot: Vec>, + + // Program moves: these are moves in the provided program that we + // handle with our internal machinery, in order to avoid the + // overhead of ordinary operand processing. 
We expect the client + // to not generate any code for instructions that return + // `Some(..)` for `.is_move()`, and instead use the edits that we + // provide to implement those moves (or some simplified version of + // them) post-regalloc. + // + // (from-vreg, inst, from-alloc), sorted by (from-vreg, inst) + pub prog_move_srcs: Vec<((VRegIndex, Inst), Allocation)>, + // (to-vreg, inst, to-alloc), sorted by (to-vreg, inst) + pub prog_move_dsts: Vec<((VRegIndex, Inst), Allocation)>, + // (from-vreg, to-vreg) for bundle-merging. + pub prog_move_merges: Vec<(LiveRangeIndex, LiveRangeIndex)>, + + // When multiple fixed-register constraints are present on a + // single VReg at a single program point (this can happen for, + // e.g., call args that use the same value multiple times), we + // remove all but one of the fixed-register constraints, make a + // note here, and add a clobber with that PReg instread to keep + // the register available. When we produce the final edit-list, we + // will insert a copy from wherever the VReg's primary allocation + // was to the approprate PReg. + // + // (progpoint, copy-from-preg, copy-to-preg, to-slot) + pub multi_fixed_reg_fixups: Vec<(ProgPoint, PRegIndex, PRegIndex, usize)>, + + pub inserted_moves: Vec, + + // Output: + pub edits: Vec<(u32, InsertMovePrio, Edit)>, + pub allocs: Vec, + pub inst_alloc_offsets: Vec, + pub num_spillslots: u32, + pub safepoint_slots: Vec<(ProgPoint, SpillSlot)>, + + pub stats: Stats, + + // For debug output only: a list of textual annotations at every + // ProgPoint to insert into the final allocated program listing. + pub debug_annotations: std::collections::HashMap>, + pub annotations_enabled: bool, +} + +#[derive(Clone, Debug)] +pub struct SpillSlotData { + pub ranges: LiveRangeSet, + pub class: RegClass, + pub alloc: Allocation, + pub next_spillslot: SpillSlotIndex, +} + +#[derive(Clone, Debug)] +pub struct SpillSlotList { + pub first_spillslot: SpillSlotIndex, + pub last_spillslot: SpillSlotIndex, +} + +#[derive(Clone, Debug)] +pub struct PrioQueue { + pub heap: std::collections::BinaryHeap, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct PrioQueueEntry { + pub prio: u32, + pub bundle: LiveBundleIndex, + pub reg_hint: PReg, +} + +#[derive(Clone, Debug)] +pub struct LiveRangeSet { + pub btree: BTreeMap, +} + +#[derive(Clone, Copy, Debug)] +pub struct LiveRangeKey { + pub from: u32, + pub to: u32, +} + +impl LiveRangeKey { + #[inline(always)] + pub fn from_range(range: &CodeRange) -> Self { + Self { + from: range.from.to_index(), + to: range.to.to_index(), + } + } + + #[inline(always)] + pub fn to_range(&self) -> CodeRange { + CodeRange { + from: ProgPoint::from_index(self.from), + to: ProgPoint::from_index(self.to), + } + } +} + +impl std::cmp::PartialEq for LiveRangeKey { + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + self.to > other.from && self.from < other.to + } +} +impl std::cmp::Eq for LiveRangeKey {} +impl std::cmp::PartialOrd for LiveRangeKey { + #[inline(always)] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl std::cmp::Ord for LiveRangeKey { + #[inline(always)] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.to <= other.from { + std::cmp::Ordering::Less + } else if self.from >= other.to { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Equal + } + } +} + +pub struct PrioQueueComparator<'a> { + pub prios: &'a [usize], +} +impl<'a> ContainerComparator for PrioQueueComparator<'a> { + type Ix = 
LiveBundleIndex; + fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering { + self.prios[a.index()].cmp(&self.prios[b.index()]) + } +} + +impl PrioQueue { + pub fn new() -> Self { + PrioQueue { + heap: std::collections::BinaryHeap::new(), + } + } + + #[inline(always)] + pub fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, reg_hint: PReg) { + self.heap.push(PrioQueueEntry { + prio: prio as u32, + bundle, + reg_hint, + }); + } + + #[inline(always)] + pub fn is_empty(self) -> bool { + self.heap.is_empty() + } + + #[inline(always)] + pub fn pop(&mut self) -> Option<(LiveBundleIndex, PReg)> { + self.heap.pop().map(|entry| (entry.bundle, entry.reg_hint)) + } +} + +impl LiveRangeSet { + pub(crate) fn new() -> Self { + Self { + btree: BTreeMap::new(), + } + } +} + +#[derive(Clone, Debug)] +pub struct InsertedMove { + pub pos: ProgPoint, + pub prio: InsertMovePrio, + pub from_alloc: Allocation, + pub to_alloc: Allocation, + pub to_vreg: Option, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum InsertMovePrio { + InEdgeMoves, + BlockParam, + Regular, + PostRegular, + MultiFixedReg, + ReusedInput, + OutEdgeMoves, +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct Stats { + pub livein_blocks: usize, + pub livein_iterations: usize, + pub initial_liverange_count: usize, + pub merged_bundle_count: usize, + pub prog_moves: usize, + pub prog_moves_dead_src: usize, + pub prog_move_merge_attempt: usize, + pub prog_move_merge_success: usize, + pub process_bundle_count: usize, + pub process_bundle_reg_probes_fixed: usize, + pub process_bundle_reg_success_fixed: usize, + pub process_bundle_bounding_range_probe_start_any: usize, + pub process_bundle_bounding_range_probes_any: usize, + pub process_bundle_bounding_range_success_any: usize, + pub process_bundle_reg_probe_start_any: usize, + pub process_bundle_reg_probes_any: usize, + pub process_bundle_reg_success_any: usize, + pub evict_bundle_event: usize, + pub evict_bundle_count: usize, + pub splits: usize, + pub splits_clobbers: usize, + pub splits_hot: usize, + pub splits_conflicts: usize, + pub splits_defs: usize, + pub splits_all: usize, + pub final_liverange_count: usize, + pub final_bundle_count: usize, + pub spill_bundle_count: usize, + pub spill_bundle_reg_probes: usize, + pub spill_bundle_reg_success: usize, + pub blockparam_ins_count: usize, + pub blockparam_outs_count: usize, + pub blockparam_allocs_count: usize, + pub halfmoves_count: usize, + pub edits_count: usize, +} diff --git a/src/ion/dump.rs b/src/ion/dump.rs new file mode 100644 index 00000000..c2912532 --- /dev/null +++ b/src/ion/dump.rs @@ -0,0 +1,141 @@ +//! Debugging output. 
+ +use super::Env; +use crate::{Function, ProgPoint, Block}; + +impl<'a, F: Function> Env<'a, F> { + pub fn dump_state(&self) { + log::debug!("Bundles:"); + for (i, b) in self.bundles.iter().enumerate() { + log::debug!( + "bundle{}: spillset={:?} alloc={:?}", + i, + b.spillset, + b.allocation + ); + for entry in &b.ranges { + log::debug!( + " * range {:?} -- {:?}: range{}", + entry.range.from, + entry.range.to, + entry.index.index() + ); + } + } + log::debug!("VRegs:"); + for (i, v) in self.vregs.iter().enumerate() { + log::debug!("vreg{}:", i); + for entry in &v.ranges { + log::debug!( + " * range {:?} -- {:?}: range{}", + entry.range.from, + entry.range.to, + entry.index.index() + ); + } + } + log::debug!("Ranges:"); + for (i, r) in self.ranges.iter().enumerate() { + log::debug!( + "range{}: range={:?} vreg={:?} bundle={:?} weight={}", + i, + r.range, + r.vreg, + r.bundle, + r.uses_spill_weight(), + ); + for u in &r.uses { + log::debug!(" * use at {:?} (slot {}): {:?}", u.pos, u.slot, u.operand); + } + } + } + + pub fn annotate(&mut self, progpoint: ProgPoint, s: String) { + if self.annotations_enabled { + self.debug_annotations + .entry(progpoint) + .or_insert_with(|| vec![]) + .push(s); + } + } + + pub fn dump_results(&self) { + log::info!("=== REGALLOC RESULTS ==="); + for block in 0..self.func.blocks() { + let block = Block::new(block); + log::info!( + "block{}: [succs {:?} preds {:?}]", + block.index(), + self.func + .block_succs(block) + .iter() + .map(|b| b.index()) + .collect::>(), + self.func + .block_preds(block) + .iter() + .map(|b| b.index()) + .collect::>() + ); + for inst in self.func.block_insns(block).iter() { + for annotation in self + .debug_annotations + .get(&ProgPoint::before(inst)) + .map(|v| &v[..]) + .unwrap_or(&[]) + { + log::info!(" inst{}-pre: {}", inst.index(), annotation); + } + let ops = self + .func + .inst_operands(inst) + .iter() + .map(|op| format!("{}", op)) + .collect::>(); + let clobbers = self + .func + .inst_clobbers(inst) + .iter() + .map(|preg| format!("{}", preg)) + .collect::>(); + let allocs = (0..ops.len()) + .map(|i| format!("{}", self.get_alloc(inst, i))) + .collect::>(); + let opname = if self.func.is_branch(inst) { + "br" + } else if self.func.is_call(inst) { + "call" + } else if self.func.is_ret(inst) { + "ret" + } else { + "op" + }; + let args = ops + .iter() + .zip(allocs.iter()) + .map(|(op, alloc)| format!("{} [{}]", op, alloc)) + .collect::>(); + let clobbers = if clobbers.is_empty() { + "".to_string() + } else { + format!(" [clobber: {}]", clobbers.join(", ")) + }; + log::info!( + " inst{}: {} {}{}", + inst.index(), + opname, + args.join(", "), + clobbers + ); + for annotation in self + .debug_annotations + .get(&ProgPoint::after(inst)) + .map(|v| &v[..]) + .unwrap_or(&[]) + { + log::info!(" inst{}-post: {}", inst.index(), annotation); + } + } + } + } +} diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs new file mode 100644 index 00000000..41895025 --- /dev/null +++ b/src/ion/liveranges.rs @@ -0,0 +1,1190 @@ +/* + * The following license applies to this file, which was initially + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Since the initial port, the design has been substantially evolved + * and optimized. + */ + +//! 
Live-range computation. + +use super::{ + CodeRange, Env, InsertMovePrio, LiveBundle, LiveBundleIndex, LiveRange, LiveRangeFlag, + LiveRangeIndex, LiveRangeKey, LiveRangeListEntry, LiveRangeSet, PRegData, PRegIndex, RegClass, + SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE, +}; +use crate::bitvec::BitVec; +use crate::{ + Allocation, Block, Function, Inst, InstPosition, Operand, OperandKind, OperandPolicy, + OperandPos, PReg, ProgPoint, RegAllocError, VReg, +}; +use fxhash::FxHashSet; +use smallvec::{smallvec, SmallVec}; +use std::collections::{HashSet, VecDeque}; +use std::convert::TryFrom; + +#[inline(always)] +pub fn spill_weight_from_policy(policy: OperandPolicy, loop_depth: usize, is_def: bool) -> u32 { + // A bonus of 1000 for one loop level, 4000 for two loop levels, + // 16000 for three loop levels, etc. Avoids exponentiation. + let hot_bonus = std::cmp::min(16000, 1000 * (1 << (2 * loop_depth))); + let def_bonus = if is_def { 2000 } else { 0 }; + let policy_bonus = match policy { + OperandPolicy::Any => 1000, + OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000, + _ => 0, + }; + hot_bonus + def_bonus + policy_bonus +} + +impl<'a, F: Function> Env<'a, F> { + pub fn create_pregs_and_vregs(&mut self) { + // Create PRegs from the env. + self.pregs.resize( + PReg::MAX_INDEX, + PRegData { + reg: PReg::invalid(), + allocations: LiveRangeSet::new(), + }, + ); + for &preg in &self.env.regs { + self.pregs[preg.index()].reg = preg; + } + // Create VRegs from the vreg count. + for idx in 0..self.func.num_vregs() { + // We'll fill in the real details when we see the def. + let reg = VReg::new(idx, RegClass::Int); + self.add_vreg( + reg, + VRegData { + ranges: smallvec![], + blockparam: Block::invalid(), + is_ref: false, + is_pinned: false, + }, + ); + } + for v in self.func.reftype_vregs() { + self.vregs[v.vreg()].is_ref = true; + } + for v in self.func.pinned_vregs() { + self.vregs[v.vreg()].is_pinned = true; + } + // Create allocations too. + for inst in 0..self.func.insts() { + let start = self.allocs.len() as u32; + self.inst_alloc_offsets.push(start); + for _ in 0..self.func.inst_operands(Inst::new(inst)).len() { + self.allocs.push(Allocation::none()); + } + } + } + + pub fn add_vreg(&mut self, reg: VReg, data: VRegData) -> VRegIndex { + let idx = self.vregs.len(); + self.vregs.push(data); + self.vreg_regs.push(reg); + VRegIndex::new(idx) + } + + pub fn create_bundle(&mut self) -> LiveBundleIndex { + let bundle = self.bundles.len(); + self.bundles.push(LiveBundle { + allocation: Allocation::none(), + ranges: smallvec![], + spillset: SpillSetIndex::invalid(), + prio: 0, + spill_weight_and_props: 0, + }); + LiveBundleIndex::new(bundle) + } + + pub fn create_liverange(&mut self, range: CodeRange) -> LiveRangeIndex { + let idx = self.ranges.len(); + + self.ranges.push(LiveRange { + range, + vreg: VRegIndex::invalid(), + bundle: LiveBundleIndex::invalid(), + uses_spill_weight_and_flags: 0, + + uses: smallvec![], + + merged_into: LiveRangeIndex::invalid(), + }); + + LiveRangeIndex::new(idx) + } + + /// Mark `range` as live for the given `vreg`. + /// + /// Returns the liverange that contains the given range. 
+ pub fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, range: CodeRange) -> LiveRangeIndex { + log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); + + // Invariant: as we are building liveness information, we + // *always* process instructions bottom-to-top, and as a + // consequence, new liveranges are always created before any + // existing liveranges for a given vreg. We assert this here, + // then use it to avoid an O(n) merge step (which would lead + // to O(n^2) liveness construction cost overall). + // + // We store liveranges in reverse order in the `.ranges` + // array, then reverse them at the end of + // `compute_liveness()`. + + assert!( + self.vregs[vreg.index()].ranges.is_empty() + || range.to + <= self.ranges[self.vregs[vreg.index()] + .ranges + .last() + .unwrap() + .index + .index()] + .range + .from + ); + + if self.vregs[vreg.index()].ranges.is_empty() + || range.to + < self.ranges[self.vregs[vreg.index()] + .ranges + .last() + .unwrap() + .index + .index()] + .range + .from + { + // Is not contiguous with previously-added (immediately + // following) range; create a new range. + let lr = self.create_liverange(range); + self.ranges[lr.index()].vreg = vreg; + self.vregs[vreg.index()] + .ranges + .push(LiveRangeListEntry { range, index: lr }); + lr + } else { + // Is contiguous with previously-added range; just extend + // its range and return it. + let lr = self.vregs[vreg.index()].ranges.last().unwrap().index; + assert!(range.to == self.ranges[lr.index()].range.from); + self.ranges[lr.index()].range.from = range.from; + lr + } + } + + pub fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { + let operand = u.operand; + let policy = operand.policy(); + let block = self.cfginfo.insn_block[u.pos.inst().index()]; + let loop_depth = self.cfginfo.approx_loop_depth[block.index()] as usize; + let weight = + spill_weight_from_policy(policy, loop_depth, operand.kind() != OperandKind::Use); + u.weight = u16::try_from(weight).expect("weight too large for u16 field"); + + log::debug!( + "insert use {:?} into lr {:?} with weight {}", + u, + into, + weight, + ); + + // N.B.: we do *not* update `requirement` on the range, + // because those will be computed during the multi-fixed-reg + // fixup pass later (after all uses are inserted). + + self.ranges[into.index()].uses.push(u); + + // Update stats. + self.ranges[into.index()].uses_spill_weight_and_flags += weight; + log::debug!( + " -> now range has weight {}", + self.ranges[into.index()].uses_spill_weight(), + ); + } + + pub fn find_vreg_liverange_for_pos( + &self, + vreg: VRegIndex, + pos: ProgPoint, + ) -> Option { + for entry in &self.vregs[vreg.index()].ranges { + if entry.range.contains_point(pos) { + return Some(entry.index); + } + } + None + } + + pub fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { + log::debug!("adding liverange to preg: {:?} to {}", range, reg); + let preg_idx = PRegIndex::new(reg.index()); + self.pregs[preg_idx.index()] + .allocations + .btree + .insert(LiveRangeKey::from_range(&range), LiveRangeIndex::invalid()); + } + + pub fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool { + self.liveins[block.index()].get(vreg.index()) + } + + pub fn compute_liveness(&mut self) -> Result<(), RegAllocError> { + // Create initial LiveIn and LiveOut bitsets. 
+ for _ in 0..self.func.blocks() { + self.liveins.push(BitVec::new()); + self.liveouts.push(BitVec::new()); + } + + // Run a worklist algorithm to precisely compute liveins and + // liveouts. + let mut workqueue = VecDeque::new(); + let mut workqueue_set = FxHashSet::default(); + // Initialize workqueue with postorder traversal. + for &block in &self.cfginfo.postorder[..] { + workqueue.push_back(block); + workqueue_set.insert(block); + } + + while !workqueue.is_empty() { + let block = workqueue.pop_front().unwrap(); + workqueue_set.remove(&block); + + log::debug!("computing liveins for block{}", block.index()); + + self.stats.livein_iterations += 1; + + let mut live = self.liveouts[block.index()].clone(); + log::debug!(" -> initial liveout set: {:?}", live); + + for inst in self.func.block_insns(block).rev().iter() { + if let Some((src, dst)) = self.func.is_move(inst) { + live.set(dst.vreg().vreg(), false); + live.set(src.vreg().vreg(), true); + } + + for pos in &[OperandPos::After, OperandPos::Before] { + for op in self.func.inst_operands(inst) { + if op.pos() == *pos { + let was_live = live.get(op.vreg().vreg()); + log::debug!("op {:?} was_live = {}", op, was_live); + match op.kind() { + OperandKind::Use | OperandKind::Mod => { + live.set(op.vreg().vreg(), true); + } + OperandKind::Def => { + live.set(op.vreg().vreg(), false); + } + } + } + } + } + } + for &blockparam in self.func.block_params(block) { + live.set(blockparam.vreg(), false); + } + + for &pred in self.func.block_preds(block) { + if self.liveouts[pred.index()].or(&live) { + if !workqueue_set.contains(&pred) { + workqueue_set.insert(pred); + workqueue.push_back(pred); + } + } + } + + log::debug!("computed liveins at block{}: {:?}", block.index(), live); + self.liveins[block.index()] = live; + } + + // Check that there are no liveins to the entry block. (The + // client should create a virtual intsruction that defines any + // PReg liveins if necessary.) + if self.liveins[self.func.entry_block().index()] + .iter() + .next() + .is_some() + { + log::debug!( + "non-empty liveins to entry block: {:?}", + self.liveins[self.func.entry_block().index()] + ); + return Err(RegAllocError::EntryLivein); + } + + for &vreg in self.func.reftype_vregs() { + self.safepoints_per_vreg.insert(vreg.vreg(), HashSet::new()); + } + + // Create Uses and Defs referring to VRegs, and place the Uses + // in LiveRanges. + // + // We already computed precise liveouts and liveins for every + // block above, so we don't need to run an iterative algorithm + // here; instead, every block's computation is purely local, + // from end to start. + + // Track current LiveRange for each vreg. + // + // Invariant: a stale range may be present here; ranges are + // only valid if `live.get(vreg)` is true. + let mut vreg_ranges: Vec = + vec![LiveRangeIndex::invalid(); self.func.num_vregs()]; + + for i in (0..self.func.blocks()).rev() { + let block = Block::new(i); + + self.stats.livein_blocks += 1; + + // Init our local live-in set. + let mut live = self.liveouts[block.index()].clone(); + + // Initially, registers are assumed live for the whole block. + for vreg in live.iter() { + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: self.cfginfo.block_exit[block.index()].next(), + }; + log::debug!( + "vreg {:?} live at end of block --> create range {:?}", + VRegIndex::new(vreg), + range + ); + let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range); + vreg_ranges[vreg] = lr; + } + + // Create vreg data for blockparams. 
+ for param in self.func.block_params(block) { + self.vreg_regs[param.vreg()] = *param; + self.vregs[param.vreg()].blockparam = block; + } + + let insns = self.func.block_insns(block); + + // If the last instruction is a branch (rather than + // return), create blockparam_out entries. + if self.func.is_branch(insns.last()) { + let operands = self.func.inst_operands(insns.last()); + let mut i = self.func.branch_blockparam_arg_offset(block, insns.last()); + for &succ in self.func.block_succs(block) { + for &blockparam in self.func.block_params(succ) { + let from_vreg = VRegIndex::new(operands[i].vreg().vreg()); + let blockparam_vreg = VRegIndex::new(blockparam.vreg()); + self.blockparam_outs + .push((from_vreg, block, succ, blockparam_vreg)); + i += 1; + } + } + } + + // For each instruction, in reverse order, process + // operands and clobbers. + for inst in insns.rev().iter() { + if self.func.inst_clobbers(inst).len() > 0 { + self.clobbers.push(inst); + } + + // Mark clobbers with CodeRanges on PRegs. + for i in 0..self.func.inst_clobbers(inst).len() { + // don't borrow `self` + let clobber = self.func.inst_clobbers(inst)[i]; + // Clobber range is at After point only: an + // instruction can still take an input in a reg + // that it later clobbers. (In other words, the + // clobber is like a normal def that never gets + // used.) + let range = CodeRange { + from: ProgPoint::after(inst), + to: ProgPoint::before(inst.next()), + }; + self.add_liverange_to_preg(range, clobber); + } + + // Does the instruction have any input-reusing + // outputs? This is important below to establish + // proper interference wrt other inputs. + let mut reused_input = None; + for op in self.func.inst_operands(inst) { + if let OperandPolicy::Reuse(i) = op.policy() { + reused_input = Some(i); + break; + } + } + + // If this is a move, handle specially. + if let Some((src, dst)) = self.func.is_move(inst) { + // We can completely skip the move if it is + // trivial (vreg to same vreg). + if src.vreg() != dst.vreg() { + log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); + + assert_eq!(src.class(), dst.class()); + assert_eq!(src.kind(), OperandKind::Use); + assert_eq!(src.pos(), OperandPos::Before); + assert_eq!(dst.kind(), OperandKind::Def); + assert_eq!(dst.pos(), OperandPos::After); + + // If both src and dest are pinned, emit the + // move right here, right now. + if self.vregs[src.vreg().vreg()].is_pinned + && self.vregs[dst.vreg().vreg()].is_pinned + { + // Update LRs. 
+ if !live.get(src.vreg().vreg()) { + let lr = self.add_liverange_to_vreg( + VRegIndex::new(src.vreg().vreg()), + CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: ProgPoint::after(inst), + }, + ); + live.set(src.vreg().vreg(), true); + vreg_ranges[src.vreg().vreg()] = lr; + } + if live.get(dst.vreg().vreg()) { + let lr = vreg_ranges[dst.vreg().vreg()]; + self.ranges[lr.index()].range.from = ProgPoint::after(inst); + live.set(dst.vreg().vreg(), false); + } else { + self.add_liverange_to_vreg( + VRegIndex::new(dst.vreg().vreg()), + CodeRange { + from: ProgPoint::after(inst), + to: ProgPoint::before(inst.next()), + }, + ); + } + + let src_preg = match src.policy() { + OperandPolicy::FixedReg(r) => r, + _ => unreachable!(), + }; + let dst_preg = match dst.policy() { + OperandPolicy::FixedReg(r) => r, + _ => unreachable!(), + }; + self.insert_move( + ProgPoint::before(inst), + InsertMovePrio::MultiFixedReg, + Allocation::reg(src_preg), + Allocation::reg(dst_preg), + Some(dst.vreg()), + ); + } + // If exactly one of source and dest (but not + // both) is a pinned-vreg, convert this into a + // ghost use on the other vreg with a FixedReg + // policy. + else if self.vregs[src.vreg().vreg()].is_pinned + || self.vregs[dst.vreg().vreg()].is_pinned + { + log::debug!( + " -> exactly one of src/dst is pinned; converting to ghost use" + ); + let (preg, vreg, pinned_vreg, kind, pos, progpoint) = + if self.vregs[src.vreg().vreg()].is_pinned { + // Source is pinned: this is a def on the dst with a pinned preg. + ( + self.func.is_pinned_vreg(src.vreg()).unwrap(), + dst.vreg(), + src.vreg(), + OperandKind::Def, + OperandPos::After, + ProgPoint::after(inst), + ) + } else { + // Dest is pinned: this is a use on the src with a pinned preg. + ( + self.func.is_pinned_vreg(dst.vreg()).unwrap(), + src.vreg(), + dst.vreg(), + OperandKind::Use, + OperandPos::Before, + ProgPoint::after(inst), + ) + }; + let policy = OperandPolicy::FixedReg(preg); + let operand = Operand::new(vreg, policy, kind, pos); + + log::debug!( + concat!( + " -> preg {:?} vreg {:?} kind {:?} ", + "pos {:?} progpoint {:?} policy {:?} operand {:?}" + ), + preg, + vreg, + kind, + pos, + progpoint, + policy, + operand + ); + + // Get the LR for the vreg; if none, create one. + let mut lr = vreg_ranges[vreg.vreg()]; + if !live.get(vreg.vreg()) { + let from = match kind { + OperandKind::Use => self.cfginfo.block_entry[block.index()], + OperandKind::Def => progpoint, + _ => unreachable!(), + }; + let to = progpoint.next(); + lr = self.add_liverange_to_vreg( + VRegIndex::new(vreg.vreg()), + CodeRange { from, to }, + ); + log::debug!(" -> dead; created LR"); + } + log::debug!(" -> LR {:?}", lr); + + self.insert_use_into_liverange( + lr, + Use::new(operand, progpoint, SLOT_NONE), + ); + + if kind == OperandKind::Def { + live.set(vreg.vreg(), false); + if self.ranges[lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + self.ranges[lr.index()].range.from = progpoint; + } + self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef); + } else { + live.set(vreg.vreg(), true); + vreg_ranges[vreg.vreg()] = lr; + } + + // Handle liveness of the other vreg. Note + // that this is somewhat special. For the + // destination case, we want the pinned + // vreg's LR to start just *after* the + // operand we inserted above, because + // otherwise it would overlap, and + // interfere, and prevent allocation. 
For + // the source case, we want to "poke a + // hole" in the LR: if it's live going + // downward, end it just after the operand + // and restart it before; if it isn't + // (this is the last use), start it + // before. + if kind == OperandKind::Def { + log::debug!(" -> src on pinned vreg {:?}", pinned_vreg); + // The *other* vreg is a def, so the pinned-vreg + // mention is a use. If already live, + // end the existing LR just *after* + // the `progpoint` defined above and + // start a new one just *before* the + // `progpoint` defined above, + // preserving the start. If not, start + // a new one live back to the top of + // the block, starting just before + // `progpoint`. + if live.get(pinned_vreg.vreg()) { + let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; + let orig_start = self.ranges[pinned_lr.index()].range.from; + log::debug!( + " -> live with LR {:?}; truncating to start at {:?}", + pinned_lr, + progpoint.next() + ); + self.ranges[pinned_lr.index()].range.from = progpoint.next(); + let new_lr = self.add_liverange_to_vreg( + VRegIndex::new(pinned_vreg.vreg()), + CodeRange { + from: orig_start, + to: progpoint.prev(), + }, + ); + vreg_ranges[pinned_vreg.vreg()] = new_lr; + log::debug!(" -> created LR {:?} with remaining range from {:?} to {:?}", new_lr, orig_start, progpoint); + + // Add an edit right now to indicate that at + // this program point, the given + // preg is now known as that vreg, + // not the preg, but immediately + // after, it is known as the preg + // again. This is used by the + // checker. + self.insert_move( + ProgPoint::after(inst), + InsertMovePrio::Regular, + Allocation::reg(preg), + Allocation::reg(preg), + Some(dst.vreg()), + ); + self.insert_move( + ProgPoint::before(inst.next()), + InsertMovePrio::MultiFixedReg, + Allocation::reg(preg), + Allocation::reg(preg), + Some(src.vreg()), + ); + } else { + if inst > self.cfginfo.block_entry[block.index()].inst() { + let new_lr = self.add_liverange_to_vreg( + VRegIndex::new(pinned_vreg.vreg()), + CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: ProgPoint::before(inst), + }, + ); + vreg_ranges[pinned_vreg.vreg()] = new_lr; + live.set(pinned_vreg.vreg(), true); + log::debug!( + " -> was not live; created new LR {:?}", + new_lr + ); + } + + // Add an edit right now to indicate that at + // this program point, the given + // preg is now known as that vreg, + // not the preg. This is used by + // the checker. + self.insert_move( + ProgPoint::after(inst), + InsertMovePrio::BlockParam, + Allocation::reg(preg), + Allocation::reg(preg), + Some(dst.vreg()), + ); + } + } else { + log::debug!(" -> dst on pinned vreg {:?}", pinned_vreg); + // The *other* vreg is a use, so the pinned-vreg + // mention is a def. Truncate its LR + // just *after* the `progpoint` + // defined above. + if live.get(pinned_vreg.vreg()) { + let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; + self.ranges[pinned_lr.index()].range.from = progpoint.next(); + log::debug!( + " -> was live with LR {:?}; truncated start to {:?}", + pinned_lr, + progpoint.next() + ); + live.set(pinned_vreg.vreg(), false); + + // Add a no-op edit right now to indicate that + // at this program point, the + // given preg is now known as that + // preg, not the vreg. This is + // used by the checker. 
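+                                // (Source and destination allocation are
+                                // the same preg, so no data actually moves;
+                                // only the attached vreg metadata is
+                                // significant.)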
+ self.insert_move( + ProgPoint::before(inst.next()), + InsertMovePrio::PostRegular, + Allocation::reg(preg), + Allocation::reg(preg), + Some(dst.vreg()), + ); + } + // Otherwise, if dead, no need to create + // a dummy LR -- there is no + // reservation to make (the other vreg + // will land in the reg with the + // fixed-reg operand constraint, but + // it's a dead move anyway). + } + } else { + // Redefine src and dst operands to have + // positions of After and Before respectively + // (see note below), and to have Any + // constraints if they were originally Reg. + let src_policy = match src.policy() { + OperandPolicy::Reg => OperandPolicy::Any, + x => x, + }; + let dst_policy = match dst.policy() { + OperandPolicy::Reg => OperandPolicy::Any, + x => x, + }; + let src = Operand::new( + src.vreg(), + src_policy, + OperandKind::Use, + OperandPos::After, + ); + let dst = Operand::new( + dst.vreg(), + dst_policy, + OperandKind::Def, + OperandPos::Before, + ); + + if self.annotations_enabled { + self.annotate( + ProgPoint::after(inst), + format!( + " prog-move v{} ({:?}) -> v{} ({:?})", + src.vreg().vreg(), + src_policy, + dst.vreg().vreg(), + dst_policy, + ), + ); + } + + // N.B.: in order to integrate with the move + // resolution that joins LRs in general, we + // conceptually treat the move as happening + // between the move inst's After and the next + // inst's Before. Thus the src LR goes up to + // (exclusive) next-inst-pre, and the dst LR + // starts at next-inst-pre. We have to take + // care in our move insertion to handle this + // like other inter-inst moves, i.e., at + // `Regular` priority, so it properly happens + // in parallel with other inter-LR moves. + // + // Why the progpoint between move and next + // inst, and not the progpoint between prev + // inst and move? Because a move can be the + // first inst in a block, but cannot be the + // last; so the following progpoint is always + // within the same block, while the previous + // one may be an inter-block point (and the + // After of the prev inst in a different + // block). + + // Handle the def w.r.t. liveranges: trim the + // start of the range and mark it dead at this + // point in our backward scan. + let pos = ProgPoint::before(inst.next()); + let mut dst_lr = vreg_ranges[dst.vreg().vreg()]; + if !live.get(dst.vreg().vreg()) { + let from = pos; + let to = pos.next(); + dst_lr = self.add_liverange_to_vreg( + VRegIndex::new(dst.vreg().vreg()), + CodeRange { from, to }, + ); + log::debug!(" -> invalid LR for def; created {:?}", dst_lr); + } + log::debug!(" -> has existing LR {:?}", dst_lr); + // Trim the LR to start here. + if self.ranges[dst_lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!(" -> started at block start; trimming to {:?}", pos); + self.ranges[dst_lr.index()].range.from = pos; + } + self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef); + live.set(dst.vreg().vreg(), false); + vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid(); + self.vreg_regs[dst.vreg().vreg()] = dst.vreg(); + + // Handle the use w.r.t. liveranges: make it live + // and create an initial LR back to the start of + // the block. 
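+                        // (If the source is defined earlier in this same
+                        // block, that def will trim the range's start when
+                        // the backward scan reaches it, just as for
+                        // ordinary operands.)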
+ let pos = ProgPoint::after(inst); + let src_lr = if !live.get(src.vreg().vreg()) { + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + let src_lr = self.add_liverange_to_vreg( + VRegIndex::new(src.vreg().vreg()), + range, + ); + vreg_ranges[src.vreg().vreg()] = src_lr; + src_lr + } else { + vreg_ranges[src.vreg().vreg()] + }; + + log::debug!(" -> src LR {:?}", src_lr); + + // Add to live-set. + let src_is_dead_after_move = !live.get(src.vreg().vreg()); + live.set(src.vreg().vreg(), true); + + // Add to program-moves lists. + self.prog_move_srcs.push(( + (VRegIndex::new(src.vreg().vreg()), inst), + Allocation::none(), + )); + self.prog_move_dsts.push(( + (VRegIndex::new(dst.vreg().vreg()), inst.next()), + Allocation::none(), + )); + self.stats.prog_moves += 1; + if src_is_dead_after_move { + self.stats.prog_moves_dead_src += 1; + self.prog_move_merges.push((src_lr, dst_lr)); + } + } + } + + continue; + } + + // Process defs and uses. + for &cur_pos in &[InstPosition::After, InstPosition::Before] { + for i in 0..self.func.inst_operands(inst).len() { + // don't borrow `self` + let operand = self.func.inst_operands(inst)[i]; + let pos = match (operand.kind(), operand.pos()) { + (OperandKind::Mod, _) => ProgPoint::before(inst), + (OperandKind::Def, OperandPos::Before) => ProgPoint::before(inst), + (OperandKind::Def, OperandPos::After) => ProgPoint::after(inst), + (OperandKind::Use, OperandPos::After) => ProgPoint::after(inst), + // If this is a branch, extend `pos` to + // the end of the block. (Branch uses are + // blockparams and need to be live at the + // end of the block.) + (OperandKind::Use, _) if self.func.is_branch(inst) => { + self.cfginfo.block_exit[block.index()] + } + // If there are any reused inputs in this + // instruction, and this is *not* the + // reused input, force `pos` to + // `After`. (See note below for why; it's + // very subtle!) + (OperandKind::Use, OperandPos::Before) + if reused_input.is_some() && reused_input.unwrap() != i => + { + ProgPoint::after(inst) + } + (OperandKind::Use, OperandPos::Before) => ProgPoint::before(inst), + }; + + if pos.pos() != cur_pos { + continue; + } + + log::debug!( + "processing inst{} operand at {:?}: {:?}", + inst.index(), + pos, + operand + ); + + match operand.kind() { + OperandKind::Def | OperandKind::Mod => { + log::debug!("Def of {} at {:?}", operand.vreg(), pos); + + // Fill in vreg's actual data. + self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); + + // Get or create the LiveRange. + let mut lr = vreg_ranges[operand.vreg().vreg()]; + log::debug!(" -> has existing LR {:?}", lr); + // If there was no liverange (dead def), create a trivial one. + if !live.get(operand.vreg().vreg()) { + let from = match operand.kind() { + OperandKind::Def => pos, + OperandKind::Mod => self.cfginfo.block_entry[block.index()], + _ => unreachable!(), + }; + let to = match operand.kind() { + OperandKind::Def => pos.next(), + OperandKind::Mod => pos.next().next(), // both Before and After positions + _ => unreachable!(), + }; + lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + CodeRange { from, to }, + ); + log::debug!(" -> invalid; created {:?}", lr); + vreg_ranges[operand.vreg().vreg()] = lr; + live.set(operand.vreg().vreg(), true); + } + // Create the use in the LiveRange. + self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); + // If def (not mod), this reg is now dead, + // scanning backward; make it so. 
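+                            // For example, scanning `v1 = add v0, v0`
+                            // backward: the def of v1 drops v1 from the
+                            // live set and trims its range to begin here,
+                            // while the uses of v0 (handled in the Use arm
+                            // below) keep v0 live back toward the block
+                            // entry or its own def.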
+ if operand.kind() == OperandKind::Def { + // Trim the range for this vreg to start + // at `pos` if it previously ended at the + // start of this block (i.e. was not + // merged into some larger LiveRange due + // to out-of-order blocks). + if self.ranges[lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!( + " -> started at block start; trimming to {:?}", + pos + ); + self.ranges[lr.index()].range.from = pos; + } + + self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef); + + // Remove from live-set. + live.set(operand.vreg().vreg(), false); + vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); + } + } + OperandKind::Use => { + // Create/extend the LiveRange if it + // doesn't already exist, and add the use + // to the range. + let mut lr = vreg_ranges[operand.vreg().vreg()]; + if !live.get(operand.vreg().vreg()) { + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + range, + ); + vreg_ranges[operand.vreg().vreg()] = lr; + } + assert!(lr.is_valid()); + + log::debug!("Use of {:?} at {:?} -> {:?}", operand, pos, lr,); + + self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); + + // Add to live-set. + live.set(operand.vreg().vreg(), true); + } + } + } + } + + if self.func.is_safepoint(inst) { + self.safepoints.push(inst); + for vreg in live.iter() { + if let Some(safepoints) = self.safepoints_per_vreg.get_mut(&vreg) { + safepoints.insert(inst); + } + } + } + } + + // Block parameters define vregs at the very beginning of + // the block. Remove their live vregs from the live set + // here. + for vreg in self.func.block_params(block) { + if live.get(vreg.vreg()) { + live.set(vreg.vreg(), false); + } else { + // Create trivial liverange if blockparam is dead. + let start = self.cfginfo.block_entry[block.index()]; + self.add_liverange_to_vreg( + VRegIndex::new(vreg.vreg()), + CodeRange { + from: start, + to: start.next(), + }, + ); + } + // add `blockparam_ins` entries. + let vreg_idx = VRegIndex::new(vreg.vreg()); + for &pred in self.func.block_preds(block) { + self.blockparam_ins.push((vreg_idx, block, pred)); + } + } + } + + self.safepoints.sort_unstable(); + + // Make ranges in each vreg and uses in each range appear in + // sorted order. We built them in reverse order above, so this + // is a simple reversal, *not* a full sort. + // + // The ordering invariant is always maintained for uses and + // always for ranges in bundles (which are initialized later), + // but not always for ranges in vregs; those are sorted only + // when needed, here and then again at the end of allocation + // when resolving moves. + + for vreg in &mut self.vregs { + vreg.ranges.reverse(); + let mut last = None; + for entry in &mut vreg.ranges { + // Ranges may have been truncated above at defs. We + // need to update with the final range here. + entry.range = self.ranges[entry.index.index()].range; + // Assert in-order and non-overlapping. + assert!(last.is_none() || last.unwrap() <= entry.range.from); + last = Some(entry.range.to); + } + } + + for range in 0..self.ranges.len() { + self.ranges[range].uses.reverse(); + debug_assert!(self.ranges[range] + .uses + .windows(2) + .all(|win| win[0].pos <= win[1].pos)); + } + + // Insert safepoint virtual stack uses, if needed. 
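+        // For each reftyped (and non-pinned) vreg, walk its sorted range
+        // list and the sorted safepoint list in tandem; every safepoint
+        // covered by a range gets a synthetic Stack-constrained use, which
+        // keeps the value in a spillslot at that point so it can be
+        // reported in the stackmap.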
+ for vreg in self.func.reftype_vregs() { + if self.vregs[vreg.vreg()].is_pinned { + continue; + } + let vreg = VRegIndex::new(vreg.vreg()); + let mut inserted = false; + let mut safepoint_idx = 0; + for range_idx in 0..self.vregs[vreg.index()].ranges.len() { + let LiveRangeListEntry { range, index } = + self.vregs[vreg.index()].ranges[range_idx]; + while safepoint_idx < self.safepoints.len() + && ProgPoint::before(self.safepoints[safepoint_idx]) < range.from + { + safepoint_idx += 1; + } + while safepoint_idx < self.safepoints.len() + && range.contains_point(ProgPoint::before(self.safepoints[safepoint_idx])) + { + // Create a virtual use. + let pos = ProgPoint::before(self.safepoints[safepoint_idx]); + let operand = Operand::new( + self.vreg_regs[vreg.index()], + OperandPolicy::Stack, + OperandKind::Use, + OperandPos::Before, + ); + + log::debug!( + "Safepoint-induced stack use of {:?} at {:?} -> {:?}", + operand, + pos, + index, + ); + + self.insert_use_into_liverange(index, Use::new(operand, pos, SLOT_NONE)); + safepoint_idx += 1; + + inserted = true; + } + + if inserted { + self.ranges[index.index()] + .uses + .sort_unstable_by_key(|u| u.pos); + } + + if safepoint_idx >= self.safepoints.len() { + break; + } + } + } + + // Do a fixed-reg cleanup pass: if there are any LiveRanges with + // multiple uses (or defs) at the same ProgPoint and there is + // more than one FixedReg constraint at that ProgPoint, we + // need to record all but one of them in a special fixup list + // and handle them later; otherwise, bundle-splitting to + // create minimal bundles becomes much more complex (we would + // have to split the multiple uses at the same progpoint into + // different bundles, which breaks invariants related to + // disjoint ranges and bundles). + let mut seen_fixed_for_vreg: SmallVec<[VReg; 16]> = smallvec![]; + let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; + let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; + for vreg in 0..self.vregs.len() { + for range_idx in 0..self.vregs[vreg].ranges.len() { + let entry = self.vregs[vreg].ranges[range_idx]; + let range = entry.index; + log::debug!( + "multi-fixed-reg cleanup: vreg {:?} range {:?}", + VRegIndex::new(vreg), + range, + ); + let mut last_point = None; + let mut fixup_multi_fixed_vregs = |pos: ProgPoint, + slot: usize, + op: &mut Operand, + fixups: &mut Vec<( + ProgPoint, + PRegIndex, + PRegIndex, + usize, + )>| { + if last_point.is_some() && Some(pos) != last_point { + seen_fixed_for_vreg.clear(); + first_preg.clear(); + } + last_point = Some(pos); + + if let OperandPolicy::FixedReg(preg) = op.policy() { + let vreg_idx = VRegIndex::new(op.vreg().vreg()); + let preg_idx = PRegIndex::new(preg.index()); + log::debug!( + "at pos {:?}, vreg {:?} has fixed constraint to preg {:?}", + pos, + vreg_idx, + preg_idx + ); + if let Some(idx) = seen_fixed_for_vreg.iter().position(|r| *r == op.vreg()) + { + let orig_preg = first_preg[idx]; + if orig_preg != preg_idx { + log::debug!(" -> duplicate; switching to policy Reg"); + fixups.push((pos, orig_preg, preg_idx, slot)); + *op = Operand::new( + op.vreg(), + OperandPolicy::Reg, + op.kind(), + op.pos(), + ); + log::debug!( + " -> extra clobber {} at inst{}", + preg, + pos.inst().index() + ); + extra_clobbers.push((preg, pos.inst())); + } + } else { + seen_fixed_for_vreg.push(op.vreg()); + first_preg.push(preg_idx); + } + } + }; + + for u in &mut self.ranges[range.index()].uses { + let pos = u.pos; + let slot = u.slot as usize; + fixup_multi_fixed_vregs( + pos, + slot, + 
&mut u.operand, + &mut self.multi_fixed_reg_fixups, + ); + } + + for &(clobber, inst) in &extra_clobbers { + let range = CodeRange { + from: ProgPoint::before(inst), + to: ProgPoint::before(inst.next()), + }; + self.add_liverange_to_preg(range, clobber); + } + + extra_clobbers.clear(); + first_preg.clear(); + seen_fixed_for_vreg.clear(); + } + } + + self.clobbers.sort_unstable(); + self.blockparam_ins.sort_unstable(); + self.blockparam_outs.sort_unstable(); + self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos); + self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos); + + log::debug!("prog_move_srcs = {:?}", self.prog_move_srcs); + log::debug!("prog_move_dsts = {:?}", self.prog_move_dsts); + + self.stats.initial_liverange_count = self.ranges.len(); + self.stats.blockparam_ins_count = self.blockparam_ins.len(); + self.stats.blockparam_outs_count = self.blockparam_outs.len(); + + Ok(()) + } +} diff --git a/src/ion/merge.rs b/src/ion/merge.rs new file mode 100644 index 00000000..d0219a3c --- /dev/null +++ b/src/ion/merge.rs @@ -0,0 +1,439 @@ +/* + * The following license applies to this file, which was initially + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Since the initial port, the design has been substantially evolved + * and optimized. + */ + +//! Bundle merging. + +use super::{ + Env, LiveBundleIndex, LiveRangeIndex, LiveRangeKey, Requirement, SpillSet, SpillSetIndex, + SpillSlotIndex, VRegIndex, +}; +use crate::{Function, Inst, OperandPolicy, PReg}; +use smallvec::smallvec; + +impl<'a, F: Function> Env<'a, F> { + pub fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool { + if from == to { + // Merge bundle into self -- trivial merge. + return true; + } + log::debug!( + "merging from bundle{} to bundle{}", + from.index(), + to.index() + ); + + // Both bundles must deal with the same RegClass. + let from_rc = self.spillsets[self.bundles[from.index()].spillset.index()].class; + let to_rc = self.spillsets[self.bundles[to.index()].spillset.index()].class; + if from_rc != to_rc { + log::debug!(" -> mismatching reg classes"); + return false; + } + + // If either bundle is already assigned (due to a pinned vreg), don't merge. + if !self.bundles[from.index()].allocation.is_none() + || !self.bundles[to.index()].allocation.is_none() + { + log::debug!("one of the bundles is already assigned (pinned)"); + return false; + } + + #[cfg(debug)] + { + // Sanity check: both bundles should contain only ranges with appropriate VReg classes. + for entry in &self.bundles[from.index()].ranges { + let vreg = self.ranges[entry.index.index()].vreg; + assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + } + for entry in &self.bundles[to.index()].ranges { + let vreg = self.ranges[entry.index.index()].vreg; + assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + } + } + + // Check for overlap in LiveRanges and for conflicting + // requirements. 
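+        // Both range lists are sorted by start point, so a two-pointer
+        // sweep suffices: at each step, advance whichever list's current
+        // range lies entirely before the other's; if neither does, the two
+        // ranges overlap and the merge must be rejected.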
+ let ranges_from = &self.bundles[from.index()].ranges[..]; + let ranges_to = &self.bundles[to.index()].ranges[..]; + let mut idx_from = 0; + let mut idx_to = 0; + let mut range_count = 0; + while idx_from < ranges_from.len() && idx_to < ranges_to.len() { + range_count += 1; + if range_count > 200 { + log::debug!( + "reached merge complexity (range_count = {}); exiting", + range_count + ); + // Limit merge complexity. + return false; + } + + if ranges_from[idx_from].range.from >= ranges_to[idx_to].range.to { + idx_to += 1; + } else if ranges_to[idx_to].range.from >= ranges_from[idx_from].range.to { + idx_from += 1; + } else { + // Overlap -- cannot merge. + log::debug!( + " -> overlap between {:?} and {:?}, exiting", + ranges_from[idx_from].index, + ranges_to[idx_to].index + ); + return false; + } + } + + // Check for a requirements conflict. + if self.bundles[from.index()].cached_stack() + || self.bundles[from.index()].cached_fixed() + || self.bundles[to.index()].cached_stack() + || self.bundles[to.index()].cached_fixed() + { + let req = self + .compute_requirement(from) + .merge(self.compute_requirement(to)); + if req == Requirement::Conflict { + log::debug!(" -> conflicting requirements; aborting merge"); + return false; + } + } + + log::debug!(" -> committing to merge"); + + // If we reach here, then the bundles do not overlap -- merge + // them! We do this with a merge-sort-like scan over both + // lists, building a new range list and replacing the list on + // `to` when we're done. + if ranges_from.is_empty() { + // `from` bundle is empty -- trivial merge. + log::debug!(" -> from bundle{} is empty; trivial merge", from.index()); + return true; + } + if ranges_to.is_empty() { + // `to` bundle is empty -- just move the list over from + // `from` and set `bundle` up-link on all ranges. + log::debug!(" -> to bundle{} is empty; trivial merge", to.index()); + let list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]); + for entry in &list { + self.ranges[entry.index.index()].bundle = to; + + if self.annotations_enabled { + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), + ); + } + } + self.bundles[to.index()].ranges = list; + + if self.bundles[from.index()].cached_stack() { + self.bundles[to.index()].set_cached_stack(); + } + if self.bundles[from.index()].cached_fixed() { + self.bundles[to.index()].set_cached_fixed(); + } + + return true; + } + + log::debug!( + "merging: ranges_from = {:?} ranges_to = {:?}", + ranges_from, + ranges_to + ); + + // Two non-empty lists of LiveRanges: concatenate and + // sort. This is faster than a mergesort-like merge into a new + // list, empirically. 
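+        // While the ranges are moved over, each one's back-pointer to its
+        // owning bundle is re-pointed at `to`.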
+ let from_list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]); + for entry in &from_list { + self.ranges[entry.index.index()].bundle = to; + } + self.bundles[to.index()] + .ranges + .extend_from_slice(&from_list[..]); + self.bundles[to.index()] + .ranges + .sort_unstable_by_key(|entry| entry.range.from); + + if self.annotations_enabled { + log::debug!("merging: merged = {:?}", self.bundles[to.index()].ranges); + let mut last_range = None; + for i in 0..self.bundles[to.index()].ranges.len() { + let entry = self.bundles[to.index()].ranges[i]; + if last_range.is_some() { + assert!(last_range.unwrap() < entry.range); + } + last_range = Some(entry.range); + + if self.ranges[entry.index.index()].bundle == from { + self.annotate( + entry.range.from, + format!( + " MERGE range{} v{} from bundle{} to bundle{}", + entry.index.index(), + self.ranges[entry.index.index()].vreg.index(), + from.index(), + to.index(), + ), + ); + } + + log::debug!( + " -> merged result for bundle{}: range{}", + to.index(), + entry.index.index(), + ); + } + } + + if self.bundles[from.index()].spillset != self.bundles[to.index()].spillset { + let from_vregs = std::mem::replace( + &mut self.spillsets[self.bundles[from.index()].spillset.index()].vregs, + smallvec![], + ); + let to_vregs = &mut self.spillsets[self.bundles[to.index()].spillset.index()].vregs; + for vreg in from_vregs { + if !to_vregs.contains(&vreg) { + to_vregs.push(vreg); + } + } + } + + if self.bundles[from.index()].cached_stack() { + self.bundles[to.index()].set_cached_stack(); + } + if self.bundles[from.index()].cached_fixed() { + self.bundles[to.index()].set_cached_fixed(); + } + + true + } + + pub fn merge_vreg_bundles(&mut self) { + // Create a bundle for every vreg, initially. + log::debug!("merge_vreg_bundles: creating vreg bundles"); + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + if self.vregs[vreg.index()].ranges.is_empty() { + continue; + } + + // If this is a pinned vreg, go ahead and add it to the + // commitment map, and avoid creating a bundle entirely. + if self.vregs[vreg.index()].is_pinned { + for entry in &self.vregs[vreg.index()].ranges { + let preg = self + .func + .is_pinned_vreg(self.vreg_regs[vreg.index()]) + .unwrap(); + let key = LiveRangeKey::from_range(&entry.range); + self.pregs[preg.index()] + .allocations + .btree + .insert(key, LiveRangeIndex::invalid()); + } + continue; + } + + let bundle = self.create_bundle(); + self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); + log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); + for entry in &self.bundles[bundle.index()].ranges { + log::debug!( + " -> with LR range{}: {:?}", + entry.index.index(), + entry.range + ); + self.ranges[entry.index.index()].bundle = bundle; + } + + let mut fixed = false; + let mut stack = false; + for entry in &self.bundles[bundle.index()].ranges { + for u in &self.ranges[entry.index.index()].uses { + if let OperandPolicy::FixedReg(_) = u.operand.policy() { + fixed = true; + } + if let OperandPolicy::Stack = u.operand.policy() { + stack = true; + } + if fixed && stack { + break; + } + } + } + if fixed { + self.bundles[bundle.index()].set_cached_fixed(); + } + if stack { + self.bundles[bundle.index()].set_cached_stack(); + } + + // Create a spillslot for this bundle. 
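+            // (More precisely, a SpillSet: a concrete SpillSlot is
+            // assigned later only if one turns out to be needed.)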
+ let ssidx = SpillSetIndex::new(self.spillsets.len()); + let reg = self.vreg_regs[vreg.index()]; + let size = self.func.spillslot_size(reg.class()) as u8; + self.spillsets.push(SpillSet { + vregs: smallvec![vreg], + slot: SpillSlotIndex::invalid(), + size, + required: false, + class: reg.class(), + reg_hint: PReg::invalid(), + spill_bundle: LiveBundleIndex::invalid(), + }); + self.bundles[bundle.index()].spillset = ssidx; + } + + for inst in 0..self.func.insts() { + let inst = Inst::new(inst); + + // Attempt to merge Reuse-policy operand outputs with the + // corresponding inputs. + for op in self.func.inst_operands(inst) { + if let OperandPolicy::Reuse(reuse_idx) = op.policy() { + let src_vreg = op.vreg(); + let dst_vreg = self.func.inst_operands(inst)[reuse_idx].vreg(); + if self.vregs[src_vreg.vreg()].is_pinned + || self.vregs[dst_vreg.vreg()].is_pinned + { + continue; + } + + log::debug!( + "trying to merge reused-input def: src {} to dst {}", + src_vreg, + dst_vreg + ); + let src_bundle = + self.ranges[self.vregs[src_vreg.vreg()].ranges[0].index.index()].bundle; + assert!(src_bundle.is_valid()); + let dest_bundle = + self.ranges[self.vregs[dst_vreg.vreg()].ranges[0].index.index()].bundle; + assert!(dest_bundle.is_valid()); + self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); + } + } + } + + // Attempt to merge blockparams with their inputs. + for i in 0..self.blockparam_outs.len() { + let (from_vreg, _, _, to_vreg) = self.blockparam_outs[i]; + log::debug!( + "trying to merge blockparam v{} with input v{}", + to_vreg.index(), + from_vreg.index() + ); + let to_bundle = self.ranges[self.vregs[to_vreg.index()].ranges[0].index.index()].bundle; + assert!(to_bundle.is_valid()); + let from_bundle = + self.ranges[self.vregs[from_vreg.index()].ranges[0].index.index()].bundle; + assert!(from_bundle.is_valid()); + log::debug!( + " -> from bundle{} to bundle{}", + from_bundle.index(), + to_bundle.index() + ); + self.merge_bundles(from_bundle, to_bundle); + } + + // Attempt to merge move srcs/dsts. 
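+        // These candidate pairs were recorded during liveness construction
+        // for program moves whose source is dead after the move; merging
+        // puts source and dest in one bundle (hence one allocation), which
+        // lets the move itself be elided later.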
+ for i in 0..self.prog_move_merges.len() { + let (src, dst) = self.prog_move_merges[i]; + log::debug!("trying to merge move src LR {:?} to dst LR {:?}", src, dst); + let src = self.resolve_merged_lr(src); + let dst = self.resolve_merged_lr(dst); + log::debug!( + "resolved LR-construction merging chains: move-merge is now src LR {:?} to dst LR {:?}", + src, + dst + ); + + let dst_vreg = self.vreg_regs[self.ranges[dst.index()].vreg.index()]; + let src_vreg = self.vreg_regs[self.ranges[src.index()].vreg.index()]; + if self.vregs[src_vreg.vreg()].is_pinned && self.vregs[dst_vreg.vreg()].is_pinned { + continue; + } + if self.vregs[src_vreg.vreg()].is_pinned { + let dest_bundle = self.ranges[dst.index()].bundle; + let spillset = self.bundles[dest_bundle.index()].spillset; + self.spillsets[spillset.index()].reg_hint = + self.func.is_pinned_vreg(src_vreg).unwrap(); + continue; + } + if self.vregs[dst_vreg.vreg()].is_pinned { + let src_bundle = self.ranges[src.index()].bundle; + let spillset = self.bundles[src_bundle.index()].spillset; + self.spillsets[spillset.index()].reg_hint = + self.func.is_pinned_vreg(dst_vreg).unwrap(); + continue; + } + + let src_bundle = self.ranges[src.index()].bundle; + assert!(src_bundle.is_valid()); + let dest_bundle = self.ranges[dst.index()].bundle; + assert!(dest_bundle.is_valid()); + self.stats.prog_move_merge_attempt += 1; + if self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle) { + self.stats.prog_move_merge_success += 1; + } + } + + log::debug!("done merging bundles"); + } + + pub fn resolve_merged_lr(&self, mut lr: LiveRangeIndex) -> LiveRangeIndex { + let mut iter = 0; + while iter < 100 && self.ranges[lr.index()].merged_into.is_valid() { + lr = self.ranges[lr.index()].merged_into; + iter += 1; + } + lr + } + + pub fn compute_bundle_prio(&self, bundle: LiveBundleIndex) -> u32 { + // The priority is simply the total "length" -- the number of + // instructions covered by all LiveRanges. + let mut total = 0; + for entry in &self.bundles[bundle.index()].ranges { + total += entry.range.len() as u32; + } + total + } + + pub fn queue_bundles(&mut self) { + for bundle in 0..self.bundles.len() { + log::debug!("enqueueing bundle{}", bundle); + if self.bundles[bundle].ranges.is_empty() { + log::debug!(" -> no ranges; skipping"); + continue; + } + let bundle = LiveBundleIndex::new(bundle); + let prio = self.compute_bundle_prio(bundle); + log::debug!(" -> prio {}", prio); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue + .insert(bundle, prio as usize, PReg::invalid()); + } + self.stats.merged_bundle_count = self.allocation_queue.heap.len(); + } +} diff --git a/src/ion/mod.rs b/src/ion/mod.rs index a309d27d..e78a0f59 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -1,932 +1,41 @@ /* - * The following license applies to this file, which has been largely + * The following license applies to this file, which was initially * derived from the files `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -//! Backtracking register allocator on SSA code ported from IonMonkey's -//! BacktrackingAllocator. - -/* - * TODO: * - * - "Fixed-stack location": negative spillslot numbers? 
- * - * - Rematerialization - */ - -/* - Performance and code-quality ideas: - - - Better hinting: collect N regs associated with one spillslot? - Collect pointers to other "connected" spillslots (via moves) to - allow move to be elided if possible? - - - Profile allocations + * Since the initial port, the design has been substantially evolved + * and optimized. */ -#![allow(dead_code, unused_imports)] +//! Backtracking register allocator. See doc/DESIGN.md for details of +//! its design. -use crate::bitvec::BitVec; use crate::cfg::CFGInfo; -use crate::index::ContainerComparator; -use crate::moves::ParallelMoves; -use crate::{ - define_index, domtree, Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, - MachineEnv, Operand, OperandKind, OperandPolicy, OperandPos, Output, PReg, ProgPoint, - RegAllocError, RegClass, SpillSlot, VReg, -}; -use fxhash::{FxHashMap, FxHashSet}; -use log::debug; -use smallvec::{smallvec, SmallVec}; -use std::cmp::Ordering; -use std::collections::{BTreeMap, BinaryHeap, HashMap, HashSet, VecDeque}; -use std::convert::TryFrom; -use std::fmt::Debug; - -/// A range from `from` (inclusive) to `to` (exclusive). -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct CodeRange { - from: ProgPoint, - to: ProgPoint, -} - -impl CodeRange { - #[inline(always)] - pub fn is_empty(&self) -> bool { - self.from == self.to - } - #[inline(always)] - pub fn contains(&self, other: &Self) -> bool { - other.from >= self.from && other.to <= self.to - } - #[inline(always)] - pub fn contains_point(&self, other: ProgPoint) -> bool { - other >= self.from && other < self.to - } - #[inline(always)] - pub fn overlaps(&self, other: &Self) -> bool { - other.to > self.from && other.from < self.to - } - #[inline(always)] - pub fn len(&self) -> usize { - self.to.inst().index() - self.from.inst().index() - } -} - -impl std::cmp::PartialOrd for CodeRange { - #[inline(always)] - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} -impl std::cmp::Ord for CodeRange { - #[inline(always)] - fn cmp(&self, other: &Self) -> Ordering { - if self.to <= other.from { - Ordering::Less - } else if self.from >= other.to { - Ordering::Greater - } else { - Ordering::Equal - } - } -} - -define_index!(LiveBundleIndex); -define_index!(LiveRangeIndex); -define_index!(SpillSetIndex); -define_index!(UseIndex); -define_index!(VRegIndex); -define_index!(PRegIndex); -define_index!(SpillSlotIndex); - -/// Used to carry small sets of bundles, e.g. for conflict sets. 
-type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>; - -#[derive(Clone, Copy, Debug)] -struct LiveRangeListEntry { - range: CodeRange, - index: LiveRangeIndex, -} - -type LiveRangeList = SmallVec<[LiveRangeListEntry; 4]>; -type UseList = SmallVec<[Use; 2]>; - -#[derive(Clone, Debug)] -struct LiveRange { - range: CodeRange, - - vreg: VRegIndex, - bundle: LiveBundleIndex, - uses_spill_weight_and_flags: u32, - - uses: UseList, - - merged_into: LiveRangeIndex, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[repr(u32)] -enum LiveRangeFlag { - StartsAtDef = 1, -} - -impl LiveRange { - #[inline(always)] - pub fn set_flag(&mut self, flag: LiveRangeFlag) { - self.uses_spill_weight_and_flags |= (flag as u32) << 29; - } - #[inline(always)] - pub fn clear_flag(&mut self, flag: LiveRangeFlag) { - self.uses_spill_weight_and_flags &= !((flag as u32) << 29); - } - #[inline(always)] - pub fn assign_flag(&mut self, flag: LiveRangeFlag, val: bool) { - let bit = if val { (flag as u32) << 29 } else { 0 }; - self.uses_spill_weight_and_flags &= 0xe000_0000; - self.uses_spill_weight_and_flags |= bit; - } - #[inline(always)] - pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { - self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0 - } - #[inline(always)] - pub fn flag_word(&self) -> u32 { - self.uses_spill_weight_and_flags & 0xe000_0000 - } - #[inline(always)] - pub fn merge_flags(&mut self, flag_word: u32) { - self.uses_spill_weight_and_flags |= flag_word; - } - #[inline(always)] - pub fn uses_spill_weight(&self) -> u32 { - self.uses_spill_weight_and_flags & 0x1fff_ffff - } - #[inline(always)] - pub fn set_uses_spill_weight(&mut self, weight: u32) { - assert!(weight < (1 << 29)); - self.uses_spill_weight_and_flags = - (self.uses_spill_weight_and_flags & 0xe000_0000) | weight; - } -} - -#[derive(Clone, Copy, Debug)] -struct Use { - operand: Operand, - pos: ProgPoint, - slot: u8, - weight: u16, -} - -impl Use { - #[inline(always)] - fn new(operand: Operand, pos: ProgPoint, slot: u8) -> Self { - Self { - operand, - pos, - slot, - // Weight is updated on insertion into LR. 
- weight: 0, - } - } -} - -const SLOT_NONE: u8 = u8::MAX; - -#[derive(Clone, Debug)] -struct LiveBundle { - ranges: LiveRangeList, - spillset: SpillSetIndex, - allocation: Allocation, - prio: u32, // recomputed after every bulk update - spill_weight_and_props: u32, -} - -impl LiveBundle { - #[inline(always)] - fn set_cached_spill_weight_and_props( - &mut self, - spill_weight: u32, - minimal: bool, - fixed: bool, - stack: bool, - ) { - debug_assert!(spill_weight < ((1 << 29) - 1)); - self.spill_weight_and_props = spill_weight - | (if minimal { 1 << 31 } else { 0 }) - | (if fixed { 1 << 30 } else { 0 }) - | (if stack { 1 << 29 } else { 0 }); - } - - #[inline(always)] - fn cached_minimal(&self) -> bool { - self.spill_weight_and_props & (1 << 31) != 0 - } - - #[inline(always)] - fn cached_fixed(&self) -> bool { - self.spill_weight_and_props & (1 << 30) != 0 - } - - #[inline(always)] - fn cached_stack(&self) -> bool { - self.spill_weight_and_props & (1 << 29) != 0 - } - - #[inline(always)] - fn set_cached_fixed(&mut self) { - self.spill_weight_and_props |= 1 << 30; - } - - #[inline(always)] - fn set_cached_stack(&mut self) { - self.spill_weight_and_props |= 1 << 29; - } - - #[inline(always)] - fn cached_spill_weight(&self) -> u32 { - self.spill_weight_and_props & ((1 << 29) - 1) - } -} - -#[derive(Clone, Debug)] -struct SpillSet { - vregs: SmallVec<[VRegIndex; 2]>, - slot: SpillSlotIndex, - reg_hint: PReg, - class: RegClass, - spill_bundle: LiveBundleIndex, - required: bool, - size: u8, -} - -#[derive(Clone, Debug)] -struct VRegData { - ranges: LiveRangeList, - blockparam: Block, - is_ref: bool, - is_pinned: bool, -} - -#[derive(Clone, Debug)] -struct PRegData { - reg: PReg, - allocations: LiveRangeSet, -} - -/* - * Environment setup: - * - * We have seven fundamental objects: LiveRange, LiveBundle, SpillSet, Use, VReg, PReg. - * - * The relationship is as follows: - * - * LiveRange --(vreg)--> shared(VReg) - * LiveRange --(bundle)--> shared(LiveBundle) - * LiveRange --(use) --> list(Use) - * - * Use --(vreg)--> shared(VReg) - * - * LiveBundle --(range)--> list(LiveRange) - * LiveBundle --(spillset)--> shared(SpillSet) - * LiveBundle --(parent)--> parent(LiveBundle) - * - * SpillSet --(parent)--> parent(SpillSet) - * SpillSet --(bundles)--> list(LiveBundle) - * - * VReg --(range)--> list(LiveRange) - * - * PReg --(ranges)--> set(LiveRange) - */ - -#[derive(Clone, Debug)] -struct Env<'a, F: Function> { - func: &'a F, - env: &'a MachineEnv, - cfginfo: CFGInfo, - liveins: Vec, - liveouts: Vec, - /// Blockparam outputs: from-vreg, (end of) from-block, (start of) - /// to-block, to-vreg. The field order is significant: these are sorted so - /// that a scan over vregs, then blocks in each range, can scan in - /// order through this (sorted) list and add allocs to the - /// half-move list. - blockparam_outs: Vec<(VRegIndex, Block, Block, VRegIndex)>, - /// Blockparam inputs: to-vreg, (start of) to-block, (end of) - /// from-block. As above for `blockparam_outs`, field order is - /// significant. - blockparam_ins: Vec<(VRegIndex, Block, Block)>, - /// Blockparam allocs: block, idx, vreg, alloc. Info to describe - /// blockparam locations at block entry, for metadata purposes - /// (e.g. for the checker). - blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>, - - ranges: Vec, - bundles: Vec, - spillsets: Vec, - vregs: Vec, - vreg_regs: Vec, - pregs: Vec, - allocation_queue: PrioQueue, - clobbers: Vec, // Sorted list of insts with clobbers. - safepoints: Vec, // Sorted list of safepoint insts. 
- safepoints_per_vreg: HashMap>, - - spilled_bundles: Vec, - spillslots: Vec, - slots_by_size: Vec, - - extra_spillslot: Vec>, - - // Program moves: these are moves in the provided program that we - // handle with our internal machinery, in order to avoid the - // overhead of ordinary operand processing. We expect the client - // to not generate any code for instructions that return - // `Some(..)` for `.is_move()`, and instead use the edits that we - // provide to implement those moves (or some simplified version of - // them) post-regalloc. - // - // (from-vreg, inst, from-alloc), sorted by (from-vreg, inst) - prog_move_srcs: Vec<((VRegIndex, Inst), Allocation)>, - // (to-vreg, inst, to-alloc), sorted by (to-vreg, inst) - prog_move_dsts: Vec<((VRegIndex, Inst), Allocation)>, - // (from-vreg, to-vreg) for bundle-merging. - prog_move_merges: Vec<(LiveRangeIndex, LiveRangeIndex)>, - - // When multiple fixed-register constraints are present on a - // single VReg at a single program point (this can happen for, - // e.g., call args that use the same value multiple times), we - // remove all but one of the fixed-register constraints, make a - // note here, and add a clobber with that PReg instread to keep - // the register available. When we produce the final edit-list, we - // will insert a copy from wherever the VReg's primary allocation - // was to the approprate PReg. - // - // (progpoint, copy-from-preg, copy-to-preg, to-slot) - multi_fixed_reg_fixups: Vec<(ProgPoint, PRegIndex, PRegIndex, usize)>, - - inserted_moves: Vec, - - // Output: - edits: Vec<(u32, InsertMovePrio, Edit)>, - allocs: Vec, - inst_alloc_offsets: Vec, - num_spillslots: u32, - safepoint_slots: Vec<(ProgPoint, SpillSlot)>, - - stats: Stats, - - // For debug output only: a list of textual annotations at every - // ProgPoint to insert into the final allocated program listing. 
- debug_annotations: std::collections::HashMap>, - annotations_enabled: bool, -} - -#[derive(Clone, Debug)] -struct SpillSlotData { - ranges: LiveRangeSet, - class: RegClass, - alloc: Allocation, - next_spillslot: SpillSlotIndex, -} - -#[derive(Clone, Debug)] -struct SpillSlotList { - first_spillslot: SpillSlotIndex, - last_spillslot: SpillSlotIndex, -} - -#[derive(Clone, Debug)] -struct PrioQueue { - heap: std::collections::BinaryHeap, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -struct PrioQueueEntry { - prio: u32, - bundle: LiveBundleIndex, - reg_hint: PReg, -} - -#[derive(Clone, Debug)] -struct LiveRangeSet { - btree: BTreeMap, -} - -#[derive(Clone, Copy, Debug)] -struct LiveRangeKey { - from: u32, - to: u32, -} - -impl LiveRangeKey { - #[inline(always)] - fn from_range(range: &CodeRange) -> Self { - Self { - from: range.from.to_index(), - to: range.to.to_index(), - } - } - - #[inline(always)] - fn to_range(&self) -> CodeRange { - CodeRange { - from: ProgPoint::from_index(self.from), - to: ProgPoint::from_index(self.to), - } - } -} - -impl std::cmp::PartialEq for LiveRangeKey { - #[inline(always)] - fn eq(&self, other: &Self) -> bool { - self.to > other.from && self.from < other.to - } -} -impl std::cmp::Eq for LiveRangeKey {} -impl std::cmp::PartialOrd for LiveRangeKey { - #[inline(always)] - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} -impl std::cmp::Ord for LiveRangeKey { - #[inline(always)] - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - if self.to <= other.from { - std::cmp::Ordering::Less - } else if self.from >= other.to { - std::cmp::Ordering::Greater - } else { - std::cmp::Ordering::Equal - } - } -} - -struct PrioQueueComparator<'a> { - prios: &'a [usize], -} -impl<'a> ContainerComparator for PrioQueueComparator<'a> { - type Ix = LiveBundleIndex; - fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering { - self.prios[a.index()].cmp(&self.prios[b.index()]) - } -} - -impl PrioQueue { - fn new() -> Self { - PrioQueue { - heap: std::collections::BinaryHeap::new(), - } - } - - #[inline(always)] - fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, reg_hint: PReg) { - self.heap.push(PrioQueueEntry { - prio: prio as u32, - bundle, - reg_hint, - }); - } - - #[inline(always)] - fn is_empty(self) -> bool { - self.heap.is_empty() - } - - #[inline(always)] - fn pop(&mut self) -> Option<(LiveBundleIndex, PReg)> { - self.heap.pop().map(|entry| (entry.bundle, entry.reg_hint)) - } -} - -impl LiveRangeSet { - pub(crate) fn new() -> Self { - Self { - btree: BTreeMap::new(), - } - } -} - -#[inline(always)] -fn spill_weight_from_policy(policy: OperandPolicy, loop_depth: usize, is_def: bool) -> u32 { - // A bonus of 1000 for one loop level, 4000 for two loop levels, - // 16000 for three loop levels, etc. Avoids exponentiation. 
- let hot_bonus = std::cmp::min(16000, 1000 * (1 << (2 * loop_depth))); - let def_bonus = if is_def { 2000 } else { 0 }; - let policy_bonus = match policy { - OperandPolicy::Any => 1000, - OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000, - _ => 0, - }; - hot_bonus + def_bonus + policy_bonus -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum Requirement { - Unknown, - Fixed(PReg), - Register(RegClass), - Stack(RegClass), - Any(RegClass), - Conflict, -} -impl Requirement { - #[inline(always)] - fn class(self) -> RegClass { - match self { - Requirement::Unknown => panic!("No class for unknown Requirement"), - Requirement::Fixed(preg) => preg.class(), - Requirement::Register(class) | Requirement::Any(class) | Requirement::Stack(class) => { - class - } - Requirement::Conflict => panic!("No class for conflicted Requirement"), - } - } - #[inline(always)] - fn merge(self, other: Requirement) -> Requirement { - match (self, other) { - (Requirement::Unknown, other) | (other, Requirement::Unknown) => other, - (Requirement::Conflict, _) | (_, Requirement::Conflict) => Requirement::Conflict, - (other, Requirement::Any(rc)) | (Requirement::Any(rc), other) => { - if other.class() == rc { - other - } else { - Requirement::Conflict - } - } - (Requirement::Stack(rc1), Requirement::Stack(rc2)) => { - if rc1 == rc2 { - self - } else { - Requirement::Conflict - } - } - (Requirement::Register(rc), Requirement::Fixed(preg)) - | (Requirement::Fixed(preg), Requirement::Register(rc)) => { - if rc == preg.class() { - Requirement::Fixed(preg) - } else { - Requirement::Conflict - } - } - (Requirement::Register(rc1), Requirement::Register(rc2)) => { - if rc1 == rc2 { - self - } else { - Requirement::Conflict - } - } - (Requirement::Fixed(a), Requirement::Fixed(b)) if a == b => self, - _ => Requirement::Conflict, - } - } - #[inline(always)] - fn from_operand(op: Operand) -> Requirement { - match op.policy() { - OperandPolicy::FixedReg(preg) => Requirement::Fixed(preg), - OperandPolicy::Reg | OperandPolicy::Reuse(_) => Requirement::Register(op.class()), - OperandPolicy::Stack => Requirement::Stack(op.class()), - _ => Requirement::Any(op.class()), - } - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -enum AllocRegResult { - Allocated(Allocation), - Conflict(LiveBundleVec, ProgPoint), - ConflictWithFixed(u32, ProgPoint), - ConflictHighCost, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -struct BundleProperties { - minimal: bool, - fixed: bool, -} - -#[derive(Clone, Debug)] -struct InsertedMove { - pos: ProgPoint, - prio: InsertMovePrio, - from_alloc: Allocation, - to_alloc: Allocation, - to_vreg: Option, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -enum InsertMovePrio { - InEdgeMoves, - BlockParam, - Regular, - PostRegular, - MultiFixedReg, - ReusedInput, - OutEdgeMoves, -} - -#[derive(Clone, Copy, Debug, Default)] -pub struct Stats { - livein_blocks: usize, - livein_iterations: usize, - initial_liverange_count: usize, - merged_bundle_count: usize, - prog_moves: usize, - prog_moves_dead_src: usize, - prog_move_merge_attempt: usize, - prog_move_merge_success: usize, - process_bundle_count: usize, - process_bundle_reg_probes_fixed: usize, - process_bundle_reg_success_fixed: usize, - process_bundle_bounding_range_probe_start_any: usize, - process_bundle_bounding_range_probes_any: usize, - process_bundle_bounding_range_success_any: usize, - process_bundle_reg_probe_start_any: usize, - process_bundle_reg_probes_any: usize, - process_bundle_reg_success_any: usize, - evict_bundle_event: 
usize, - evict_bundle_count: usize, - splits: usize, - splits_clobbers: usize, - splits_hot: usize, - splits_conflicts: usize, - splits_defs: usize, - splits_all: usize, - final_liverange_count: usize, - final_bundle_count: usize, - spill_bundle_count: usize, - spill_bundle_reg_probes: usize, - spill_bundle_reg_success: usize, - blockparam_ins_count: usize, - blockparam_outs_count: usize, - blockparam_allocs_count: usize, - halfmoves_count: usize, - edits_count: usize, -} - -/// This iterator represents a traversal through all allocatable -/// registers of a given class, in a certain order designed to -/// minimize allocation contention. -/// -/// The order in which we try registers is somewhat complex: -/// - First, if there is a hint, we try that. -/// - Then, we try registers in a traversal order that is based on an -/// "offset" (usually the bundle index) spreading pressure evenly -/// among registers to reduce commitment-map contention. -/// - Within that scan, we try registers in two groups: first, -/// prferred registers; then, non-preferred registers. (In normal -/// usage, these consist of caller-save and callee-save registers -/// respectively, to minimize clobber-saves; but they need not.) -struct RegTraversalIter<'a> { - env: &'a MachineEnv, - class: usize, - hints: [Option; 2], - hint_idx: usize, - pref_idx: usize, - non_pref_idx: usize, - offset_pref: usize, - offset_non_pref: usize, - is_fixed: bool, - fixed: Option, -} - -impl<'a> RegTraversalIter<'a> { - pub fn new( - env: &'a MachineEnv, - class: RegClass, - hint_reg: PReg, - hint2_reg: PReg, - offset: usize, - fixed: Option, - ) -> Self { - let mut hint_reg = if hint_reg != PReg::invalid() { - Some(hint_reg) - } else { - None - }; - let mut hint2_reg = if hint2_reg != PReg::invalid() { - Some(hint2_reg) - } else { - None - }; - - if hint_reg.is_none() { - hint_reg = hint2_reg; - hint2_reg = None; - } - let hints = [hint_reg, hint2_reg]; - let class = class as u8 as usize; - let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { - offset % env.preferred_regs_by_class[class].len() - } else { - 0 - }; - let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { - offset % env.non_preferred_regs_by_class[class].len() - } else { - 0 - }; - Self { - env, - class, - hints, - hint_idx: 0, - pref_idx: 0, - non_pref_idx: 0, - offset_pref, - offset_non_pref, - is_fixed: fixed.is_some(), - fixed, - } - } -} - -impl<'a> std::iter::Iterator for RegTraversalIter<'a> { - type Item = PReg; - - fn next(&mut self) -> Option { - if self.is_fixed { - let ret = self.fixed; - self.fixed = None; - return ret; - } - - fn wrap(idx: usize, limit: usize) -> usize { - if idx >= limit { - idx - limit - } else { - idx - } - } - if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { - let h = self.hints[self.hint_idx]; - self.hint_idx += 1; - return h; - } - while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { - let arr = &self.env.preferred_regs_by_class[self.class][..]; - let r = arr[wrap(self.pref_idx + self.offset_pref, arr.len())]; - self.pref_idx += 1; - if Some(r) == self.hints[0] || Some(r) == self.hints[1] { - continue; - } - return Some(r); - } - while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { - let arr = &self.env.non_preferred_regs_by_class[self.class][..]; - let r = arr[wrap(self.non_pref_idx + self.offset_non_pref, arr.len())]; - self.non_pref_idx += 1; - if Some(r) == self.hints[0] || Some(r) == self.hints[1] { - continue; - } - return Some(r); 
- } - None - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -enum RedundantMoveState { - Copy(Allocation, Option), - Orig(VReg), - None, -} -#[derive(Clone, Debug, Default)] -struct RedundantMoveEliminator { - allocs: FxHashMap, - reverse_allocs: FxHashMap>, -} -#[derive(Copy, Clone, Debug)] -struct RedundantMoveAction { - elide: bool, - def_alloc: Option<(Allocation, VReg)>, -} - -impl RedundantMoveEliminator { - fn process_move( - &mut self, - from: Allocation, - to: Allocation, - to_vreg: Option, - ) -> RedundantMoveAction { - // Look up the src and dest. - let from_state = self - .allocs - .get(&from) - .map(|&p| p) - .unwrap_or(RedundantMoveState::None); - let to_state = self - .allocs - .get(&to) - .map(|&p| p) - .unwrap_or(RedundantMoveState::None); - - log::debug!( - " -> redundant move tracker: from {} to {} to_vreg {:?}", - from, - to, - to_vreg - ); - log::debug!( - " -> from_state {:?} to_state {:?}", - from_state, - to_state - ); - - if from == to && to_vreg.is_some() { - self.clear_alloc(to); - self.allocs - .insert(to, RedundantMoveState::Orig(to_vreg.unwrap())); - return RedundantMoveAction { - elide: true, - def_alloc: Some((to, to_vreg.unwrap())), - }; - } - - let src_vreg = match from_state { - RedundantMoveState::Copy(_, opt_r) => opt_r, - RedundantMoveState::Orig(r) => Some(r), - _ => None, - }; - log::debug!(" -> src_vreg {:?}", src_vreg); - let dst_vreg = to_vreg.or(src_vreg); - log::debug!(" -> dst_vreg {:?}", dst_vreg); - let existing_dst_vreg = match to_state { - RedundantMoveState::Copy(_, opt_r) => opt_r, - RedundantMoveState::Orig(r) => Some(r), - _ => None, - }; - log::debug!(" -> existing_dst_vreg {:?}", existing_dst_vreg); - - let elide = match (from_state, to_state) { - (_, RedundantMoveState::Copy(orig_alloc, _)) if orig_alloc == from => true, - (RedundantMoveState::Copy(new_alloc, _), _) if new_alloc == to => true, - _ => false, - }; - log::debug!(" -> elide {}", elide); - - let def_alloc = if dst_vreg != existing_dst_vreg && dst_vreg.is_some() { - Some((to, dst_vreg.unwrap())) - } else { - None - }; - log::debug!(" -> def_alloc {:?}", def_alloc); - - // Invalidate all existing copies of `to` if `to` actually changed value. - if !elide { - self.clear_alloc(to); - } - - // Set up forward and reverse mapping. Don't track stack-to-stack copies. 
- if from.is_reg() || to.is_reg() { - self.allocs - .insert(to, RedundantMoveState::Copy(from, dst_vreg)); - log::debug!( - " -> create mapping {} -> {:?}", - to, - RedundantMoveState::Copy(from, dst_vreg) - ); - self.reverse_allocs - .entry(from) - .or_insert_with(|| smallvec![]) - .push(to); - } - - RedundantMoveAction { elide, def_alloc } - } - - fn clear(&mut self) { - log::debug!(" redundant move eliminator cleared"); - self.allocs.clear(); - self.reverse_allocs.clear(); - } - - fn clear_alloc(&mut self, alloc: Allocation) { - log::debug!(" redundant move eliminator: clear {:?}", alloc); - if let Some(ref mut existing_copies) = self.reverse_allocs.get_mut(&alloc) { - for to_inval in existing_copies.iter() { - log::debug!(" -> clear existing copy: {:?}", to_inval); - if let Some(val) = self.allocs.get_mut(to_inval) { - match val { - RedundantMoveState::Copy(_, Some(vreg)) => { - *val = RedundantMoveState::Orig(*vreg); - } - _ => *val = RedundantMoveState::None, - } - } - self.allocs.remove(to_inval); - } - existing_copies.clear(); - } - self.allocs.remove(&alloc); - } -} +use crate::{Function, MachineEnv, Output, PReg, ProgPoint, RegAllocError, RegClass}; +use std::collections::HashMap; + +pub(crate) mod data_structures; +pub use data_structures::Stats; +use data_structures::*; +pub(crate) mod reg_traversal; +use reg_traversal::*; +pub(crate) mod requirement; +use requirement::*; +pub(crate) mod redundant_moves; +use redundant_moves::*; +pub(crate) mod liveranges; +use liveranges::*; +pub(crate) mod merge; +pub(crate) mod process; +use process::*; +pub(crate) mod dump; +pub(crate) mod moves; +pub(crate) mod spill; +pub(crate) mod stackmap; impl<'a, F: Function> Env<'a, F> { pub(crate) fn new( @@ -981,4045 +90,15 @@ impl<'a, F: Function> Env<'a, F> { } } - fn create_pregs_and_vregs(&mut self) { - // Create PRegs from the env. - self.pregs.resize( - PReg::MAX_INDEX, - PRegData { - reg: PReg::invalid(), - allocations: LiveRangeSet::new(), - }, - ); - for &preg in &self.env.regs { - self.pregs[preg.index()].reg = preg; - } - // Create VRegs from the vreg count. - for idx in 0..self.func.num_vregs() { - // We'll fill in the real details when we see the def. - let reg = VReg::new(idx, RegClass::Int); - self.add_vreg( - reg, - VRegData { - ranges: smallvec![], - blockparam: Block::invalid(), - is_ref: false, - is_pinned: false, - }, - ); - } - for v in self.func.reftype_vregs() { - self.vregs[v.vreg()].is_ref = true; - } - for v in self.func.pinned_vregs() { - self.vregs[v.vreg()].is_pinned = true; - } - // Create allocations too. - for inst in 0..self.func.insts() { - let start = self.allocs.len() as u32; - self.inst_alloc_offsets.push(start); - for _ in 0..self.func.inst_operands(Inst::new(inst)).len() { - self.allocs.push(Allocation::none()); - } - } - } - - fn add_vreg(&mut self, reg: VReg, data: VRegData) -> VRegIndex { - let idx = self.vregs.len(); - self.vregs.push(data); - self.vreg_regs.push(reg); - VRegIndex::new(idx) - } - - fn create_liverange(&mut self, range: CodeRange) -> LiveRangeIndex { - let idx = self.ranges.len(); - - self.ranges.push(LiveRange { - range, - vreg: VRegIndex::invalid(), - bundle: LiveBundleIndex::invalid(), - uses_spill_weight_and_flags: 0, - - uses: smallvec![], - - merged_into: LiveRangeIndex::invalid(), - }); - - LiveRangeIndex::new(idx) - } - - /// Mark `range` as live for the given `vreg`. - /// - /// Returns the liverange that contains the given range. 
- fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, range: CodeRange) -> LiveRangeIndex { - log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); - - // Invariant: as we are building liveness information, we - // *always* process instructions bottom-to-top, and as a - // consequence, new liveranges are always created before any - // existing liveranges for a given vreg. We assert this here, - // then use it to avoid an O(n) merge step (which would lead - // to O(n^2) liveness construction cost overall). - // - // We store liveranges in reverse order in the `.ranges` - // array, then reverse them at the end of - // `compute_liveness()`. - - assert!( - self.vregs[vreg.index()].ranges.is_empty() - || range.to - <= self.ranges[self.vregs[vreg.index()] - .ranges - .last() - .unwrap() - .index - .index()] - .range - .from - ); - - if self.vregs[vreg.index()].ranges.is_empty() - || range.to - < self.ranges[self.vregs[vreg.index()] - .ranges - .last() - .unwrap() - .index - .index()] - .range - .from - { - // Is not contiguous with previously-added (immediately - // following) range; create a new range. - let lr = self.create_liverange(range); - self.ranges[lr.index()].vreg = vreg; - self.vregs[vreg.index()] - .ranges - .push(LiveRangeListEntry { range, index: lr }); - lr - } else { - // Is contiguous with previously-added range; just extend - // its range and return it. - let lr = self.vregs[vreg.index()].ranges.last().unwrap().index; - assert!(range.to == self.ranges[lr.index()].range.from); - self.ranges[lr.index()].range.from = range.from; - lr - } - } - - fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { - let operand = u.operand; - let policy = operand.policy(); - let block = self.cfginfo.insn_block[u.pos.inst().index()]; - let loop_depth = self.cfginfo.approx_loop_depth[block.index()] as usize; - let weight = - spill_weight_from_policy(policy, loop_depth, operand.kind() != OperandKind::Use); - u.weight = u16::try_from(weight).expect("weight too large for u16 field"); - - log::debug!( - "insert use {:?} into lr {:?} with weight {}", - u, - into, - weight, - ); - - // N.B.: we do *not* update `requirement` on the range, - // because those will be computed during the multi-fixed-reg - // fixup pass later (after all uses are inserted). - - self.ranges[into.index()].uses.push(u); - - // Update stats. 
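`add_liverange_to_vreg` above leans on the bottom-to-top scan order: each newly created range either strictly precedes or exactly abuts the most recently added one, so the common case is an O(1) extension, and the list is reversed once at the end instead of being merge-sorted. A simplified sketch of that append-or-extend step, with a hypothetical `Range` type standing in for `CodeRange`:

#[derive(Copy, Clone, Debug, PartialEq)]
struct Range {
    from: u32,
    to: u32, // half-open [from, to)
}

/// `ranges_rev` is kept in reverse (descending) order while scanning
/// instructions bottom-to-top; it is reversed once after the scan.
fn add_range_rev(ranges_rev: &mut Vec<Range>, range: Range) {
    if let Some(last) = ranges_rev.last_mut() {
        // New ranges never start after an existing one, per the scan order.
        debug_assert!(range.to <= last.from);
        if range.to == last.from {
            // Contiguous with the previously added (immediately following)
            // range: just extend its start downward.
            last.from = range.from;
            return;
        }
    }
    ranges_rev.push(range);
}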
- self.ranges[into.index()].uses_spill_weight_and_flags += weight; - log::debug!( - " -> now range has weight {}", - self.ranges[into.index()].uses_spill_weight(), - ); - } - - fn find_vreg_liverange_for_pos( - &self, - vreg: VRegIndex, - pos: ProgPoint, - ) -> Option { - for entry in &self.vregs[vreg.index()].ranges { - if entry.range.contains_point(pos) { - return Some(entry.index); - } + pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { + self.create_pregs_and_vregs(); + self.compute_liveness()?; + self.merge_vreg_bundles(); + self.queue_bundles(); + if log::log_enabled!(log::Level::Debug) { + self.dump_state(); } - None - } - - fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { - log::debug!("adding liverange to preg: {:?} to {}", range, reg); - let preg_idx = PRegIndex::new(reg.index()); - self.pregs[preg_idx.index()] - .allocations - .btree - .insert(LiveRangeKey::from_range(&range), LiveRangeIndex::invalid()); - } - - fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool { - self.liveins[block.index()].get(vreg.index()) - } - - fn compute_liveness(&mut self) -> Result<(), RegAllocError> { - // Create initial LiveIn and LiveOut bitsets. - for _ in 0..self.func.blocks() { - self.liveins.push(BitVec::new()); - self.liveouts.push(BitVec::new()); - } - - // Run a worklist algorithm to precisely compute liveins and - // liveouts. - let mut workqueue = VecDeque::new(); - let mut workqueue_set = FxHashSet::default(); - // Initialize workqueue with postorder traversal. - for &block in &self.cfginfo.postorder[..] { - workqueue.push_back(block); - workqueue_set.insert(block); - } - - while !workqueue.is_empty() { - let block = workqueue.pop_front().unwrap(); - workqueue_set.remove(&block); - - log::debug!("computing liveins for block{}", block.index()); - - self.stats.livein_iterations += 1; - - let mut live = self.liveouts[block.index()].clone(); - log::debug!(" -> initial liveout set: {:?}", live); - - for inst in self.func.block_insns(block).rev().iter() { - if let Some((src, dst)) = self.func.is_move(inst) { - live.set(dst.vreg().vreg(), false); - live.set(src.vreg().vreg(), true); - } - - for pos in &[OperandPos::After, OperandPos::Before] { - for op in self.func.inst_operands(inst) { - if op.pos() == *pos { - let was_live = live.get(op.vreg().vreg()); - log::debug!("op {:?} was_live = {}", op, was_live); - match op.kind() { - OperandKind::Use | OperandKind::Mod => { - live.set(op.vreg().vreg(), true); - } - OperandKind::Def => { - live.set(op.vreg().vreg(), false); - } - } - } - } - } - } - for &blockparam in self.func.block_params(block) { - live.set(blockparam.vreg(), false); - } - - for &pred in self.func.block_preds(block) { - if self.liveouts[pred.index()].or(&live) { - if !workqueue_set.contains(&pred) { - workqueue_set.insert(pred); - workqueue.push_back(pred); - } - } - } - - log::debug!("computed liveins at block{}: {:?}", block.index(), live); - self.liveins[block.index()] = live; - } - - // Check that there are no liveins to the entry block. (The - // client should create a virtual intsruction that defines any - // PReg liveins if necessary.) 
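The worklist above is the standard backward liveness fixpoint: a block's live-ins are its upward-exposed uses plus its live-outs minus its defs, and those live-ins flow into every predecessor's live-outs until nothing changes. A condensed sketch under simplified assumptions (vregs as plain `usize`, per-block gen/kill summaries, and a FIFO seeded with all blocks rather than the postorder seeding used above):

use std::collections::{HashSet, VecDeque};

/// live-in(b) = gen(b) + (live-out(b) - kill(b));
/// live-out(p) grows by live-in(b) for every predecessor p of b.
fn liveness_fixpoint(
    preds: &[Vec<usize>],    // predecessor block indices, per block
    gen: &[HashSet<usize>],  // vregs read before any def in the block
    kill: &[HashSet<usize>], // vregs defined in the block
) -> (Vec<HashSet<usize>>, Vec<HashSet<usize>>) {
    let n = preds.len();
    let mut livein: Vec<HashSet<usize>> = vec![HashSet::new(); n];
    let mut liveout: Vec<HashSet<usize>> = vec![HashSet::new(); n];
    let mut queue: VecDeque<usize> = (0..n).collect();
    while let Some(b) = queue.pop_front() {
        let mut live: HashSet<usize> = liveout[b].difference(&kill[b]).copied().collect();
        live.extend(gen[b].iter().copied());
        if live != livein[b] {
            // Push this block's live-ins into each predecessor's live-outs
            // and requeue any predecessor whose set actually grew.
            for &p in &preds[b] {
                let before = liveout[p].len();
                liveout[p].extend(live.iter().copied());
                if liveout[p].len() != before {
                    queue.push_back(p);
                }
            }
            livein[b] = live;
        }
    }
    (livein, liveout)
}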
- if self.liveins[self.func.entry_block().index()] - .iter() - .next() - .is_some() - { - log::debug!( - "non-empty liveins to entry block: {:?}", - self.liveins[self.func.entry_block().index()] - ); - return Err(RegAllocError::EntryLivein); - } - - for &vreg in self.func.reftype_vregs() { - self.safepoints_per_vreg.insert(vreg.vreg(), HashSet::new()); - } - - // Create Uses and Defs referring to VRegs, and place the Uses - // in LiveRanges. - // - // We already computed precise liveouts and liveins for every - // block above, so we don't need to run an iterative algorithm - // here; instead, every block's computation is purely local, - // from end to start. - - // Track current LiveRange for each vreg. - // - // Invariant: a stale range may be present here; ranges are - // only valid if `live.get(vreg)` is true. - let mut vreg_ranges: Vec = - vec![LiveRangeIndex::invalid(); self.func.num_vregs()]; - - for i in (0..self.func.blocks()).rev() { - let block = Block::new(i); - - self.stats.livein_blocks += 1; - - // Init our local live-in set. - let mut live = self.liveouts[block.index()].clone(); - - // Initially, registers are assumed live for the whole block. - for vreg in live.iter() { - let range = CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: self.cfginfo.block_exit[block.index()].next(), - }; - log::debug!( - "vreg {:?} live at end of block --> create range {:?}", - VRegIndex::new(vreg), - range - ); - let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range); - vreg_ranges[vreg] = lr; - } - - // Create vreg data for blockparams. - for param in self.func.block_params(block) { - self.vreg_regs[param.vreg()] = *param; - self.vregs[param.vreg()].blockparam = block; - } - - let insns = self.func.block_insns(block); - - // If the last instruction is a branch (rather than - // return), create blockparam_out entries. - if self.func.is_branch(insns.last()) { - let operands = self.func.inst_operands(insns.last()); - let mut i = self.func.branch_blockparam_arg_offset(block, insns.last()); - for &succ in self.func.block_succs(block) { - for &blockparam in self.func.block_params(succ) { - let from_vreg = VRegIndex::new(operands[i].vreg().vreg()); - let blockparam_vreg = VRegIndex::new(blockparam.vreg()); - self.blockparam_outs - .push((from_vreg, block, succ, blockparam_vreg)); - i += 1; - } - } - } - - // For each instruction, in reverse order, process - // operands and clobbers. - for inst in insns.rev().iter() { - if self.func.inst_clobbers(inst).len() > 0 { - self.clobbers.push(inst); - } - - // Mark clobbers with CodeRanges on PRegs. - for i in 0..self.func.inst_clobbers(inst).len() { - // don't borrow `self` - let clobber = self.func.inst_clobbers(inst)[i]; - // Clobber range is at After point only: an - // instruction can still take an input in a reg - // that it later clobbers. (In other words, the - // clobber is like a normal def that never gets - // used.) - let range = CodeRange { - from: ProgPoint::after(inst), - to: ProgPoint::before(inst.next()), - }; - self.add_liverange_to_preg(range, clobber); - } - - // Does the instruction have any input-reusing - // outputs? This is important below to establish - // proper interference wrt other inputs. - let mut reused_input = None; - for op in self.func.inst_operands(inst) { - if let OperandPolicy::Reuse(i) = op.policy() { - reused_input = Some(i); - break; - } - } - - // If this is a move, handle specially. 
- if let Some((src, dst)) = self.func.is_move(inst) { - // We can completely skip the move if it is - // trivial (vreg to same vreg). - if src.vreg() != dst.vreg() { - log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); - - assert_eq!(src.class(), dst.class()); - assert_eq!(src.kind(), OperandKind::Use); - assert_eq!(src.pos(), OperandPos::Before); - assert_eq!(dst.kind(), OperandKind::Def); - assert_eq!(dst.pos(), OperandPos::After); - - // If both src and dest are pinned, emit the - // move right here, right now. - if self.vregs[src.vreg().vreg()].is_pinned - && self.vregs[dst.vreg().vreg()].is_pinned - { - // Update LRs. - if !live.get(src.vreg().vreg()) { - let lr = self.add_liverange_to_vreg( - VRegIndex::new(src.vreg().vreg()), - CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: ProgPoint::after(inst), - }, - ); - live.set(src.vreg().vreg(), true); - vreg_ranges[src.vreg().vreg()] = lr; - } - if live.get(dst.vreg().vreg()) { - let lr = vreg_ranges[dst.vreg().vreg()]; - self.ranges[lr.index()].range.from = ProgPoint::after(inst); - live.set(dst.vreg().vreg(), false); - } else { - self.add_liverange_to_vreg( - VRegIndex::new(dst.vreg().vreg()), - CodeRange { - from: ProgPoint::after(inst), - to: ProgPoint::before(inst.next()), - }, - ); - } - - let src_preg = match src.policy() { - OperandPolicy::FixedReg(r) => r, - _ => unreachable!(), - }; - let dst_preg = match dst.policy() { - OperandPolicy::FixedReg(r) => r, - _ => unreachable!(), - }; - self.insert_move( - ProgPoint::before(inst), - InsertMovePrio::MultiFixedReg, - Allocation::reg(src_preg), - Allocation::reg(dst_preg), - Some(dst.vreg()), - ); - } - // If exactly one of source and dest (but not - // both) is a pinned-vreg, convert this into a - // ghost use on the other vreg with a FixedReg - // policy. - else if self.vregs[src.vreg().vreg()].is_pinned - || self.vregs[dst.vreg().vreg()].is_pinned - { - log::debug!( - " -> exactly one of src/dst is pinned; converting to ghost use" - ); - let (preg, vreg, pinned_vreg, kind, pos, progpoint) = - if self.vregs[src.vreg().vreg()].is_pinned { - // Source is pinned: this is a def on the dst with a pinned preg. - ( - self.func.is_pinned_vreg(src.vreg()).unwrap(), - dst.vreg(), - src.vreg(), - OperandKind::Def, - OperandPos::After, - ProgPoint::after(inst), - ) - } else { - // Dest is pinned: this is a use on the src with a pinned preg. - ( - self.func.is_pinned_vreg(dst.vreg()).unwrap(), - src.vreg(), - dst.vreg(), - OperandKind::Use, - OperandPos::Before, - ProgPoint::after(inst), - ) - }; - let policy = OperandPolicy::FixedReg(preg); - let operand = Operand::new(vreg, policy, kind, pos); - - log::debug!( - concat!( - " -> preg {:?} vreg {:?} kind {:?} ", - "pos {:?} progpoint {:?} policy {:?} operand {:?}" - ), - preg, - vreg, - kind, - pos, - progpoint, - policy, - operand - ); - - // Get the LR for the vreg; if none, create one. 
- let mut lr = vreg_ranges[vreg.vreg()]; - if !live.get(vreg.vreg()) { - let from = match kind { - OperandKind::Use => self.cfginfo.block_entry[block.index()], - OperandKind::Def => progpoint, - _ => unreachable!(), - }; - let to = progpoint.next(); - lr = self.add_liverange_to_vreg( - VRegIndex::new(vreg.vreg()), - CodeRange { from, to }, - ); - log::debug!(" -> dead; created LR"); - } - log::debug!(" -> LR {:?}", lr); - - self.insert_use_into_liverange( - lr, - Use::new(operand, progpoint, SLOT_NONE), - ); - - if kind == OperandKind::Def { - live.set(vreg.vreg(), false); - if self.ranges[lr.index()].range.from - == self.cfginfo.block_entry[block.index()] - { - self.ranges[lr.index()].range.from = progpoint; - } - self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef); - } else { - live.set(vreg.vreg(), true); - vreg_ranges[vreg.vreg()] = lr; - } - - // Handle liveness of the other vreg. Note - // that this is somewhat special. For the - // destination case, we want the pinned - // vreg's LR to start just *after* the - // operand we inserted above, because - // otherwise it would overlap, and - // interfere, and prevent allocation. For - // the source case, we want to "poke a - // hole" in the LR: if it's live going - // downward, end it just after the operand - // and restart it before; if it isn't - // (this is the last use), start it - // before. - if kind == OperandKind::Def { - log::debug!(" -> src on pinned vreg {:?}", pinned_vreg); - // The *other* vreg is a def, so the pinned-vreg - // mention is a use. If already live, - // end the existing LR just *after* - // the `progpoint` defined above and - // start a new one just *before* the - // `progpoint` defined above, - // preserving the start. If not, start - // a new one live back to the top of - // the block, starting just before - // `progpoint`. - if live.get(pinned_vreg.vreg()) { - let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; - let orig_start = self.ranges[pinned_lr.index()].range.from; - log::debug!( - " -> live with LR {:?}; truncating to start at {:?}", - pinned_lr, - progpoint.next() - ); - self.ranges[pinned_lr.index()].range.from = progpoint.next(); - let new_lr = self.add_liverange_to_vreg( - VRegIndex::new(pinned_vreg.vreg()), - CodeRange { - from: orig_start, - to: progpoint.prev(), - }, - ); - vreg_ranges[pinned_vreg.vreg()] = new_lr; - log::debug!(" -> created LR {:?} with remaining range from {:?} to {:?}", new_lr, orig_start, progpoint); - - // Add an edit right now to indicate that at - // this program point, the given - // preg is now known as that vreg, - // not the preg, but immediately - // after, it is known as the preg - // again. This is used by the - // checker. 
- self.insert_move( - ProgPoint::after(inst), - InsertMovePrio::Regular, - Allocation::reg(preg), - Allocation::reg(preg), - Some(dst.vreg()), - ); - self.insert_move( - ProgPoint::before(inst.next()), - InsertMovePrio::MultiFixedReg, - Allocation::reg(preg), - Allocation::reg(preg), - Some(src.vreg()), - ); - } else { - if inst > self.cfginfo.block_entry[block.index()].inst() { - let new_lr = self.add_liverange_to_vreg( - VRegIndex::new(pinned_vreg.vreg()), - CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: ProgPoint::before(inst), - }, - ); - vreg_ranges[pinned_vreg.vreg()] = new_lr; - live.set(pinned_vreg.vreg(), true); - log::debug!( - " -> was not live; created new LR {:?}", - new_lr - ); - } - - // Add an edit right now to indicate that at - // this program point, the given - // preg is now known as that vreg, - // not the preg. This is used by - // the checker. - self.insert_move( - ProgPoint::after(inst), - InsertMovePrio::BlockParam, - Allocation::reg(preg), - Allocation::reg(preg), - Some(dst.vreg()), - ); - } - } else { - log::debug!(" -> dst on pinned vreg {:?}", pinned_vreg); - // The *other* vreg is a use, so the pinned-vreg - // mention is a def. Truncate its LR - // just *after* the `progpoint` - // defined above. - if live.get(pinned_vreg.vreg()) { - let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; - self.ranges[pinned_lr.index()].range.from = progpoint.next(); - log::debug!( - " -> was live with LR {:?}; truncated start to {:?}", - pinned_lr, - progpoint.next() - ); - live.set(pinned_vreg.vreg(), false); - - // Add a no-op edit right now to indicate that - // at this program point, the - // given preg is now known as that - // preg, not the vreg. This is - // used by the checker. - self.insert_move( - ProgPoint::before(inst.next()), - InsertMovePrio::PostRegular, - Allocation::reg(preg), - Allocation::reg(preg), - Some(dst.vreg()), - ); - } - // Otherwise, if dead, no need to create - // a dummy LR -- there is no - // reservation to make (the other vreg - // will land in the reg with the - // fixed-reg operand constraint, but - // it's a dead move anyway). - } - } else { - // Redefine src and dst operands to have - // positions of After and Before respectively - // (see note below), and to have Any - // constraints if they were originally Reg. - let src_policy = match src.policy() { - OperandPolicy::Reg => OperandPolicy::Any, - x => x, - }; - let dst_policy = match dst.policy() { - OperandPolicy::Reg => OperandPolicy::Any, - x => x, - }; - let src = Operand::new( - src.vreg(), - src_policy, - OperandKind::Use, - OperandPos::After, - ); - let dst = Operand::new( - dst.vreg(), - dst_policy, - OperandKind::Def, - OperandPos::Before, - ); - - if self.annotations_enabled { - self.annotate( - ProgPoint::after(inst), - format!( - " prog-move v{} ({:?}) -> v{} ({:?})", - src.vreg().vreg(), - src_policy, - dst.vreg().vreg(), - dst_policy, - ), - ); - } - - // N.B.: in order to integrate with the move - // resolution that joins LRs in general, we - // conceptually treat the move as happening - // between the move inst's After and the next - // inst's Before. Thus the src LR goes up to - // (exclusive) next-inst-pre, and the dst LR - // starts at next-inst-pre. We have to take - // care in our move insertion to handle this - // like other inter-inst moves, i.e., at - // `Regular` priority, so it properly happens - // in parallel with other inter-LR moves. 
- // - // Why the progpoint between move and next - // inst, and not the progpoint between prev - // inst and move? Because a move can be the - // first inst in a block, but cannot be the - // last; so the following progpoint is always - // within the same block, while the previous - // one may be an inter-block point (and the - // After of the prev inst in a different - // block). - - // Handle the def w.r.t. liveranges: trim the - // start of the range and mark it dead at this - // point in our backward scan. - let pos = ProgPoint::before(inst.next()); - let mut dst_lr = vreg_ranges[dst.vreg().vreg()]; - if !live.get(dst.vreg().vreg()) { - let from = pos; - let to = pos.next(); - dst_lr = self.add_liverange_to_vreg( - VRegIndex::new(dst.vreg().vreg()), - CodeRange { from, to }, - ); - log::debug!(" -> invalid LR for def; created {:?}", dst_lr); - } - log::debug!(" -> has existing LR {:?}", dst_lr); - // Trim the LR to start here. - if self.ranges[dst_lr.index()].range.from - == self.cfginfo.block_entry[block.index()] - { - log::debug!(" -> started at block start; trimming to {:?}", pos); - self.ranges[dst_lr.index()].range.from = pos; - } - self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef); - live.set(dst.vreg().vreg(), false); - vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid(); - self.vreg_regs[dst.vreg().vreg()] = dst.vreg(); - - // Handle the use w.r.t. liveranges: make it live - // and create an initial LR back to the start of - // the block. - let pos = ProgPoint::after(inst); - let src_lr = if !live.get(src.vreg().vreg()) { - let range = CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: pos.next(), - }; - let src_lr = self.add_liverange_to_vreg( - VRegIndex::new(src.vreg().vreg()), - range, - ); - vreg_ranges[src.vreg().vreg()] = src_lr; - src_lr - } else { - vreg_ranges[src.vreg().vreg()] - }; - - log::debug!(" -> src LR {:?}", src_lr); - - // Add to live-set. - let src_is_dead_after_move = !live.get(src.vreg().vreg()); - live.set(src.vreg().vreg(), true); - - // Add to program-moves lists. - self.prog_move_srcs.push(( - (VRegIndex::new(src.vreg().vreg()), inst), - Allocation::none(), - )); - self.prog_move_dsts.push(( - (VRegIndex::new(dst.vreg().vreg()), inst.next()), - Allocation::none(), - )); - self.stats.prog_moves += 1; - if src_is_dead_after_move { - self.stats.prog_moves_dead_src += 1; - self.prog_move_merges.push((src_lr, dst_lr)); - } - } - } - - continue; - } - - // Process defs and uses. - for &cur_pos in &[InstPosition::After, InstPosition::Before] { - for i in 0..self.func.inst_operands(inst).len() { - // don't borrow `self` - let operand = self.func.inst_operands(inst)[i]; - let pos = match (operand.kind(), operand.pos()) { - (OperandKind::Mod, _) => ProgPoint::before(inst), - (OperandKind::Def, OperandPos::Before) => ProgPoint::before(inst), - (OperandKind::Def, OperandPos::After) => ProgPoint::after(inst), - (OperandKind::Use, OperandPos::After) => ProgPoint::after(inst), - // If this is a branch, extend `pos` to - // the end of the block. (Branch uses are - // blockparams and need to be live at the - // end of the block.) - (OperandKind::Use, _) if self.func.is_branch(inst) => { - self.cfginfo.block_exit[block.index()] - } - // If there are any reused inputs in this - // instruction, and this is *not* the - // reused input, force `pos` to - // `After`. (See note below for why; it's - // very subtle!) 
- (OperandKind::Use, OperandPos::Before) - if reused_input.is_some() && reused_input.unwrap() != i => - { - ProgPoint::after(inst) - } - (OperandKind::Use, OperandPos::Before) => ProgPoint::before(inst), - }; - - if pos.pos() != cur_pos { - continue; - } - - log::debug!( - "processing inst{} operand at {:?}: {:?}", - inst.index(), - pos, - operand - ); - - match operand.kind() { - OperandKind::Def | OperandKind::Mod => { - log::debug!("Def of {} at {:?}", operand.vreg(), pos); - - // Fill in vreg's actual data. - self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); - - // Get or create the LiveRange. - let mut lr = vreg_ranges[operand.vreg().vreg()]; - log::debug!(" -> has existing LR {:?}", lr); - // If there was no liverange (dead def), create a trivial one. - if !live.get(operand.vreg().vreg()) { - let from = match operand.kind() { - OperandKind::Def => pos, - OperandKind::Mod => self.cfginfo.block_entry[block.index()], - _ => unreachable!(), - }; - let to = match operand.kind() { - OperandKind::Def => pos.next(), - OperandKind::Mod => pos.next().next(), // both Before and After positions - _ => unreachable!(), - }; - lr = self.add_liverange_to_vreg( - VRegIndex::new(operand.vreg().vreg()), - CodeRange { from, to }, - ); - log::debug!(" -> invalid; created {:?}", lr); - vreg_ranges[operand.vreg().vreg()] = lr; - live.set(operand.vreg().vreg(), true); - } - // Create the use in the LiveRange. - self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); - // If def (not mod), this reg is now dead, - // scanning backward; make it so. - if operand.kind() == OperandKind::Def { - // Trim the range for this vreg to start - // at `pos` if it previously ended at the - // start of this block (i.e. was not - // merged into some larger LiveRange due - // to out-of-order blocks). - if self.ranges[lr.index()].range.from - == self.cfginfo.block_entry[block.index()] - { - log::debug!( - " -> started at block start; trimming to {:?}", - pos - ); - self.ranges[lr.index()].range.from = pos; - } - - self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef); - - // Remove from live-set. - live.set(operand.vreg().vreg(), false); - vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); - } - } - OperandKind::Use => { - // Create/extend the LiveRange if it - // doesn't already exist, and add the use - // to the range. - let mut lr = vreg_ranges[operand.vreg().vreg()]; - if !live.get(operand.vreg().vreg()) { - let range = CodeRange { - from: self.cfginfo.block_entry[block.index()], - to: pos.next(), - }; - lr = self.add_liverange_to_vreg( - VRegIndex::new(operand.vreg().vreg()), - range, - ); - vreg_ranges[operand.vreg().vreg()] = lr; - } - assert!(lr.is_valid()); - - log::debug!("Use of {:?} at {:?} -> {:?}", operand, pos, lr,); - - self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); - - // Add to live-set. - live.set(operand.vreg().vreg(), true); - } - } - } - } - - if self.func.is_safepoint(inst) { - self.safepoints.push(inst); - for vreg in live.iter() { - if let Some(safepoints) = self.safepoints_per_vreg.get_mut(&vreg) { - safepoints.insert(inst); - } - } - } - } - - // Block parameters define vregs at the very beginning of - // the block. Remove their live vregs from the live set - // here. - for vreg in self.func.block_params(block) { - if live.get(vreg.vreg()) { - live.set(vreg.vreg(), false); - } else { - // Create trivial liverange if blockparam is dead. 
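The reused-input case in the position match above is the subtle one: when a def reuses an input's register, every *other* use that would normally sit at the Before point is pushed to After, so its live range overlaps the reusing def and can never be handed the same register. A small illustration using the `Operand` API this patch introduces (vreg numbers and positions are purely illustrative): for `v1 := op v2, v3`, where `v1` reuses `v2`'s slot, `v3` is the operand whose effective position the pass moves to After.

use crate::{Operand, OperandKind, OperandPolicy, OperandPos, RegClass, VReg};

fn example_operands() -> Vec<Operand> {
    let v1 = VReg::new(1, RegClass::Int);
    let v2 = VReg::new(2, RegClass::Int);
    let v3 = VReg::new(3, RegClass::Int);
    vec![
        // Def of v1 that reuses the register of operand slot 1 (v2).
        Operand::new(v1, OperandPolicy::Reuse(1), OperandKind::Def, OperandPos::After),
        Operand::new(v2, OperandPolicy::Reg, OperandKind::Use, OperandPos::Before),
        // This use is the one the liveness pass treats as if it were at After.
        Operand::new(v3, OperandPolicy::Reg, OperandKind::Use, OperandPos::Before),
    ]
}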
- let start = self.cfginfo.block_entry[block.index()]; - self.add_liverange_to_vreg( - VRegIndex::new(vreg.vreg()), - CodeRange { - from: start, - to: start.next(), - }, - ); - } - // add `blockparam_ins` entries. - let vreg_idx = VRegIndex::new(vreg.vreg()); - for &pred in self.func.block_preds(block) { - self.blockparam_ins.push((vreg_idx, block, pred)); - } - } - } - - self.safepoints.sort_unstable(); - - // Make ranges in each vreg and uses in each range appear in - // sorted order. We built them in reverse order above, so this - // is a simple reversal, *not* a full sort. - // - // The ordering invariant is always maintained for uses and - // always for ranges in bundles (which are initialized later), - // but not always for ranges in vregs; those are sorted only - // when needed, here and then again at the end of allocation - // when resolving moves. - - for vreg in &mut self.vregs { - vreg.ranges.reverse(); - let mut last = None; - for entry in &mut vreg.ranges { - // Ranges may have been truncated above at defs. We - // need to update with the final range here. - entry.range = self.ranges[entry.index.index()].range; - // Assert in-order and non-overlapping. - assert!(last.is_none() || last.unwrap() <= entry.range.from); - last = Some(entry.range.to); - } - } - - for range in 0..self.ranges.len() { - self.ranges[range].uses.reverse(); - debug_assert!(self.ranges[range] - .uses - .windows(2) - .all(|win| win[0].pos <= win[1].pos)); - } - - // Insert safepoint virtual stack uses, if needed. - for vreg in self.func.reftype_vregs() { - if self.vregs[vreg.vreg()].is_pinned { - continue; - } - let vreg = VRegIndex::new(vreg.vreg()); - let mut inserted = false; - let mut safepoint_idx = 0; - for range_idx in 0..self.vregs[vreg.index()].ranges.len() { - let LiveRangeListEntry { range, index } = - self.vregs[vreg.index()].ranges[range_idx]; - while safepoint_idx < self.safepoints.len() - && ProgPoint::before(self.safepoints[safepoint_idx]) < range.from - { - safepoint_idx += 1; - } - while safepoint_idx < self.safepoints.len() - && range.contains_point(ProgPoint::before(self.safepoints[safepoint_idx])) - { - // Create a virtual use. - let pos = ProgPoint::before(self.safepoints[safepoint_idx]); - let operand = Operand::new( - self.vreg_regs[vreg.index()], - OperandPolicy::Stack, - OperandKind::Use, - OperandPos::Before, - ); - - log::debug!( - "Safepoint-induced stack use of {:?} at {:?} -> {:?}", - operand, - pos, - index, - ); - - self.insert_use_into_liverange(index, Use::new(operand, pos, SLOT_NONE)); - safepoint_idx += 1; - - inserted = true; - } - - if inserted { - self.ranges[index.index()] - .uses - .sort_unstable_by_key(|u| u.pos); - } - - if safepoint_idx >= self.safepoints.len() { - break; - } - } - } - - // Do a fixed-reg cleanup pass: if there are any LiveRanges with - // multiple uses (or defs) at the same ProgPoint and there is - // more than one FixedReg constraint at that ProgPoint, we - // need to record all but one of them in a special fixup list - // and handle them later; otherwise, bundle-splitting to - // create minimal bundles becomes much more complex (we would - // have to split the multiple uses at the same progpoint into - // different bundles, which breaks invariants related to - // disjoint ranges and bundles). 
- let mut seen_fixed_for_vreg: SmallVec<[VReg; 16]> = smallvec![]; - let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; - let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; - for vreg in 0..self.vregs.len() { - for range_idx in 0..self.vregs[vreg].ranges.len() { - let entry = self.vregs[vreg].ranges[range_idx]; - let range = entry.index; - log::debug!( - "multi-fixed-reg cleanup: vreg {:?} range {:?}", - VRegIndex::new(vreg), - range, - ); - let mut last_point = None; - let mut fixup_multi_fixed_vregs = |pos: ProgPoint, - slot: usize, - op: &mut Operand, - fixups: &mut Vec<( - ProgPoint, - PRegIndex, - PRegIndex, - usize, - )>| { - if last_point.is_some() && Some(pos) != last_point { - seen_fixed_for_vreg.clear(); - first_preg.clear(); - } - last_point = Some(pos); - - if let OperandPolicy::FixedReg(preg) = op.policy() { - let vreg_idx = VRegIndex::new(op.vreg().vreg()); - let preg_idx = PRegIndex::new(preg.index()); - log::debug!( - "at pos {:?}, vreg {:?} has fixed constraint to preg {:?}", - pos, - vreg_idx, - preg_idx - ); - if let Some(idx) = seen_fixed_for_vreg.iter().position(|r| *r == op.vreg()) - { - let orig_preg = first_preg[idx]; - if orig_preg != preg_idx { - log::debug!(" -> duplicate; switching to policy Reg"); - fixups.push((pos, orig_preg, preg_idx, slot)); - *op = Operand::new( - op.vreg(), - OperandPolicy::Reg, - op.kind(), - op.pos(), - ); - log::debug!( - " -> extra clobber {} at inst{}", - preg, - pos.inst().index() - ); - extra_clobbers.push((preg, pos.inst())); - } - } else { - seen_fixed_for_vreg.push(op.vreg()); - first_preg.push(preg_idx); - } - } - }; - - for u in &mut self.ranges[range.index()].uses { - let pos = u.pos; - let slot = u.slot as usize; - fixup_multi_fixed_vregs( - pos, - slot, - &mut u.operand, - &mut self.multi_fixed_reg_fixups, - ); - } - - for &(clobber, inst) in &extra_clobbers { - let range = CodeRange { - from: ProgPoint::before(inst), - to: ProgPoint::before(inst.next()), - }; - self.add_liverange_to_preg(range, clobber); - } - - extra_clobbers.clear(); - first_preg.clear(); - seen_fixed_for_vreg.clear(); - } - } - - self.clobbers.sort_unstable(); - self.blockparam_ins.sort_unstable(); - self.blockparam_outs.sort_unstable(); - self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos); - self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos); - - log::debug!("prog_move_srcs = {:?}", self.prog_move_srcs); - log::debug!("prog_move_dsts = {:?}", self.prog_move_dsts); - - self.stats.initial_liverange_count = self.ranges.len(); - self.stats.blockparam_ins_count = self.blockparam_ins.len(); - self.stats.blockparam_outs_count = self.blockparam_outs.len(); - - Ok(()) - } - - fn create_bundle(&mut self) -> LiveBundleIndex { - let bundle = self.bundles.len(); - self.bundles.push(LiveBundle { - allocation: Allocation::none(), - ranges: smallvec![], - spillset: SpillSetIndex::invalid(), - prio: 0, - spill_weight_and_props: 0, - }); - LiveBundleIndex::new(bundle) - } - - fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool { - if from == to { - // Merge bundle into self -- trivial merge. - return true; - } - log::debug!( - "merging from bundle{} to bundle{}", - from.index(), - to.index() - ); - - // Both bundles must deal with the same RegClass. 
- let from_rc = self.spillsets[self.bundles[from.index()].spillset.index()].class; - let to_rc = self.spillsets[self.bundles[to.index()].spillset.index()].class; - if from_rc != to_rc { - log::debug!(" -> mismatching reg classes"); - return false; - } - - // If either bundle is already assigned (due to a pinned vreg), don't merge. - if !self.bundles[from.index()].allocation.is_none() - || !self.bundles[to.index()].allocation.is_none() - { - log::debug!("one of the bundles is already assigned (pinned)"); - return false; - } - - #[cfg(debug)] - { - // Sanity check: both bundles should contain only ranges with appropriate VReg classes. - for entry in &self.bundles[from.index()].ranges { - let vreg = self.ranges[entry.index.index()].vreg; - assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - } - for entry in &self.bundles[to.index()].ranges { - let vreg = self.ranges[entry.index.index()].vreg; - assert_eq!(rc, self.vregs[vreg.index()].reg.class()); - } - } - - // Check for overlap in LiveRanges and for conflicting - // requirements. - let ranges_from = &self.bundles[from.index()].ranges[..]; - let ranges_to = &self.bundles[to.index()].ranges[..]; - let mut idx_from = 0; - let mut idx_to = 0; - let mut range_count = 0; - while idx_from < ranges_from.len() && idx_to < ranges_to.len() { - range_count += 1; - if range_count > 200 { - log::debug!( - "reached merge complexity (range_count = {}); exiting", - range_count - ); - // Limit merge complexity. - return false; - } - - if ranges_from[idx_from].range.from >= ranges_to[idx_to].range.to { - idx_to += 1; - } else if ranges_to[idx_to].range.from >= ranges_from[idx_from].range.to { - idx_from += 1; - } else { - // Overlap -- cannot merge. - log::debug!( - " -> overlap between {:?} and {:?}, exiting", - ranges_from[idx_from].index, - ranges_to[idx_to].index - ); - return false; - } - } - - // Check for a requirements conflict. - if self.bundles[from.index()].cached_stack() - || self.bundles[from.index()].cached_fixed() - || self.bundles[to.index()].cached_stack() - || self.bundles[to.index()].cached_fixed() - { - let req = self - .compute_requirement(from) - .merge(self.compute_requirement(to)); - if req == Requirement::Conflict { - log::debug!(" -> conflicting requirements; aborting merge"); - return false; - } - } - - log::debug!(" -> committing to merge"); - - // If we reach here, then the bundles do not overlap -- merge - // them! We do this with a merge-sort-like scan over both - // lists, building a new range list and replacing the list on - // `to` when we're done. - if ranges_from.is_empty() { - // `from` bundle is empty -- trivial merge. - log::debug!(" -> from bundle{} is empty; trivial merge", from.index()); - return true; - } - if ranges_to.is_empty() { - // `to` bundle is empty -- just move the list over from - // `from` and set `bundle` up-link on all ranges. 
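The range-overlap check in `merge_bundles` above relies on each bundle's range list being sorted and internally non-overlapping, so detecting any overlap between two bundles is one linear, merge-style pass (with an explicit complexity cap) rather than a quadratic probe. The same test in isolation, with a hypothetical `Range` type in place of `CodeRange` and its list entries:

#[derive(Copy, Clone)]
struct Range {
    from: u32,
    to: u32, // half-open [from, to)
}

/// Both slices must be sorted by `from` and internally non-overlapping.
fn lists_overlap(a: &[Range], b: &[Range]) -> bool {
    let (mut i, mut j) = (0, 0);
    while i < a.len() && j < b.len() {
        if a[i].from >= b[j].to {
            j += 1; // b[j] ends before a[i] begins
        } else if b[j].from >= a[i].to {
            i += 1; // a[i] ends before b[j] begins
        } else {
            return true; // neither precedes the other, so they overlap
        }
    }
    false
}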
- log::debug!(" -> to bundle{} is empty; trivial merge", to.index()); - let list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]); - for entry in &list { - self.ranges[entry.index.index()].bundle = to; - - if self.annotations_enabled { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); - } - } - self.bundles[to.index()].ranges = list; - - if self.bundles[from.index()].cached_stack() { - self.bundles[to.index()].set_cached_stack(); - } - if self.bundles[from.index()].cached_fixed() { - self.bundles[to.index()].set_cached_fixed(); - } - - return true; - } - - log::debug!( - "merging: ranges_from = {:?} ranges_to = {:?}", - ranges_from, - ranges_to - ); - - // Two non-empty lists of LiveRanges: concatenate and - // sort. This is faster than a mergesort-like merge into a new - // list, empirically. - let from_list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]); - for entry in &from_list { - self.ranges[entry.index.index()].bundle = to; - } - self.bundles[to.index()] - .ranges - .extend_from_slice(&from_list[..]); - self.bundles[to.index()] - .ranges - .sort_unstable_by_key(|entry| entry.range.from); - - if self.annotations_enabled { - log::debug!("merging: merged = {:?}", self.bundles[to.index()].ranges); - let mut last_range = None; - for i in 0..self.bundles[to.index()].ranges.len() { - let entry = self.bundles[to.index()].ranges[i]; - if last_range.is_some() { - assert!(last_range.unwrap() < entry.range); - } - last_range = Some(entry.range); - - if self.ranges[entry.index.index()].bundle == from { - self.annotate( - entry.range.from, - format!( - " MERGE range{} v{} from bundle{} to bundle{}", - entry.index.index(), - self.ranges[entry.index.index()].vreg.index(), - from.index(), - to.index(), - ), - ); - } - - log::debug!( - " -> merged result for bundle{}: range{}", - to.index(), - entry.index.index(), - ); - } - } - - if self.bundles[from.index()].spillset != self.bundles[to.index()].spillset { - let from_vregs = std::mem::replace( - &mut self.spillsets[self.bundles[from.index()].spillset.index()].vregs, - smallvec![], - ); - let to_vregs = &mut self.spillsets[self.bundles[to.index()].spillset.index()].vregs; - for vreg in from_vregs { - if !to_vregs.contains(&vreg) { - to_vregs.push(vreg); - } - } - } - - if self.bundles[from.index()].cached_stack() { - self.bundles[to.index()].set_cached_stack(); - } - if self.bundles[from.index()].cached_fixed() { - self.bundles[to.index()].set_cached_fixed(); - } - - true - } - - fn merge_vreg_bundles(&mut self) { - // Create a bundle for every vreg, initially. - log::debug!("merge_vreg_bundles: creating vreg bundles"); - for vreg in 0..self.vregs.len() { - let vreg = VRegIndex::new(vreg); - if self.vregs[vreg.index()].ranges.is_empty() { - continue; - } - - // If this is a pinned vreg, go ahead and add it to the - // commitment map, and avoid creating a bundle entirely. 
- if self.vregs[vreg.index()].is_pinned { - for entry in &self.vregs[vreg.index()].ranges { - let preg = self - .func - .is_pinned_vreg(self.vreg_regs[vreg.index()]) - .unwrap(); - let key = LiveRangeKey::from_range(&entry.range); - self.pregs[preg.index()] - .allocations - .btree - .insert(key, LiveRangeIndex::invalid()); - } - continue; - } - - let bundle = self.create_bundle(); - self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); - log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); - for entry in &self.bundles[bundle.index()].ranges { - log::debug!( - " -> with LR range{}: {:?}", - entry.index.index(), - entry.range - ); - self.ranges[entry.index.index()].bundle = bundle; - } - - let mut fixed = false; - let mut stack = false; - for entry in &self.bundles[bundle.index()].ranges { - for u in &self.ranges[entry.index.index()].uses { - if let OperandPolicy::FixedReg(_) = u.operand.policy() { - fixed = true; - } - if let OperandPolicy::Stack = u.operand.policy() { - stack = true; - } - if fixed && stack { - break; - } - } - } - if fixed { - self.bundles[bundle.index()].set_cached_fixed(); - } - if stack { - self.bundles[bundle.index()].set_cached_stack(); - } - - // Create a spillslot for this bundle. - let ssidx = SpillSetIndex::new(self.spillsets.len()); - let reg = self.vreg_regs[vreg.index()]; - let size = self.func.spillslot_size(reg.class()) as u8; - self.spillsets.push(SpillSet { - vregs: smallvec![vreg], - slot: SpillSlotIndex::invalid(), - size, - required: false, - class: reg.class(), - reg_hint: PReg::invalid(), - spill_bundle: LiveBundleIndex::invalid(), - }); - self.bundles[bundle.index()].spillset = ssidx; - } - - for inst in 0..self.func.insts() { - let inst = Inst::new(inst); - - // Attempt to merge Reuse-policy operand outputs with the - // corresponding inputs. - for op in self.func.inst_operands(inst) { - if let OperandPolicy::Reuse(reuse_idx) = op.policy() { - let src_vreg = op.vreg(); - let dst_vreg = self.func.inst_operands(inst)[reuse_idx].vreg(); - if self.vregs[src_vreg.vreg()].is_pinned - || self.vregs[dst_vreg.vreg()].is_pinned - { - continue; - } - - log::debug!( - "trying to merge reused-input def: src {} to dst {}", - src_vreg, - dst_vreg - ); - let src_bundle = - self.ranges[self.vregs[src_vreg.vreg()].ranges[0].index.index()].bundle; - assert!(src_bundle.is_valid()); - let dest_bundle = - self.ranges[self.vregs[dst_vreg.vreg()].ranges[0].index.index()].bundle; - assert!(dest_bundle.is_valid()); - self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); - } - } - } - - // Attempt to merge blockparams with their inputs. - for i in 0..self.blockparam_outs.len() { - let (from_vreg, _, _, to_vreg) = self.blockparam_outs[i]; - log::debug!( - "trying to merge blockparam v{} with input v{}", - to_vreg.index(), - from_vreg.index() - ); - let to_bundle = self.ranges[self.vregs[to_vreg.index()].ranges[0].index.index()].bundle; - assert!(to_bundle.is_valid()); - let from_bundle = - self.ranges[self.vregs[from_vreg.index()].ranges[0].index.index()].bundle; - assert!(from_bundle.is_valid()); - log::debug!( - " -> from bundle{} to bundle{}", - from_bundle.index(), - to_bundle.index() - ); - self.merge_bundles(from_bundle, to_bundle); - } - - // Attempt to merge move srcs/dsts. 
- for i in 0..self.prog_move_merges.len() { - let (src, dst) = self.prog_move_merges[i]; - log::debug!("trying to merge move src LR {:?} to dst LR {:?}", src, dst); - let src = self.resolve_merged_lr(src); - let dst = self.resolve_merged_lr(dst); - log::debug!( - "resolved LR-construction merging chains: move-merge is now src LR {:?} to dst LR {:?}", - src, - dst - ); - - let dst_vreg = self.vreg_regs[self.ranges[dst.index()].vreg.index()]; - let src_vreg = self.vreg_regs[self.ranges[src.index()].vreg.index()]; - if self.vregs[src_vreg.vreg()].is_pinned && self.vregs[dst_vreg.vreg()].is_pinned { - continue; - } - if self.vregs[src_vreg.vreg()].is_pinned { - let dest_bundle = self.ranges[dst.index()].bundle; - let spillset = self.bundles[dest_bundle.index()].spillset; - self.spillsets[spillset.index()].reg_hint = - self.func.is_pinned_vreg(src_vreg).unwrap(); - continue; - } - if self.vregs[dst_vreg.vreg()].is_pinned { - let src_bundle = self.ranges[src.index()].bundle; - let spillset = self.bundles[src_bundle.index()].spillset; - self.spillsets[spillset.index()].reg_hint = - self.func.is_pinned_vreg(dst_vreg).unwrap(); - continue; - } - - let src_bundle = self.ranges[src.index()].bundle; - assert!(src_bundle.is_valid()); - let dest_bundle = self.ranges[dst.index()].bundle; - assert!(dest_bundle.is_valid()); - self.stats.prog_move_merge_attempt += 1; - if self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle) { - self.stats.prog_move_merge_success += 1; - } - } - - log::debug!("done merging bundles"); - } - - fn resolve_merged_lr(&self, mut lr: LiveRangeIndex) -> LiveRangeIndex { - let mut iter = 0; - while iter < 100 && self.ranges[lr.index()].merged_into.is_valid() { - lr = self.ranges[lr.index()].merged_into; - iter += 1; - } - lr - } - - fn compute_bundle_prio(&self, bundle: LiveBundleIndex) -> u32 { - // The priority is simply the total "length" -- the number of - // instructions covered by all LiveRanges. 
- let mut total = 0; - for entry in &self.bundles[bundle.index()].ranges { - total += entry.range.len() as u32; - } - total - } - - fn queue_bundles(&mut self) { - for bundle in 0..self.bundles.len() { - log::debug!("enqueueing bundle{}", bundle); - if self.bundles[bundle].ranges.is_empty() { - log::debug!(" -> no ranges; skipping"); - continue; - } - let bundle = LiveBundleIndex::new(bundle); - let prio = self.compute_bundle_prio(bundle); - log::debug!(" -> prio {}", prio); - self.bundles[bundle.index()].prio = prio; - self.recompute_bundle_properties(bundle); - self.allocation_queue - .insert(bundle, prio as usize, PReg::invalid()); - } - self.stats.merged_bundle_count = self.allocation_queue.heap.len(); - } - - fn process_bundles(&mut self) -> Result<(), RegAllocError> { - let mut count = 0; - while let Some((bundle, reg_hint)) = self.allocation_queue.pop() { - self.stats.process_bundle_count += 1; - self.process_bundle(bundle, reg_hint)?; - count += 1; - if count > self.func.insts() * 50 { - self.dump_state(); - panic!("Infinite loop!"); - } - } - self.stats.final_liverange_count = self.ranges.len(); - self.stats.final_bundle_count = self.bundles.len(); - self.stats.spill_bundle_count = self.spilled_bundles.len(); - - Ok(()) - } - - fn dump_state(&self) { - log::debug!("Bundles:"); - for (i, b) in self.bundles.iter().enumerate() { - log::debug!( - "bundle{}: spillset={:?} alloc={:?}", - i, - b.spillset, - b.allocation - ); - for entry in &b.ranges { - log::debug!( - " * range {:?} -- {:?}: range{}", - entry.range.from, - entry.range.to, - entry.index.index() - ); - } - } - log::debug!("VRegs:"); - for (i, v) in self.vregs.iter().enumerate() { - log::debug!("vreg{}:", i); - for entry in &v.ranges { - log::debug!( - " * range {:?} -- {:?}: range{}", - entry.range.from, - entry.range.to, - entry.index.index() - ); - } - } - log::debug!("Ranges:"); - for (i, r) in self.ranges.iter().enumerate() { - log::debug!( - "range{}: range={:?} vreg={:?} bundle={:?} weight={}", - i, - r.range, - r.vreg, - r.bundle, - r.uses_spill_weight(), - ); - for u in &r.uses { - log::debug!(" * use at {:?} (slot {}): {:?}", u.pos, u.slot, u.operand); - } - } - } - - fn try_to_allocate_bundle_to_reg( - &mut self, - bundle: LiveBundleIndex, - reg: PRegIndex, - // if the max bundle weight in the conflict set exceeds this - // cost (if provided), just return - // `AllocRegResult::ConflictHighCost`. - max_allowable_cost: Option, - ) -> AllocRegResult { - log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); - let mut conflicts = smallvec![]; - let mut conflict_set = FxHashSet::default(); - let mut max_conflict_weight = 0; - // Traverse the BTreeMap in order by requesting the whole - // range spanned by the bundle and iterating over that - // concurrently with our ranges. Because our ranges are in - // order, and the BTreeMap is as well, this allows us to have - // an overall O(n log n) + O(b) complexity, where the PReg has - // n current ranges and the bundle has b ranges, rather than - // O(b * n log n) with the simple probe-for-each-bundle-range - // approach. - // - // Note that the comparator function on a CodeRange tests for - // *overlap*, so we are checking whether the BTree contains - // any preg range that *overlaps* with range `range`, not - // literally the range `range`. 
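The BTreeMap behavior described above is worth spelling out: the per-PReg map is keyed by a range type whose ordering treats any two overlapping ranges as equal, so plain lookups and range queries surface conflicting reservations directly. A self-contained sketch with a hypothetical `OverlapKey` standing in for `LiveRangeKey` (the real keys are built from `ProgPoint` indices, and the map only ever stores mutually disjoint keys, which keeps the ordering well defined):

use std::cmp::Ordering;
use std::collections::BTreeMap;

// Half-open [from, to). Two keys compare as Equal iff their ranges overlap;
// this deliberately bends the usual Eq contract, which is why such keys are
// confined to this one map.
#[derive(Copy, Clone, Debug)]
struct OverlapKey {
    from: u32,
    to: u32,
}

impl PartialEq for OverlapKey {
    fn eq(&self, other: &Self) -> bool {
        self.from < other.to && other.from < self.to
    }
}
impl Eq for OverlapKey {}

impl Ord for OverlapKey {
    fn cmp(&self, other: &Self) -> Ordering {
        if self.to <= other.from {
            Ordering::Less
        } else if other.to <= self.from {
            Ordering::Greater
        } else {
            Ordering::Equal // overlapping ranges collide in the map
        }
    }
}
impl PartialOrd for OverlapKey {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let mut reservations: BTreeMap<OverlapKey, &str> = BTreeMap::new();
    reservations.insert(OverlapKey { from: 10, to: 20 }, "bundle A");
    // Any query range overlapping [10, 20) finds the reservation; a disjoint
    // one does not.
    assert!(reservations.contains_key(&OverlapKey { from: 15, to: 16 }));
    assert!(!reservations.contains_key(&OverlapKey { from: 20, to: 25 }));
}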
- let bundle_ranges = &self.bundles[bundle.index()].ranges; - let from_key = LiveRangeKey::from_range(&CodeRange { - from: bundle_ranges.first().unwrap().range.from, - to: bundle_ranges.first().unwrap().range.from, - }); - let mut preg_range_iter = self.pregs[reg.index()] - .allocations - .btree - .range(from_key..) - .peekable(); - log::debug!( - "alloc map for {:?} in range {:?}..: {:?}", - reg, - from_key, - self.pregs[reg.index()].allocations.btree - ); - let mut first_conflict: Option = None; - - 'ranges: for entry in bundle_ranges { - log::debug!(" -> range LR {:?}: {:?}", entry.index, entry.range); - let key = LiveRangeKey::from_range(&entry.range); - - let mut skips = 0; - 'alloc: loop { - log::debug!(" -> PReg range {:?}", preg_range_iter.peek()); - - // Advance our BTree traversal until it is >= this bundle - // range (i.e., skip PReg allocations in the BTree that - // are completely before this bundle range). - - if preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { - log::debug!( - "Skipping PReg range {:?}", - preg_range_iter.peek().unwrap().0 - ); - preg_range_iter.next(); - skips += 1; - if skips >= 16 { - let from_pos = entry.range.from; - let from_key = LiveRangeKey::from_range(&CodeRange { - from: from_pos, - to: from_pos, - }); - preg_range_iter = self.pregs[reg.index()] - .allocations - .btree - .range(from_key..) - .peekable(); - skips = 0; - } - continue 'alloc; - } - skips = 0; - - // If there are no more PReg allocations, we're done! - if preg_range_iter.peek().is_none() { - log::debug!(" -> no more PReg allocations; so no conflict possible!"); - break 'ranges; - } - - // If the current PReg range is beyond this range, there is no conflict; continue. - if *preg_range_iter.peek().unwrap().0 > key { - log::debug!( - " -> next PReg allocation is at {:?}; moving to next VReg range", - preg_range_iter.peek().unwrap().0 - ); - break 'alloc; - } - - // Otherwise, there is a conflict. - let preg_key = *preg_range_iter.peek().unwrap().0; - assert_eq!(preg_key, key); // Assert that this range overlaps. - let preg_range = preg_range_iter.next().unwrap().1; - - log::debug!(" -> btree contains range {:?} that overlaps", preg_range); - if preg_range.is_valid() { - log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); - // range from an allocated bundle: find the bundle and add to - // conflicts list. - let conflict_bundle = self.ranges[preg_range.index()].bundle; - log::debug!(" -> conflict bundle {:?}", conflict_bundle); - if !conflict_set.contains(&conflict_bundle) { - conflicts.push(conflict_bundle); - conflict_set.insert(conflict_bundle); - max_conflict_weight = std::cmp::max( - max_conflict_weight, - self.bundles[conflict_bundle.index()].cached_spill_weight(), - ); - if max_allowable_cost.is_some() - && max_conflict_weight > max_allowable_cost.unwrap() - { - log::debug!(" -> reached high cost, retrying early"); - return AllocRegResult::ConflictHighCost; - } - } - - if first_conflict.is_none() { - first_conflict = Some(ProgPoint::from_index(std::cmp::max( - preg_key.from, - key.from, - ))); - } - } else { - log::debug!(" -> conflict with fixed reservation"); - // range from a direct use of the PReg (due to clobber). - return AllocRegResult::ConflictWithFixed( - max_conflict_weight, - ProgPoint::from_index(preg_key.from), - ); - } - } - } - - if conflicts.len() > 0 { - return AllocRegResult::Conflict(conflicts, first_conflict.unwrap()); - } - - // We can allocate! Add our ranges to the preg's BTree. 
- let preg = self.pregs[reg.index()].reg; - log::debug!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); - self.bundles[bundle.index()].allocation = Allocation::reg(preg); - for entry in &self.bundles[bundle.index()].ranges { - self.pregs[reg.index()] - .allocations - .btree - .insert(LiveRangeKey::from_range(&entry.range), entry.index); - } - - AllocRegResult::Allocated(Allocation::reg(preg)) - } - - fn evict_bundle(&mut self, bundle: LiveBundleIndex) { - log::debug!( - "evicting bundle {:?}: alloc {:?}", - bundle, - self.bundles[bundle.index()].allocation - ); - let preg = match self.bundles[bundle.index()].allocation.as_reg() { - Some(preg) => preg, - None => { - log::debug!( - " -> has no allocation! {:?}", - self.bundles[bundle.index()].allocation - ); - return; - } - }; - let preg_idx = PRegIndex::new(preg.index()); - self.bundles[bundle.index()].allocation = Allocation::none(); - for entry in &self.bundles[bundle.index()].ranges { - log::debug!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); - self.pregs[preg_idx.index()] - .allocations - .btree - .remove(&LiveRangeKey::from_range(&entry.range)); - } - let prio = self.bundles[bundle.index()].prio; - log::debug!(" -> prio {}; back into queue", prio); - self.allocation_queue - .insert(bundle, prio as usize, PReg::invalid()); - } - - fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { - self.bundles[bundle.index()].cached_spill_weight() - } - - fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { - log::debug!("maximum_spill_weight_in_bundle_set: {:?}", bundles); - let m = bundles - .iter() - .map(|&b| { - let w = self.bundles[b.index()].cached_spill_weight(); - log::debug!("bundle{}: {}", b.index(), w); - w - }) - .max() - .unwrap_or(0); - log::debug!(" -> max: {}", m); - m - } - - fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { - log::debug!("recompute bundle properties: bundle {:?}", bundle); - - let minimal; - let mut fixed = false; - let mut stack = false; - let bundledata = &self.bundles[bundle.index()]; - let first_range = bundledata.ranges[0].index; - let first_range_data = &self.ranges[first_range.index()]; - - self.bundles[bundle.index()].prio = self.compute_bundle_prio(bundle); - - if first_range_data.vreg.is_invalid() { - log::debug!(" -> no vreg; minimal and fixed"); - minimal = true; - fixed = true; - } else { - for u in &first_range_data.uses { - log::debug!(" -> use: {:?}", u); - if let OperandPolicy::FixedReg(_) = u.operand.policy() { - log::debug!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); - fixed = true; - } - if let OperandPolicy::Stack = u.operand.policy() { - log::debug!(" -> stack use at {:?}: {:?}", u.pos, u.operand); - stack = true; - } - if stack && fixed { - break; - } - } - // Minimal if the range covers only one instruction. Note - // that it could cover just one ProgPoint, - // i.e. X.Before..X.After, or two ProgPoints, - // i.e. X.Before..X+1.Before. 
- log::debug!(" -> first range has range {:?}", first_range_data.range); - let bundle_start = self.bundles[bundle.index()] - .ranges - .first() - .unwrap() - .range - .from; - let bundle_end = self.bundles[bundle.index()].ranges.last().unwrap().range.to; - minimal = bundle_start.inst() == bundle_end.prev().inst(); - log::debug!(" -> minimal: {}", minimal); - } - - let spill_weight = if minimal { - if fixed { - log::debug!(" -> fixed and minimal: spill weight 2000000"); - 2_000_000 - } else { - log::debug!(" -> non-fixed and minimal: spill weight 1000000"); - 1_000_000 - } - } else { - let mut total = 0; - for entry in &self.bundles[bundle.index()].ranges { - let range_data = &self.ranges[entry.index.index()]; - log::debug!( - " -> uses spill weight: +{}", - range_data.uses_spill_weight() - ); - total += range_data.uses_spill_weight(); - } - - if self.bundles[bundle.index()].prio > 0 { - log::debug!( - " -> dividing by prio {}; final weight {}", - self.bundles[bundle.index()].prio, - total / self.bundles[bundle.index()].prio - ); - total / self.bundles[bundle.index()].prio - } else { - 0 - } - }; - - self.bundles[bundle.index()].set_cached_spill_weight_and_props( - spill_weight, - minimal, - fixed, - stack, - ); - } - - fn minimal_bundle(&self, bundle: LiveBundleIndex) -> bool { - self.bundles[bundle.index()].cached_minimal() - } - - fn recompute_range_properties(&mut self, range: LiveRangeIndex) { - let rangedata = &mut self.ranges[range.index()]; - let mut w = 0; - for u in &rangedata.uses { - w += u.weight as u32; - log::debug!("range{}: use {:?}", range.index(), u); - } - rangedata.set_uses_spill_weight(w); - if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { - // Note that we *set* the flag here, but we never *clear* - // it: it may be set by a progmove as well (which does not - // create an explicit use or def), and we want to preserve - // that. We will never split or trim ranges in a way that - // removes a def at the front and requires the flag to be - // cleared. - rangedata.set_flag(LiveRangeFlag::StartsAtDef); - } - } - - fn get_or_create_spill_bundle( - &mut self, - bundle: LiveBundleIndex, - create_if_absent: bool, - ) -> Option { - let ssidx = self.bundles[bundle.index()].spillset; - let idx = self.spillsets[ssidx.index()].spill_bundle; - if idx.is_valid() { - Some(idx) - } else if create_if_absent { - let idx = self.create_bundle(); - self.spillsets[ssidx.index()].spill_bundle = idx; - self.bundles[idx.index()].spillset = ssidx; - self.spilled_bundles.push(idx); - Some(idx) - } else { - None - } - } - - fn split_and_requeue_bundle( - &mut self, - bundle: LiveBundleIndex, - mut split_at: ProgPoint, - reg_hint: PReg, - ) { - self.stats.splits += 1; - log::debug!( - "split bundle {:?} at {:?} and requeue with reg hint (for first part) {:?}", - bundle, - split_at, - reg_hint, - ); - - // Split `bundle` at `split_at`, creating new LiveRanges and - // bundles (and updating vregs' linked lists appropriately), - // and enqueue the new bundles. - - let spillset = self.bundles[bundle.index()].spillset; - - assert!(!self.bundles[bundle.index()].ranges.is_empty()); - // Split point *at* start is OK; this means we peel off - // exactly one use to create a minimal bundle. 
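Restating the spill-weight policy above in one place: minimal bundles are effectively unevictable (even more so when they carry a fixed-register constraint), and every other bundle is weighted by its total use weight divided by its priority (its length in instructions), so long but lightly used bundles are the cheapest to evict. A direct restatement of that computation:

/// Mirrors the weight policy computed in `recompute_bundle_properties` above.
fn bundle_spill_weight(minimal: bool, fixed: bool, total_use_weight: u32, prio: u32) -> u32 {
    if minimal {
        if fixed {
            2_000_000
        } else {
            1_000_000
        }
    } else if prio > 0 {
        total_use_weight / prio
    } else {
        0
    }
}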
- let bundle_start = self.bundles[bundle.index()] - .ranges - .first() - .unwrap() - .range - .from; - assert!(split_at >= bundle_start); - let bundle_end = self.bundles[bundle.index()].ranges.last().unwrap().range.to; - assert!(split_at < bundle_end); - - // Is the split point *at* the start? If so, peel off the - // first use: set the split point just after it, or just - // before it if it comes after the start of the bundle. - if split_at == bundle_start { - // Find any uses; if none, just chop off one instruction. - let mut first_use = None; - 'outer: for entry in &self.bundles[bundle.index()].ranges { - for u in &self.ranges[entry.index.index()].uses { - first_use = Some(u.pos); - break 'outer; - } - } - log::debug!(" -> first use loc is {:?}", first_use); - split_at = match first_use { - Some(pos) => { - if pos.inst() == bundle_start.inst() { - ProgPoint::before(pos.inst().next()) - } else { - ProgPoint::before(pos.inst()) - } - } - None => ProgPoint::before( - self.bundles[bundle.index()] - .ranges - .first() - .unwrap() - .range - .from - .inst() - .next(), - ), - }; - log::debug!( - "split point is at bundle start; advancing to {:?}", - split_at - ); - } else { - // Don't split in the middle of an instruction -- this could - // create impossible moves (we cannot insert a move between an - // instruction's uses and defs). - if split_at.pos() == InstPosition::After { - split_at = split_at.next(); - } - if split_at >= bundle_end { - split_at = split_at.prev().prev(); - } - } - - assert!(split_at > bundle_start && split_at < bundle_end); - - // We need to find which LRs fall on each side of the split, - // which LR we need to split down the middle, then update the - // current bundle, create a new one, and (re)-queue both. - - log::debug!(" -> LRs: {:?}", self.bundles[bundle.index()].ranges); - - let mut last_lr_in_old_bundle_idx = 0; // last LR-list index in old bundle - let mut first_lr_in_new_bundle_idx = 0; // first LR-list index in new bundle - for (i, entry) in self.bundles[bundle.index()].ranges.iter().enumerate() { - if split_at > entry.range.from { - last_lr_in_old_bundle_idx = i; - first_lr_in_new_bundle_idx = i; - } - if split_at < entry.range.to { - first_lr_in_new_bundle_idx = i; - break; - } - } - - log::debug!( - " -> last LR in old bundle: LR {:?}", - self.bundles[bundle.index()].ranges[last_lr_in_old_bundle_idx] - ); - log::debug!( - " -> first LR in new bundle: LR {:?}", - self.bundles[bundle.index()].ranges[first_lr_in_new_bundle_idx] - ); - - // Take the sublist of LRs that will go in the new bundle. - let mut new_lr_list: LiveRangeList = self.bundles[bundle.index()] - .ranges - .iter() - .cloned() - .skip(first_lr_in_new_bundle_idx) - .collect(); - self.bundles[bundle.index()] - .ranges - .truncate(last_lr_in_old_bundle_idx + 1); - - // If the first entry in `new_lr_list` is a LR that is split - // down the middle, replace it with a new LR and chop off the - // end of the same LR in the original list. 
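The bookkeeping above boils down to a simple partition: ranges wholly before the split point stay in the old bundle, ranges wholly after it move to the new bundle, and at most one straddling range is cut in two (as the following lines do). A simplified sketch with a hypothetical `Range` type; the real code also splits the straddling range's use list and appends the new half to the vreg's range list for later re-sorting:

#[derive(Copy, Clone)]
struct Range {
    from: u32,
    to: u32, // half-open [from, to)
}

/// `ranges` must be sorted and non-overlapping; `split_at` must fall
/// strictly inside the bundle's overall extent.
fn split_ranges(ranges: &[Range], split_at: u32) -> (Vec<Range>, Vec<Range>) {
    let mut old = Vec::new();
    let mut new = Vec::new();
    for &r in ranges {
        if r.to <= split_at {
            old.push(r);
        } else if r.from >= split_at {
            new.push(r);
        } else {
            // The straddling range is cut at the split point.
            old.push(Range { from: r.from, to: split_at });
            new.push(Range { from: split_at, to: r.to });
        }
    }
    (old, new)
}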
- if split_at > new_lr_list[0].range.from { - assert_eq!(last_lr_in_old_bundle_idx, first_lr_in_new_bundle_idx); - let orig_lr = new_lr_list[0].index; - let new_lr = self.create_liverange(CodeRange { - from: split_at, - to: new_lr_list[0].range.to, - }); - self.ranges[new_lr.index()].vreg = self.ranges[orig_lr.index()].vreg; - log::debug!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); - let first_use = self.ranges[orig_lr.index()] - .uses - .iter() - .position(|u| u.pos >= split_at) - .unwrap_or(self.ranges[orig_lr.index()].uses.len()); - let rest_uses: UseList = self.ranges[orig_lr.index()] - .uses - .iter() - .cloned() - .skip(first_use) - .collect(); - self.ranges[new_lr.index()].uses = rest_uses; - self.ranges[orig_lr.index()].uses.truncate(first_use); - self.recompute_range_properties(orig_lr); - self.recompute_range_properties(new_lr); - new_lr_list[0].index = new_lr; - new_lr_list[0].range = self.ranges[new_lr.index()].range; - self.ranges[orig_lr.index()].range.to = split_at; - self.bundles[bundle.index()].ranges[last_lr_in_old_bundle_idx].range = - self.ranges[orig_lr.index()].range; - - // Perform a lazy split in the VReg data. We just - // append the new LR and its range; we will sort by - // start of range, and fix up range ends, once when we - // iterate over the VReg's ranges after allocation - // completes (this is the only time when order - // matters). - self.vregs[self.ranges[new_lr.index()].vreg.index()] - .ranges - .push(LiveRangeListEntry { - range: self.ranges[new_lr.index()].range, - index: new_lr, - }); - } - - let new_bundle = self.create_bundle(); - log::debug!(" -> creating new bundle {:?}", new_bundle); - self.bundles[new_bundle.index()].spillset = spillset; - for entry in &new_lr_list { - self.ranges[entry.index.index()].bundle = new_bundle; - } - self.bundles[new_bundle.index()].ranges = new_lr_list; - - // Finally, handle moving LRs to the spill bundle when - // appropriate: If the first range in `new_bundle` or last - // range in `bundle` has "empty space" beyond the first or - // last use (respectively), trim it and put an empty LR into - // the spill bundle. (We are careful to treat the "starts at - // def" flag as an implicit first def even if no def-type Use - // is present.) 
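The trimming that follows has two cases for the old bundle's last range: a range with no uses at all migrates to the spill bundle wholesale, and a range whose last use ends before the range does is cut just after that use, with the empty tail handed to the spill bundle. A sketch over plain integer positions; the real code works on `ProgPoint`s and also handles the mirror-image case at the head of the new bundle.

    // Illustrative sketch: trimming the empty tail of a bundle's last range.
    enum TrailTrim {
        MoveWholeRange,                             // no uses at all
        Split { keep_to: u32, empty: (u32, u32) },  // trim range, spill the tail
        Keep,                                       // last use reaches the end
    }

    fn trim_trailing(range_to: u32, last_use: Option<u32>) -> TrailTrim {
        match last_use {
            None => TrailTrim::MoveWholeRange,
            Some(u) => {
                let split = u + 1; // just after the last use's instruction
                if split < range_to {
                    TrailTrim::Split { keep_to: split, empty: (split, range_to) }
                } else {
                    TrailTrim::Keep
                }
            }
        }
    }

    fn main() {
        // Range ends at 20 but its last use is at inst 12: keep [.., 13), spill [13, 20).
        match trim_trailing(20, Some(12)) {
            TrailTrim::Split { keep_to, empty } => {
                assert_eq!(keep_to, 13);
                assert_eq!(empty, (13, 20));
            }
            _ => unreachable!(),
        }
        assert!(matches!(trim_trailing(20, None), TrailTrim::MoveWholeRange));
    }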
- while let Some(entry) = self.bundles[bundle.index()].ranges.last().cloned() { - let end = entry.range.to; - let vreg = self.ranges[entry.index.index()].vreg; - let last_use = self.ranges[entry.index.index()].uses.last().map(|u| u.pos); - if last_use.is_none() { - let spill = self - .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) - .unwrap(); - log::debug!( - " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", - bundle, - entry.index, - spill - ); - self.bundles[spill.index()].ranges.push(entry); - self.bundles[bundle.index()].ranges.pop(); - self.ranges[entry.index.index()].bundle = spill; - continue; - } - let last_use = last_use.unwrap(); - let split = ProgPoint::before(last_use.inst().next()); - if split < end { - let spill = self - .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) - .unwrap(); - self.bundles[bundle.index()] - .ranges - .last_mut() - .unwrap() - .range - .to = split; - self.ranges[self.bundles[bundle.index()] - .ranges - .last() - .unwrap() - .index - .index()] - .range - .to = split; - let range = CodeRange { - from: split, - to: end, - }; - let empty_lr = self.create_liverange(range); - self.bundles[spill.index()].ranges.push(LiveRangeListEntry { - range, - index: empty_lr, - }); - self.ranges[empty_lr.index()].bundle = spill; - self.vregs[vreg.index()].ranges.push(LiveRangeListEntry { - range, - index: empty_lr, - }); - log::debug!( - " -> bundle {:?} range {:?}: last use implies split point {:?}", - bundle, - entry.index, - split - ); - log::debug!( - " -> moving trailing empty region to new spill bundle {:?} with new LR {:?}", - spill, - empty_lr - ); - } - break; - } - while let Some(entry) = self.bundles[new_bundle.index()].ranges.first().cloned() { - if self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef) { - break; - } - let start = entry.range.from; - let vreg = self.ranges[entry.index.index()].vreg; - let first_use = self.ranges[entry.index.index()].uses.first().map(|u| u.pos); - if first_use.is_none() { - let spill = self - .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) - .unwrap(); - log::debug!( - " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", - new_bundle, - entry.index, - spill - ); - self.bundles[spill.index()].ranges.push(entry); - self.bundles[new_bundle.index()].ranges.drain(..1); - self.ranges[entry.index.index()].bundle = spill; - continue; - } - let first_use = first_use.unwrap(); - let split = ProgPoint::before(first_use.inst()); - if split > start { - let spill = self - .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) - .unwrap(); - self.bundles[new_bundle.index()] - .ranges - .first_mut() - .unwrap() - .range - .from = split; - self.ranges[self.bundles[new_bundle.index()] - .ranges - .first() - .unwrap() - .index - .index()] - .range - .from = split; - let range = CodeRange { - from: start, - to: split, - }; - let empty_lr = self.create_liverange(range); - self.bundles[spill.index()].ranges.push(LiveRangeListEntry { - range, - index: empty_lr, - }); - self.ranges[empty_lr.index()].bundle = spill; - self.vregs[vreg.index()].ranges.push(LiveRangeListEntry { - range, - index: empty_lr, - }); - log::debug!( - " -> bundle {:?} range {:?}: first use implies split point {:?}", - bundle, - entry.index, - first_use, - ); - log::debug!( - " -> moving leading empty region to new spill bundle {:?} with new LR {:?}", - spill, - empty_lr - ); - } - break; - } - - if self.bundles[bundle.index()].ranges.len() > 0 { - 
self.recompute_bundle_properties(bundle); - let prio = self.bundles[bundle.index()].prio; - self.allocation_queue - .insert(bundle, prio as usize, reg_hint); - } - if self.bundles[new_bundle.index()].ranges.len() > 0 { - self.recompute_bundle_properties(new_bundle); - let prio = self.bundles[new_bundle.index()].prio; - self.allocation_queue - .insert(new_bundle, prio as usize, reg_hint); - } - } - - fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { - let mut req = Requirement::Unknown; - log::debug!("compute_requirement: {:?}", bundle); - for entry in &self.bundles[bundle.index()].ranges { - log::debug!(" -> LR {:?}", entry.index); - for u in &self.ranges[entry.index.index()].uses { - log::debug!(" -> use {:?}", u); - let r = Requirement::from_operand(u.operand); - req = req.merge(r); - log::debug!(" -> req {:?}", req); - } - } - log::debug!(" -> final: {:?}", req); - req - } - - fn process_bundle( - &mut self, - bundle: LiveBundleIndex, - reg_hint: PReg, - ) -> Result<(), RegAllocError> { - let req = self.compute_requirement(bundle); - // Grab a hint from either the queue or our spillset, if any. - let hint_reg = if reg_hint != PReg::invalid() { - reg_hint - } else { - self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint - }; - log::debug!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); - - if let Requirement::Conflict = req { - // We have to split right away. - assert!( - !self.minimal_bundle(bundle), - "Minimal bundle with conflict!" - ); - let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; - self.split_and_requeue_bundle( - bundle, - /* split_at_point = */ bundle_start, - reg_hint, - ); - return Ok(()); - } - - // If no requirement at all (because no uses), and *if* a - // spill bundle is already present, then move the LRs over to - // the spill bundle right away. - match req { - Requirement::Unknown | Requirement::Any(_) => { - if let Some(spill) = - self.get_or_create_spill_bundle(bundle, /* create_if_absent = */ false) - { - let mut list = - std::mem::replace(&mut self.bundles[bundle.index()].ranges, smallvec![]); - for entry in &list { - self.ranges[entry.index.index()].bundle = spill; - } - self.bundles[spill.index()].ranges.extend(list.drain(..)); - return Ok(()); - } - } - _ => {} - } - - // Try to allocate! - let mut attempts = 0; - loop { - attempts += 1; - log::debug!("attempt {}, req {:?}", attempts, req); - debug_assert!(attempts < 100 * self.func.insts()); - - let (class, fixed_preg) = match req { - Requirement::Fixed(preg) => (preg.class(), Some(preg)), - Requirement::Register(class) => (class, None), - Requirement::Stack(_) => { - // If we must be on the stack, mark our spillset - // as required immediately. - self.spillsets[self.bundles[bundle.index()].spillset.index()].required = true; - return Ok(()); - } - - Requirement::Any(_) | Requirement::Unknown => { - self.spilled_bundles.push(bundle); - return Ok(()); - } - - Requirement::Conflict => { - unreachable!() - } - }; - // Scan all pregs, or the one fixed preg, and attempt to allocate. 
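`Requirement::merge`, used by `compute_requirement` above, is defined elsewhere; the sketch below only illustrates the kind of lattice it computes. The variant names follow the match arms above, but the merge rules here (same-class constraints tighten each other, anything contradictory collapses to `Conflict`) are an illustration rather than a copy of the real implementation, and `Fixed` carries a plain register number instead of a `PReg`.

    // Illustrative sketch: a plausible merge lattice for per-operand requirements.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum Class { Int, Float }
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum Req {
        Unknown,          // no uses seen yet
        Any(Class),       // register or stack of the class
        Register(Class),  // any register of the class
        Fixed(Class, u8), // one specific register
        Stack(Class),     // must live on the stack
        Conflict,         // requirements cannot be satisfied together
    }

    fn class_of(r: Req) -> Option<Class> {
        use Req::*;
        match r {
            Unknown | Conflict => None,
            Any(c) | Register(c) | Fixed(c, _) | Stack(c) => Some(c),
        }
    }

    fn merge(a: Req, b: Req) -> Req {
        use Req::*;
        match (a, b) {
            (Unknown, r) | (r, Unknown) => r,
            (Conflict, _) | (_, Conflict) => Conflict,
            (Any(c), r) | (r, Any(c)) if class_of(r) == Some(c) => r,
            (Register(c1), Register(c2)) if c1 == c2 => Register(c1),
            (Register(c1), Fixed(c2, p)) | (Fixed(c2, p), Register(c1)) if c1 == c2 => Fixed(c2, p),
            (Fixed(c1, p1), Fixed(c2, p2)) if c1 == c2 && p1 == p2 => Fixed(c1, p1),
            (Stack(c1), Stack(c2)) if c1 == c2 => Stack(c1),
            _ => Conflict,
        }
    }

    fn main() {
        use Req::*;
        assert_eq!(merge(Register(Class::Int), Fixed(Class::Int, 3)), Fixed(Class::Int, 3));
        assert_eq!(merge(Register(Class::Int), Stack(Class::Int)), Conflict);
        assert_eq!(merge(Register(Class::Int), Register(Class::Float)), Conflict);
    }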
- - let mut lowest_cost_evict_conflict_set: Option = None; - let mut lowest_cost_evict_conflict_cost: Option = None; - - let mut lowest_cost_split_conflict_cost: Option = None; - let mut lowest_cost_split_conflict_point = ProgPoint::before(Inst::new(0)); - let mut lowest_cost_split_conflict_reg = PReg::invalid(); - - // Heuristic: start the scan for an available - // register at an offset influenced both by our - // location in the code and by the bundle we're - // considering. This has the effect of spreading - // demand more evenly across registers. - let scan_offset = self.ranges[self.bundles[bundle.index()].ranges[0].index.index()] - .range - .from - .inst() - .index() - + bundle.index(); - - self.stats.process_bundle_reg_probe_start_any += 1; - for preg in RegTraversalIter::new( - self.env, - class, - hint_reg, - PReg::invalid(), - scan_offset, - fixed_preg, - ) { - self.stats.process_bundle_reg_probes_any += 1; - let preg_idx = PRegIndex::new(preg.index()); - log::debug!("trying preg {:?}", preg_idx); - - let scan_limit_cost = match ( - lowest_cost_evict_conflict_cost, - lowest_cost_split_conflict_cost, - ) { - (Some(a), Some(b)) => Some(std::cmp::max(a, b)), - _ => None, - }; - match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, scan_limit_cost) { - AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_any += 1; - log::debug!(" -> allocated to any {:?}", preg_idx); - self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint = - alloc.as_reg().unwrap(); - return Ok(()); - } - AllocRegResult::Conflict(bundles, first_conflict_point) => { - log::debug!( - " -> conflict with bundles {:?}, first conflict at {:?}", - bundles, - first_conflict_point - ); - - let conflict_cost = self.maximum_spill_weight_in_bundle_set(&bundles); - - if lowest_cost_evict_conflict_cost.is_none() - || conflict_cost < lowest_cost_evict_conflict_cost.unwrap() - { - lowest_cost_evict_conflict_cost = Some(conflict_cost); - lowest_cost_evict_conflict_set = Some(bundles); - } - - let loop_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[first_conflict_point.inst().index()].index()]; - let move_cost = spill_weight_from_policy( - OperandPolicy::Reg, - loop_depth as usize, - /* is_def = */ true, - ); - if lowest_cost_split_conflict_cost.is_none() - || (conflict_cost + move_cost) - < lowest_cost_split_conflict_cost.unwrap() - { - lowest_cost_split_conflict_cost = Some(conflict_cost + move_cost); - lowest_cost_split_conflict_point = first_conflict_point; - lowest_cost_split_conflict_reg = preg; - } - } - AllocRegResult::ConflictWithFixed(max_cost, point) => { - log::debug!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); - - let loop_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[point.inst().index()].index()]; - let move_cost = spill_weight_from_policy( - OperandPolicy::Reg, - loop_depth as usize, - /* is_def = */ true, - ); - - if lowest_cost_split_conflict_cost.is_none() - || (max_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() - { - lowest_cost_split_conflict_cost = Some(max_cost + move_cost); - lowest_cost_split_conflict_point = point; - lowest_cost_split_conflict_reg = preg; - } - } - AllocRegResult::ConflictHighCost => { - // Simply don't consider -- we already have - // a lower-cost conflict bundle option - // to evict. - continue; - } - } - } - - // Otherwise, we *require* a register, but didn't fit into - // any with current bundle assignments. 
Hence, we will need - // to either split or attempt to evict some bundles. - - log::debug!( - " -> lowest cost evict: set {:?}, cost {:?}", - lowest_cost_evict_conflict_set, - lowest_cost_evict_conflict_cost, - ); - log::debug!( - " -> lowest cost split: cost {:?}, point {:?}, reg {:?}", - lowest_cost_split_conflict_cost, - lowest_cost_split_conflict_point, - lowest_cost_split_conflict_reg - ); - - // If we reach here, we *must* have an option either to split or evict. - assert!( - lowest_cost_split_conflict_cost.is_some() - || lowest_cost_evict_conflict_cost.is_some() - ); - - let our_spill_weight = self.bundle_spill_weight(bundle); - log::debug!(" -> our spill weight: {}", our_spill_weight); - - // We detect the "too-many-live-registers" case here and - // return an error cleanly, rather than panicking, because - // the regalloc.rs fuzzer depends on the register - // allocator to correctly reject impossible-to-allocate - // programs in order to discard invalid test cases. - if self.minimal_bundle(bundle) - && (attempts >= 2 - || lowest_cost_evict_conflict_cost.is_none() - || lowest_cost_evict_conflict_cost.unwrap() >= our_spill_weight) - { - if let Requirement::Register(class) = req { - // Check if this is a too-many-live-registers situation. - let range = self.bundles[bundle.index()].ranges[0].range; - log::debug!("checking for too many live regs"); - let mut min_bundles_assigned = 0; - let mut fixed_assigned = 0; - let mut total_regs = 0; - for preg in self.env.preferred_regs_by_class[class as u8 as usize] - .iter() - .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) - { - log::debug!(" -> PR {:?}", preg); - let start = LiveRangeKey::from_range(&CodeRange { - from: range.from.prev(), - to: range.from.prev(), - }); - for (key, lr) in self.pregs[preg.index()].allocations.btree.range(start..) { - let preg_range = key.to_range(); - if preg_range.to <= range.from { - continue; - } - if preg_range.from >= range.to { - break; - } - if lr.is_valid() { - if self.minimal_bundle(self.ranges[lr.index()].bundle) { - log::debug!(" -> min bundle {:?}", lr); - min_bundles_assigned += 1; - } else { - log::debug!(" -> non-min bundle {:?}", lr); - } - } else { - log::debug!(" -> fixed bundle"); - fixed_assigned += 1; - } - } - total_regs += 1; - } - log::debug!( - " -> total {}, fixed {}, min {}", - total_regs, - fixed_assigned, - min_bundles_assigned - ); - if min_bundles_assigned + fixed_assigned >= total_regs { - return Err(RegAllocError::TooManyLiveRegs); - } - } - - panic!("Could not allocate minimal bundle, but the allocation problem should be possible to solve"); - } - - // If our bundle's weight is less than or equal to(*) the - // evict cost, choose to split. Also pick splitting if - // we're on our second or more attempt and we didn't - // allocate. Also pick splitting if the conflict set is - // empty, meaning a fixed conflict that can't be evicted. - // - // (*) the "equal to" part is very important: it prevents - // an infinite loop where two bundles with equal spill - // cost continually evict each other in an infinite - // allocation loop. In such a case, the first bundle in - // wins, and the other splits. - // - // Note that we don't split if the bundle is minimal. 
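The decision rule described above fits in one predicate: never split a minimal bundle, and otherwise split when this is a repeat attempt, when there is nothing evictable, or when our weight is less than or equal to the cheapest eviction; the `<=` is the tie-break that stops two equal-weight bundles from evicting each other forever. A sketch; plain `u32` weights stand in for the real spill-weight values.

    // Illustrative sketch: the split-vs-evict decision.
    fn should_split(
        minimal: bool,
        attempts: u32,
        our_weight: u32,
        cheapest_evict: Option<u32>,
    ) -> bool {
        !minimal
            && (attempts >= 2
                || cheapest_evict.is_none()
                || our_weight <= cheapest_evict.unwrap())
    }

    fn main() {
        // Equal weights: the already-placed bundle stays, the newcomer splits.
        assert!(should_split(false, 1, 100, Some(100)));
        // Strictly heavier than the conflict: evict instead of splitting.
        assert!(!should_split(false, 1, 200, Some(100)));
        // Minimal bundles never split.
        assert!(!should_split(true, 5, 1, Some(100)));
    }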
- if !self.minimal_bundle(bundle) - && (attempts >= 2 - || lowest_cost_evict_conflict_cost.is_none() - || our_spill_weight <= lowest_cost_evict_conflict_cost.unwrap()) - { - log::debug!( - " -> deciding to split: our spill weight is {}", - self.bundle_spill_weight(bundle) - ); - let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; - let mut split_at_point = - std::cmp::max(lowest_cost_split_conflict_point, bundle_start); - let requeue_with_reg = lowest_cost_split_conflict_reg; - - // Adjust `split_at_point` if it is within a deeper loop - // than the bundle start -- hoist it to just before the - // first loop header it encounters. - let bundle_start_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[bundle_start.inst().index()].index()]; - let split_at_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[split_at_point.inst().index()].index()]; - if split_at_depth > bundle_start_depth { - for block in (self.cfginfo.insn_block[bundle_start.inst().index()].index() + 1) - ..=self.cfginfo.insn_block[split_at_point.inst().index()].index() - { - if self.cfginfo.approx_loop_depth[block] > bundle_start_depth { - split_at_point = self.cfginfo.block_entry[block]; - break; - } - } - } - - self.split_and_requeue_bundle(bundle, split_at_point, requeue_with_reg); - return Ok(()); - } else { - // Evict all bundles in `conflicting bundles` and try again. - self.stats.evict_bundle_event += 1; - for &bundle in &lowest_cost_evict_conflict_set.unwrap() { - log::debug!(" -> evicting {:?}", bundle); - self.evict_bundle(bundle); - self.stats.evict_bundle_count += 1; - } - } - } - } - - fn try_allocating_regs_for_spilled_bundles(&mut self) { - log::debug!("allocating regs for spilled bundles"); - for i in 0..self.spilled_bundles.len() { - let bundle = self.spilled_bundles[i]; // don't borrow self - - let class = self.spillsets[self.bundles[bundle.index()].spillset.index()].class; - let hint = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; - - // This may be an empty-range bundle whose ranges are not - // sorted; sort all range-lists again here. 
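The loop-depth hoisting just above can be modelled over plain arrays. In this sketch `block_of[i]` is the block containing instruction `i`, `depth[b]` the block's approximate loop depth, and `entry[b]` its first instruction; all three are stand-ins for the `cfginfo` tables used by the real code.

    // Illustrative sketch: hoist a split point out of a loop that the bundle start
    // is not inside, by moving it to the entry of the first deeper block.
    fn hoist_split(
        bundle_start: usize,  // inst index where the bundle starts
        mut split_at: usize,  // candidate split inst index
        block_of: &[usize],   // inst -> block
        depth: &[u32],        // block -> approximate loop depth
        entry: &[usize],      // block -> first inst of the block
    ) -> usize {
        let start_block = block_of[bundle_start];
        let split_block = block_of[split_at];
        if depth[split_block] > depth[start_block] {
            for b in (start_block + 1)..=split_block {
                if depth[b] > depth[start_block] {
                    split_at = entry[b];
                    break;
                }
            }
        }
        split_at
    }

    fn main() {
        // Blocks: 0 (depth 0, insts 0..3), 1 (depth 1, insts 3..6), 2 (depth 2, insts 6..9).
        let block_of = [0, 0, 0, 1, 1, 1, 2, 2, 2];
        let depth = [0, 1, 2];
        let entry = [0, 3, 6];
        // A split point inside the inner loop is hoisted to the header of block 1.
        assert_eq!(hoist_split(1, 7, &block_of, &depth, &entry), 3);
    }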
- self.bundles[bundle.index()] - .ranges - .sort_unstable_by_key(|entry| entry.range.from); - - let mut success = false; - self.stats.spill_bundle_reg_probes += 1; - for preg in - RegTraversalIter::new(self.env, class, hint, PReg::invalid(), bundle.index(), None) - { - log::debug!("trying bundle {:?} to preg {:?}", bundle, preg); - let preg_idx = PRegIndex::new(preg.index()); - if let AllocRegResult::Allocated(_) = - self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) - { - self.stats.spill_bundle_reg_success += 1; - success = true; - break; - } - } - if !success { - log::debug!( - "spilling bundle {:?}: marking spillset {:?} as required", - bundle, - self.bundles[bundle.index()].spillset - ); - self.spillsets[self.bundles[bundle.index()].spillset.index()].required = true; - } - } - } - - fn spillslot_can_fit_spillset( - &mut self, - spillslot: SpillSlotIndex, - spillset: SpillSetIndex, - ) -> bool { - for &vreg in &self.spillsets[spillset.index()].vregs { - for entry in &self.vregs[vreg.index()].ranges { - if self.spillslots[spillslot.index()] - .ranges - .btree - .contains_key(&LiveRangeKey::from_range(&entry.range)) - { - return false; - } - } - } - true - } - - fn allocate_spillset_to_spillslot( - &mut self, - spillset: SpillSetIndex, - spillslot: SpillSlotIndex, - ) { - self.spillsets[spillset.index()].slot = spillslot; - for i in 0..self.spillsets[spillset.index()].vregs.len() { - // don't borrow self - let vreg = self.spillsets[spillset.index()].vregs[i]; - log::debug!( - "spillslot {:?} alloc'ed to spillset {:?}: vreg {:?}", - spillslot, - spillset, - vreg, - ); - for entry in &self.vregs[vreg.index()].ranges { - log::debug!( - "spillslot {:?} getting range {:?} from LR {:?} from vreg {:?}", - spillslot, - entry.range, - entry.index, - vreg, - ); - self.spillslots[spillslot.index()] - .ranges - .btree - .insert(LiveRangeKey::from_range(&entry.range), entry.index); - } - } - } - - fn allocate_spillslots(&mut self) { - for spillset in 0..self.spillsets.len() { - log::debug!("allocate spillslot: {}", spillset); - let spillset = SpillSetIndex::new(spillset); - if !self.spillsets[spillset.index()].required { - continue; - } - // Get or create the spillslot list for this size. - let size = self.spillsets[spillset.index()].size as usize; - if size >= self.slots_by_size.len() { - self.slots_by_size.resize( - size + 1, - SpillSlotList { - first_spillslot: SpillSlotIndex::invalid(), - last_spillslot: SpillSlotIndex::invalid(), - }, - ); - } - // Try a few existing spillslots. - let mut spillslot_iter = self.slots_by_size[size].first_spillslot; - let mut first_slot = SpillSlotIndex::invalid(); - let mut prev = SpillSlotIndex::invalid(); - let mut success = false; - for _attempt in 0..10 { - if spillslot_iter.is_invalid() { - break; - } - if spillslot_iter == first_slot { - // We've started looking at slots we placed at the end; end search. - break; - } - if first_slot.is_invalid() { - first_slot = spillslot_iter; - } - - if self.spillslot_can_fit_spillset(spillslot_iter, spillset) { - self.allocate_spillset_to_spillslot(spillset, spillslot_iter); - success = true; - break; - } - // Remove the slot and place it at the end of the respective list. 
- let next = self.spillslots[spillslot_iter.index()].next_spillslot; - if prev.is_valid() { - self.spillslots[prev.index()].next_spillslot = next; - } else { - self.slots_by_size[size].first_spillslot = next; - } - if !next.is_valid() { - self.slots_by_size[size].last_spillslot = prev; - } - - let last = self.slots_by_size[size].last_spillslot; - if last.is_valid() { - self.spillslots[last.index()].next_spillslot = spillslot_iter; - } else { - self.slots_by_size[size].first_spillslot = spillslot_iter; - } - self.slots_by_size[size].last_spillslot = spillslot_iter; - - prev = spillslot_iter; - spillslot_iter = next; - } - - if !success { - // Allocate a new spillslot. - let spillslot = SpillSlotIndex::new(self.spillslots.len()); - let next = self.slots_by_size[size].first_spillslot; - self.spillslots.push(SpillSlotData { - ranges: LiveRangeSet::new(), - next_spillslot: next, - alloc: Allocation::none(), - class: self.spillsets[spillset.index()].class, - }); - self.slots_by_size[size].first_spillslot = spillslot; - if !next.is_valid() { - self.slots_by_size[size].last_spillslot = spillslot; - } - - self.allocate_spillset_to_spillslot(spillset, spillslot); - } - } - - // Assign actual slot indices to spillslots. - for i in 0..self.spillslots.len() { - self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].class); - } - - log::debug!("spillslot allocator done"); - } - - fn allocate_spillslot(&mut self, class: RegClass) -> Allocation { - let size = self.func.spillslot_size(class) as u32; - let mut offset = self.num_spillslots; - // Align up to `size`. - debug_assert!(size.is_power_of_two()); - offset = (offset + size - 1) & !(size - 1); - let slot = if self.func.multi_spillslot_named_by_last_slot() { - offset + size - 1 - } else { - offset - }; - offset += size; - self.num_spillslots = offset; - Allocation::stack(SpillSlot::new(slot as usize, class)) - } - - fn is_start_of_block(&self, pos: ProgPoint) -> bool { - let block = self.cfginfo.insn_block[pos.inst().index()]; - pos == self.cfginfo.block_entry[block.index()] - } - fn is_end_of_block(&self, pos: ProgPoint) -> bool { - let block = self.cfginfo.insn_block[pos.inst().index()]; - pos == self.cfginfo.block_exit[block.index()] - } - - fn insert_move( - &mut self, - pos: ProgPoint, - prio: InsertMovePrio, - from_alloc: Allocation, - to_alloc: Allocation, - to_vreg: Option, - ) { - debug!( - "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}", - pos, prio, from_alloc, to_alloc - ); - match (from_alloc.as_reg(), to_alloc.as_reg()) { - (Some(from), Some(to)) => { - assert_eq!(from.class(), to.class()); - } - _ => {} - } - self.inserted_moves.push(InsertedMove { - pos, - prio, - from_alloc, - to_alloc, - to_vreg, - }); - } - - fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation { - let inst_allocs = &self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; - inst_allocs[slot] - } - - fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) { - let inst_allocs = &mut self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; - inst_allocs[slot] = alloc; - } - - fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { - log::debug!("get_alloc_for_range: {:?}", range); - let bundle = self.ranges[range.index()].bundle; - log::debug!(" -> bundle: {:?}", bundle); - let bundledata = &self.bundles[bundle.index()]; - log::debug!(" -> allocation {:?}", bundledata.allocation); - if bundledata.allocation != Allocation::none() { - bundledata.allocation - } else { - log::debug!(" -> spillset 
{:?}", bundledata.spillset); - log::debug!( - " -> spill slot {:?}", - self.spillsets[bundledata.spillset.index()].slot - ); - self.spillslots[self.spillsets[bundledata.spillset.index()].slot.index()].alloc - } - } - - fn apply_allocations_and_insert_moves(&mut self) { - log::debug!("apply_allocations_and_insert_moves"); - log::debug!("blockparam_ins: {:?}", self.blockparam_ins); - log::debug!("blockparam_outs: {:?}", self.blockparam_outs); - - // Now that all splits are done, we can pay the cost once to - // sort VReg range lists and update with the final ranges. - for vreg in &mut self.vregs { - for entry in &mut vreg.ranges { - entry.range = self.ranges[entry.index.index()].range; - } - vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); - } - - /// We create "half-moves" in order to allow a single-scan - /// strategy with a subsequent sort. Basically, the key idea - /// is that as our single scan through a range for a vreg hits - /// upon the source or destination of an edge-move, we emit a - /// "half-move". These half-moves are carefully keyed in a - /// particular sort order (the field order below is - /// significant!) so that all half-moves on a given (from, to) - /// block-edge appear contiguously, and then all moves from a - /// given vreg appear contiguously. Within a given from-vreg, - /// pick the first `Source` (there should only be one, but - /// imprecision in liveranges due to loop handling sometimes - /// means that a blockparam-out is also recognized as a normal-out), - /// and then for each `Dest`, copy the source-alloc to that - /// dest-alloc. - #[derive(Clone, Debug, PartialEq, Eq)] - struct HalfMove { - key: u64, - alloc: Allocation, - } - #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] - #[repr(u8)] - enum HalfMoveKind { - Source = 0, - Dest = 1, - } - fn half_move_key( - from_block: Block, - to_block: Block, - to_vreg: VRegIndex, - kind: HalfMoveKind, - ) -> u64 { - assert!(from_block.index() < 1 << 21); - assert!(to_block.index() < 1 << 21); - assert!(to_vreg.index() < 1 << 21); - ((from_block.index() as u64) << 43) - | ((to_block.index() as u64) << 22) - | ((to_vreg.index() as u64) << 1) - | (kind as u8 as u64) - } - impl HalfMove { - fn from_block(&self) -> Block { - Block::new(((self.key >> 43) & ((1 << 21) - 1)) as usize) - } - fn to_block(&self) -> Block { - Block::new(((self.key >> 22) & ((1 << 21) - 1)) as usize) - } - fn to_vreg(&self) -> VRegIndex { - VRegIndex::new(((self.key >> 1) & ((1 << 21) - 1)) as usize) - } - fn kind(&self) -> HalfMoveKind { - if self.key & 1 == 1 { - HalfMoveKind::Dest - } else { - HalfMoveKind::Source - } - } - } - - let mut half_moves: Vec = Vec::with_capacity(6 * self.func.insts()); - let mut reuse_input_insts = Vec::with_capacity(self.func.insts() / 2); - - let mut blockparam_in_idx = 0; - let mut blockparam_out_idx = 0; - let mut prog_move_src_idx = 0; - let mut prog_move_dst_idx = 0; - for vreg in 0..self.vregs.len() { - let vreg = VRegIndex::new(vreg); - - let pinned_alloc = if self.vregs[vreg.index()].is_pinned { - self.func.is_pinned_vreg(self.vreg_regs[vreg.index()]) - } else { - None - }; - - // For each range in each vreg, insert moves or - // half-moves. We also scan over `blockparam_ins` and - // `blockparam_outs`, which are sorted by (block, vreg), - // and over program-move srcs/dsts to fill in allocations. 
- let mut prev = LiveRangeIndex::invalid(); - for range_idx in 0..self.vregs[vreg.index()].ranges.len() { - let entry = self.vregs[vreg.index()].ranges[range_idx]; - let alloc = pinned_alloc - .map(|preg| Allocation::reg(preg)) - .unwrap_or_else(|| self.get_alloc_for_range(entry.index)); - let range = entry.range; - log::debug!( - "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?} (pinned {:?})", - vreg, - entry.index, - range, - alloc, - pinned_alloc, - ); - debug_assert!(alloc != Allocation::none()); - - if self.annotations_enabled { - self.annotate( - range.from, - format!( - " <<< start v{} in {} (range{}) (bundle{})", - vreg.index(), - alloc, - entry.index.index(), - self.ranges[entry.index.index()].bundle.raw_u32(), - ), - ); - self.annotate( - range.to, - format!( - " end v{} in {} (range{}) (bundle{}) >>>", - vreg.index(), - alloc, - entry.index.index(), - self.ranges[entry.index.index()].bundle.raw_u32(), - ), - ); - } - - // Does this range follow immediately after a prior - // range in the same block? If so, insert a move (if - // the allocs differ). We do this directly rather than - // with half-moves because we eagerly know both sides - // already (and also, half-moves are specific to - // inter-block transfers). - // - // Note that we do *not* do this if there is also a - // def as the first use in the new range: it's - // possible that an old liverange covers the Before - // pos of an inst, a new liverange covers the After - // pos, and the def also happens at After. In this - // case we don't want to an insert a move after the - // instruction copying the old liverange. - // - // Note also that we assert that the new range has to - // start at the Before-point of an instruction; we - // can't insert a move that logically happens just - // before After (i.e. in the middle of a single - // instruction). - // - // Also note that this case is not applicable to - // pinned vregs (because they are always in one PReg). - if pinned_alloc.is_none() && prev.is_valid() { - let prev_alloc = self.get_alloc_for_range(prev); - let prev_range = self.ranges[prev.index()].range; - let first_is_def = - self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef); - debug_assert!(prev_alloc != Allocation::none()); - - if prev_range.to == range.from - && !self.is_start_of_block(range.from) - && !first_is_def - { - log::debug!( - "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", - prev.index(), - entry.index.index(), - prev_alloc, - alloc, - vreg.index() - ); - assert_eq!(range.from.pos(), InstPosition::Before); - self.insert_move( - range.from, - InsertMovePrio::Regular, - prev_alloc, - alloc, - Some(self.vreg_regs[vreg.index()]), - ); - } - } - - // The block-to-block edge-move logic is not - // applicable to pinned vregs, which are always in one - // PReg (so never need moves within their own vreg - // ranges). - if pinned_alloc.is_none() { - // Scan over blocks whose ends are covered by this - // range. For each, for each successor that is not - // already in this range (hence guaranteed to have the - // same allocation) and if the vreg is live, add a - // Source half-move. 
- let mut block = self.cfginfo.insn_block[range.from.inst().index()]; - while block.is_valid() && block.index() < self.func.blocks() { - if range.to < self.cfginfo.block_exit[block.index()].next() { - break; - } - log::debug!("examining block with end in range: block{}", block.index()); - for &succ in self.func.block_succs(block) { - log::debug!( - " -> has succ block {} with entry {:?}", - succ.index(), - self.cfginfo.block_entry[succ.index()] - ); - if range.contains_point(self.cfginfo.block_entry[succ.index()]) { - continue; - } - log::debug!(" -> out of this range, requires half-move if live"); - if self.is_live_in(succ, vreg) { - log::debug!(" -> live at input to succ, adding halfmove"); - half_moves.push(HalfMove { - key: half_move_key(block, succ, vreg, HalfMoveKind::Source), - alloc, - }); - } - } - - // Scan forward in `blockparam_outs`, adding all - // half-moves for outgoing values to blockparams - // in succs. - log::debug!( - "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", - vreg.index(), - block.index(), - blockparam_out_idx, - ); - while blockparam_out_idx < self.blockparam_outs.len() { - let (from_vreg, from_block, to_block, to_vreg) = - self.blockparam_outs[blockparam_out_idx]; - if (from_vreg, from_block) > (vreg, block) { - break; - } - if (from_vreg, from_block) == (vreg, block) { - log::debug!( - " -> found: from v{} block{} to v{} block{}", - from_vreg.index(), - from_block.index(), - to_vreg.index(), - to_vreg.index() - ); - half_moves.push(HalfMove { - key: half_move_key( - from_block, - to_block, - to_vreg, - HalfMoveKind::Source, - ), - alloc, - }); - - if self.annotations_enabled { - self.annotate( - self.cfginfo.block_exit[block.index()], - format!( - "blockparam-out: block{} to block{}: v{} to v{} in {}", - from_block.index(), - to_block.index(), - from_vreg.index(), - to_vreg.index(), - alloc - ), - ); - } - } - - blockparam_out_idx += 1; - } - - block = block.next(); - } - - // Scan over blocks whose beginnings are covered by - // this range and for which the vreg is live at the - // start of the block. For each, for each predecessor, - // add a Dest half-move. - let mut block = self.cfginfo.insn_block[range.from.inst().index()]; - if self.cfginfo.block_entry[block.index()] < range.from { - block = block.next(); - } - while block.is_valid() && block.index() < self.func.blocks() { - if self.cfginfo.block_entry[block.index()] >= range.to { - break; - } - - // Add half-moves for blockparam inputs. 
- log::debug!( - "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", - vreg.index(), - block.index(), - blockparam_in_idx - ); - while blockparam_in_idx < self.blockparam_ins.len() { - let (to_vreg, to_block, from_block) = - self.blockparam_ins[blockparam_in_idx]; - if (to_vreg, to_block) > (vreg, block) { - break; - } - if (to_vreg, to_block) == (vreg, block) { - half_moves.push(HalfMove { - key: half_move_key( - from_block, - to_block, - to_vreg, - HalfMoveKind::Dest, - ), - alloc, - }); - log::debug!( - "match: blockparam_in: v{} in block{} from block{} into {}", - to_vreg.index(), - to_block.index(), - from_block.index(), - alloc, - ); - #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - self.cfginfo.block_entry[block.index()], - format!( - "blockparam-in: block{} to block{}:into v{} in {}", - from_block.index(), - to_block.index(), - to_vreg.index(), - alloc - ), - ); - } - } - } - blockparam_in_idx += 1; - } - - if !self.is_live_in(block, vreg) { - block = block.next(); - continue; - } - - log::debug!( - "scanning preds at vreg {} block {} for ends outside the range", - vreg.index(), - block.index() - ); - - // Now find any preds whose ends are not in the - // same range, and insert appropriate moves. - for &pred in self.func.block_preds(block) { - log::debug!( - "pred block {} has exit {:?}", - pred.index(), - self.cfginfo.block_exit[pred.index()] - ); - if range.contains_point(self.cfginfo.block_exit[pred.index()]) { - continue; - } - log::debug!(" -> requires half-move"); - half_moves.push(HalfMove { - key: half_move_key(pred, block, vreg, HalfMoveKind::Dest), - alloc, - }); - } - - block = block.next(); - } - - // If this is a blockparam vreg and the start of block - // is in this range, add to blockparam_allocs. - let (blockparam_block, blockparam_idx) = - self.cfginfo.vreg_def_blockparam[vreg.index()]; - if blockparam_block.is_valid() - && range.contains_point(self.cfginfo.block_entry[blockparam_block.index()]) - { - self.blockparam_allocs.push(( - blockparam_block, - blockparam_idx, - vreg, - alloc, - )); - } - } - - // Scan over def/uses and apply allocations. - for use_idx in 0..self.ranges[entry.index.index()].uses.len() { - let usedata = self.ranges[entry.index.index()].uses[use_idx]; - log::debug!("applying to use: {:?}", usedata); - debug_assert!(range.contains_point(usedata.pos)); - let inst = usedata.pos.inst(); - let slot = usedata.slot; - let operand = usedata.operand; - // Safepoints add virtual uses with no slots; - // avoid these. - if slot != SLOT_NONE { - self.set_alloc(inst, slot as usize, alloc); - } - if let OperandPolicy::Reuse(_) = operand.policy() { - reuse_input_insts.push(inst); - } - } - - // Scan over program move srcs/dsts to fill in allocations. - - // Move srcs happen at `After` of a given - // inst. Compute [from, to) semi-inclusive range of - // inst indices for which we should fill in the source - // with this LR's allocation. - // - // range from inst-Before or inst-After covers cur - // inst's After; so includes move srcs from inst. - let move_src_start = (vreg, range.from.inst()); - // range to (exclusive) inst-Before or inst-After - // covers only prev inst's After; so includes move - // srcs to (exclusive) inst. 
- let move_src_end = (vreg, range.to.inst()); - log::debug!( - "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", - vreg, - range, - move_src_start, - move_src_end - ); - while prog_move_src_idx < self.prog_move_srcs.len() - && self.prog_move_srcs[prog_move_src_idx].0 < move_src_start - { - log::debug!(" -> skipping idx {}", prog_move_src_idx); - prog_move_src_idx += 1; - } - while prog_move_src_idx < self.prog_move_srcs.len() - && self.prog_move_srcs[prog_move_src_idx].0 < move_src_end - { - log::debug!( - " -> setting idx {} ({:?}) to alloc {:?}", - prog_move_src_idx, - self.prog_move_srcs[prog_move_src_idx].0, - alloc - ); - self.prog_move_srcs[prog_move_src_idx].1 = alloc; - prog_move_src_idx += 1; - } - - // move dsts happen at Before point. - // - // Range from inst-Before includes cur inst, while inst-After includes only next inst. - let move_dst_start = if range.from.pos() == InstPosition::Before { - (vreg, range.from.inst()) - } else { - (vreg, range.from.inst().next()) - }; - // Range to (exclusive) inst-Before includes prev - // inst, so to (exclusive) cur inst; range to - // (exclusive) inst-After includes cur inst, so to - // (exclusive) next inst. - let move_dst_end = if range.to.pos() == InstPosition::Before { - (vreg, range.to.inst()) - } else { - (vreg, range.to.inst().next()) - }; - log::debug!( - "vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}", - vreg, - range, - move_dst_start, - move_dst_end - ); - while prog_move_dst_idx < self.prog_move_dsts.len() - && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_start - { - log::debug!(" -> skipping idx {}", prog_move_dst_idx); - prog_move_dst_idx += 1; - } - while prog_move_dst_idx < self.prog_move_dsts.len() - && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_end - { - log::debug!( - " -> setting idx {} ({:?}) to alloc {:?}", - prog_move_dst_idx, - self.prog_move_dsts[prog_move_dst_idx].0, - alloc - ); - self.prog_move_dsts[prog_move_dst_idx].1 = alloc; - prog_move_dst_idx += 1; - } - - prev = entry.index; - } - } - - // Sort the half-moves list. For each (from, to, - // from-vreg) tuple, find the from-alloc and all the - // to-allocs, and insert moves on the block edge. - half_moves.sort_unstable_by_key(|h| h.key); - log::debug!("halfmoves: {:?}", half_moves); - self.stats.halfmoves_count = half_moves.len(); - - let mut i = 0; - while i < half_moves.len() { - // Find a Source. - while i < half_moves.len() && half_moves[i].kind() != HalfMoveKind::Source { - i += 1; - } - if i >= half_moves.len() { - break; - } - let src = &half_moves[i]; - i += 1; - - // Find all Dests. - let dest_key = src.key | 1; - let first_dest = i; - while i < half_moves.len() && half_moves[i].key == dest_key { - i += 1; - } - let last_dest = i; - - log::debug!( - "halfmove match: src {:?} dests {:?}", - src, - &half_moves[first_dest..last_dest] - ); - - // Determine the ProgPoint where moves on this (from, to) - // edge should go: - // - If there is more than one in-edge to `to`, then - // `from` must have only one out-edge; moves go at tail of - // `from` just before last Branch/Ret. - // - Otherwise, there must be at most one in-edge to `to`, - // and moves go at start of `to`. 
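The placement rule in the comment above reduces to a small function of the edge's shape, shown here ahead of the code that applies it. A sketch; the real code also counts a return instruction as an extra out-edge and the entry block as an extra in-edge before applying this rule.

    // Illustrative sketch: where edge moves may be placed, as a function of the
    // edge's shape. A critical edge (multiple preds *and* multiple succs) has no
    // legal spot, which is why such edges must already have been split.
    #[derive(Debug, PartialEq)]
    enum EdgeMoveSpot { TailOfFrom, HeadOfTo }

    fn edge_move_spot(from_out_edges: usize, to_in_edges: usize) -> Option<EdgeMoveSpot> {
        if to_in_edges > 1 && from_out_edges <= 1 {
            Some(EdgeMoveSpot::TailOfFrom)
        } else if to_in_edges <= 1 {
            Some(EdgeMoveSpot::HeadOfTo)
        } else {
            None // critical edge: caller must have split it already
        }
    }

    fn main() {
        assert_eq!(edge_move_spot(1, 3), Some(EdgeMoveSpot::TailOfFrom));
        assert_eq!(edge_move_spot(2, 1), Some(EdgeMoveSpot::HeadOfTo));
        assert_eq!(edge_move_spot(2, 2), None);
    }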
- let from_last_insn = self.func.block_insns(src.from_block()).last(); - let to_first_insn = self.func.block_insns(src.to_block()).first(); - let from_is_ret = self.func.is_ret(from_last_insn); - let to_is_entry = self.func.entry_block() == src.to_block(); - let from_outs = - self.func.block_succs(src.from_block()).len() + if from_is_ret { 1 } else { 0 }; - let to_ins = - self.func.block_preds(src.to_block()).len() + if to_is_entry { 1 } else { 0 }; - - let (insertion_point, prio) = if to_ins > 1 && from_outs <= 1 { - ( - // N.B.: though semantically the edge moves happen - // after the branch, we must insert them before - // the branch because otherwise, of course, they - // would never execute. This is correct even in - // the presence of branches that read register - // inputs (e.g. conditional branches on some RISCs - // that branch on reg zero/not-zero, or any - // indirect branch), but for a very subtle reason: - // all cases of such branches will (or should) - // have multiple successors, and thus due to - // critical-edge splitting, their successors will - // have only the single predecessor, and we prefer - // to insert at the head of the successor in that - // case (rather than here). We make this a - // requirement, in fact: the user of this library - // shall not read registers in a branch - // instruction of there is only one successor per - // the given CFG information. - ProgPoint::before(from_last_insn), - InsertMovePrio::OutEdgeMoves, - ) - } else if to_ins <= 1 { - ( - ProgPoint::before(to_first_insn), - InsertMovePrio::InEdgeMoves, - ) - } else { - panic!( - "Critical edge: can't insert moves between blocks {:?} and {:?}", - src.from_block(), - src.to_block() - ); - }; - - let mut last = None; - for dest in first_dest..last_dest { - let dest = &half_moves[dest]; - if last == Some(dest.alloc) { - continue; - } - self.insert_move( - insertion_point, - prio, - src.alloc, - dest.alloc, - Some(self.vreg_regs[dest.to_vreg().index()]), - ); - last = Some(dest.alloc); - } - } - - // Handle multi-fixed-reg constraints by copying. - for (progpoint, from_preg, to_preg, slot) in - std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) - { - log::debug!( - "multi-fixed-move constraint at {:?} from p{} to p{}", - progpoint, - from_preg.index(), - to_preg.index() - ); - self.insert_move( - progpoint, - InsertMovePrio::MultiFixedReg, - Allocation::reg(self.pregs[from_preg.index()].reg), - Allocation::reg(self.pregs[to_preg.index()].reg), - None, - ); - self.set_alloc( - progpoint.inst(), - slot, - Allocation::reg(self.pregs[to_preg.index()].reg), - ); - } - - // Handle outputs that reuse inputs: copy beforehand, then set - // input's alloc to output's. - // - // Note that the output's allocation may not *actually* be - // valid until InstPosition::After, but the reused input may - // occur at InstPosition::Before. This may appear incorrect, - // but we make it work by ensuring that all *other* inputs are - // extended to InstPosition::After so that the def will not - // interfere. (The liveness computation code does this -- we - // do not require the user to do so.) - // - // One might ask: why not insist that input-reusing defs occur - // at InstPosition::Before? this would be correct, but would - // mean that the reused input and the reusing output - // interfere, *guaranteeing* that every such case would - // require a move. This is really bad on ISAs (like x86) where - // reused inputs are ubiquitous. 
- // - // Another approach might be to put the def at Before, and - // trim the reused input's liverange back to the previous - // instruction's After. This is kind of OK until (i) a block - // boundary occurs between the prior inst and this one, or - // (ii) any moves/spills/reloads occur between the two - // instructions. We really do need the input to be live at - // this inst's Before. - // - // In principle what we really need is a "BeforeBefore" - // program point, but we don't want to introduce that - // everywhere and pay the cost of twice as many ProgPoints - // throughout the allocator. - // - // Or we could introduce a separate move instruction -- this - // is the approach that regalloc.rs takes with "mod" operands - // -- but that is also costly. - // - // So we take this approach (invented by IonMonkey -- somewhat - // hard to discern, though see [0] for a comment that makes - // this slightly less unclear) to avoid interference between - // the actual reused input and reusing output, ensure - // interference (hence no incorrectness) between other inputs - // and the reusing output, and not require a separate explicit - // move instruction. - // - // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110 - for inst in reuse_input_insts { - let mut input_reused: SmallVec<[usize; 4]> = smallvec![]; - for output_idx in 0..self.func.inst_operands(inst).len() { - let operand = self.func.inst_operands(inst)[output_idx]; - if let OperandPolicy::Reuse(input_idx) = operand.policy() { - debug_assert!(!input_reused.contains(&input_idx)); - debug_assert_eq!(operand.pos(), OperandPos::After); - input_reused.push(input_idx); - let input_alloc = self.get_alloc(inst, input_idx); - let output_alloc = self.get_alloc(inst, output_idx); - log::debug!( - "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}", - inst, - output_idx, - output_alloc, - input_idx, - input_alloc - ); - if input_alloc != output_alloc { - #[cfg(debug)] - { - if log::log_enabled!(log::Level::Debug) { - self.annotate( - ProgPoint::before(inst), - format!( - " reuse-input-copy: {} -> {}", - input_alloc, output_alloc - ), - ); - } - } - let input_operand = self.func.inst_operands(inst)[input_idx]; - self.insert_move( - ProgPoint::before(inst), - InsertMovePrio::ReusedInput, - input_alloc, - output_alloc, - Some(input_operand.vreg()), - ); - self.set_alloc(inst, input_idx, output_alloc); - } - } - } - } - - // Sort the prog-moves lists and insert moves to reify the - // input program's move operations. 
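Concretely, the reuse-input loop above does the following for each reusing output: if the reused input and the output received different allocations, insert a copy from the input's allocation to the output's just before the instruction, and then report the output's allocation for the input slot as well. A small model of that decision, with a hypothetical `Alloc` wrapper standing in for `Allocation`:

    // Illustrative sketch: the reuse-input fixup. If the reused input and the
    // reusing output got different allocations, emit a copy before the
    // instruction and rewrite the input's slot to the output's allocation.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct Alloc(u32);

    struct Fixup {
        copy_before: Option<(Alloc, Alloc)>, // (from, to), inserted at Before
        input_slot_alloc: Alloc,             // what the input slot reports afterwards
    }

    fn fix_reused_input(input_alloc: Alloc, output_alloc: Alloc) -> Fixup {
        if input_alloc != output_alloc {
            Fixup {
                copy_before: Some((input_alloc, output_alloc)),
                input_slot_alloc: output_alloc,
            }
        } else {
            Fixup { copy_before: None, input_slot_alloc: input_alloc }
        }
    }

    fn main() {
        // e.g. a two-address "add d := s1 + s2" where d reuses s1:
        // s1 was allocated to r3, d to r0 -> copy r3 -> r0, then both report r0.
        let f = fix_reused_input(Alloc(3), Alloc(0));
        assert_eq!(f.copy_before, Some((Alloc(3), Alloc(0))));
        assert_eq!(f.input_slot_alloc, Alloc(0));
    }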
- self.prog_move_srcs - .sort_unstable_by_key(|((_, inst), _)| *inst); - self.prog_move_dsts - .sort_unstable_by_key(|((_, inst), _)| inst.prev()); - let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]); - let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]); - assert_eq!(prog_move_srcs.len(), prog_move_dsts.len()); - for (&((_, from_inst), from_alloc), &((to_vreg, to_inst), to_alloc)) in - prog_move_srcs.iter().zip(prog_move_dsts.iter()) - { - log::debug!( - "program move at inst {:?}: alloc {:?} -> {:?} (v{})", - from_inst, - from_alloc, - to_alloc, - to_vreg.index(), - ); - assert!(!from_alloc.is_none()); - assert!(!to_alloc.is_none()); - assert_eq!(from_inst, to_inst.prev()); - // N.B.: these moves happen with the *same* priority as - // LR-to-LR moves, because they work just like them: they - // connect a use at one progpoint (move-After) with a def - // at an adjacent progpoint (move+1-Before), so they must - // happen in parallel with all other LR-to-LR moves. - self.insert_move( - ProgPoint::before(to_inst), - InsertMovePrio::Regular, - from_alloc, - to_alloc, - Some(self.vreg_regs[to_vreg.index()]), - ); - } - } - - fn resolve_inserted_moves(&mut self) { - // For each program point, gather all moves together. Then - // resolve (see cases below). - let mut i = 0; - self.inserted_moves - .sort_unstable_by_key(|m| (m.pos.to_index(), m.prio)); - - // Redundant-move elimination state tracker. - let mut redundant_moves = RedundantMoveEliminator::default(); - - fn redundant_move_process_side_effects<'a, F: Function>( - this: &Env<'a, F>, - redundant_moves: &mut RedundantMoveEliminator, - from: ProgPoint, - to: ProgPoint, - ) { - // If any safepoints in range, clear and return. - // Also, if we cross a block boundary, clear and return. - if this.cfginfo.insn_block[from.inst().index()] - != this.cfginfo.insn_block[to.inst().index()] - { - redundant_moves.clear(); - return; - } - for inst in from.inst().index()..=to.inst().index() { - if this.func.is_safepoint(Inst::new(inst)) { - redundant_moves.clear(); - return; - } - } - - let start_inst = if from.pos() == InstPosition::Before { - from.inst() - } else { - from.inst().next() - }; - let end_inst = if to.pos() == InstPosition::Before { - to.inst() - } else { - to.inst().next() - }; - for inst in start_inst.index()..end_inst.index() { - let inst = Inst::new(inst); - for (i, op) in this.func.inst_operands(inst).iter().enumerate() { - match op.kind() { - OperandKind::Def | OperandKind::Mod => { - let alloc = this.get_alloc(inst, i); - redundant_moves.clear_alloc(alloc); - } - _ => {} - } - } - for reg in this.func.inst_clobbers(inst) { - redundant_moves.clear_alloc(Allocation::reg(*reg)); - } - } - } - - let mut last_pos = ProgPoint::before(Inst::new(0)); - - while i < self.inserted_moves.len() { - let start = i; - let pos = self.inserted_moves[i].pos; - let prio = self.inserted_moves[i].prio; - while i < self.inserted_moves.len() - && self.inserted_moves[i].pos == pos - && self.inserted_moves[i].prio == prio - { - i += 1; - } - let moves = &self.inserted_moves[start..i]; - - redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos); - last_pos = pos; - - // Gather all the moves with Int class and Float class - // separately. These cannot interact, so it is safe to - // have two separate ParallelMove instances. They need to - // be separate because moves between the two classes are - // impossible. 
(We could enhance ParallelMoves to - // understand register classes and take multiple scratch - // regs, but this seems simpler.) - let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; - let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; - let mut self_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; - - for m in moves { - if m.from_alloc.is_reg() && m.to_alloc.is_reg() { - assert_eq!(m.from_alloc.class(), m.to_alloc.class()); - } - if m.from_alloc == m.to_alloc { - if m.to_vreg.is_some() { - self_moves.push(m.clone()); - } - continue; - } - match m.from_alloc.class() { - RegClass::Int => { - int_moves.push(m.clone()); - } - RegClass::Float => { - float_moves.push(m.clone()); - } - } - } - - for &(regclass, moves) in - &[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)] - { - // All moves in `moves` semantically happen in - // parallel. Let's resolve these to a sequence of moves - // that can be done one at a time. - let scratch = self.env.scratch_by_class[regclass as u8 as usize]; - let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch)); - log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); - for m in moves { - if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() { - log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); - parallel_moves.add(m.from_alloc, m.to_alloc, m.to_vreg); - } - } - - let resolved = parallel_moves.resolve(); - - // If (i) the scratch register is used, and (ii) a - // stack-to-stack move exists, then we need to - // allocate an additional scratch spillslot to which - // we can temporarily spill the scratch reg when we - // lower the stack-to-stack move to a - // stack-to-scratch-to-stack sequence. - let scratch_used = resolved.iter().any(|&(src, dst, _)| { - src == Allocation::reg(scratch) || dst == Allocation::reg(scratch) - }); - let stack_stack_move = resolved - .iter() - .any(|&(src, dst, _)| src.is_stack() && dst.is_stack()); - let extra_slot = if scratch_used && stack_stack_move { - if self.extra_spillslot[regclass as u8 as usize].is_none() { - let slot = self.allocate_spillslot(regclass); - self.extra_spillslot[regclass as u8 as usize] = Some(slot); - } - self.extra_spillslot[regclass as u8 as usize] - } else { - None - }; - - let mut scratch_used_yet = false; - for (src, dst, to_vreg) in resolved { - log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); - let action = redundant_moves.process_move(src, dst, to_vreg); - if !action.elide { - if dst == Allocation::reg(scratch) { - scratch_used_yet = true; - } - if src.is_stack() && dst.is_stack() { - if !scratch_used_yet { - self.add_edit( - pos, - prio, - Edit::Move { - from: src, - to: Allocation::reg(scratch), - to_vreg, - }, - ); - self.add_edit( - pos, - prio, - Edit::Move { - from: Allocation::reg(scratch), - to: dst, - to_vreg, - }, - ); - } else { - assert!(extra_slot.is_some()); - self.add_edit( - pos, - prio, - Edit::Move { - from: Allocation::reg(scratch), - to: extra_slot.unwrap(), - to_vreg: None, - }, - ); - self.add_edit( - pos, - prio, - Edit::Move { - from: src, - to: Allocation::reg(scratch), - to_vreg, - }, - ); - self.add_edit( - pos, - prio, - Edit::Move { - from: Allocation::reg(scratch), - to: dst, - to_vreg, - }, - ); - self.add_edit( - pos, - prio, - Edit::Move { - from: extra_slot.unwrap(), - to: Allocation::reg(scratch), - to_vreg: None, - }, - ); - } - } else { - self.add_edit( - pos, - prio, - Edit::Move { - from: src, - to: dst, - to_vreg, - }, - ); - } - } else { - log::debug!(" -> redundant move elided"); - } - 
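The stack-to-stack case above has two shapes: while the scratch register is still unused in the resolved sequence it is borrowed directly, and once the move resolver has parked a live value in it, that value is first saved to the extra spillslot and restored afterwards. A sketch over simplified locations (`Loc` stands in for `Allocation`), producing the same edit sequence as the code above:

    // Illustrative sketch: lowering one stack-to-stack move with a scratch
    // register, optionally preserving the scratch in an extra spillslot.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum Loc { Reg(u8), Stack(u32) }

    fn lower_stack_to_stack(
        src: Loc,
        dst: Loc,
        scratch: Loc,
        scratch_live: bool,
        extra_slot: Loc,
    ) -> Vec<(Loc, Loc)> {
        let mut edits = Vec::new();
        if scratch_live {
            edits.push((scratch, extra_slot)); // park the in-flight scratch value
        }
        edits.push((src, scratch));
        edits.push((scratch, dst));
        if scratch_live {
            edits.push((extra_slot, scratch)); // restore it afterwards
        }
        edits
    }

    fn main() {
        let seq = lower_stack_to_stack(Loc::Stack(0), Loc::Stack(4), Loc::Reg(12), true, Loc::Stack(64));
        assert_eq!(
            seq,
            vec![
                (Loc::Reg(12), Loc::Stack(64)),
                (Loc::Stack(0), Loc::Reg(12)),
                (Loc::Reg(12), Loc::Stack(4)),
                (Loc::Stack(64), Loc::Reg(12)),
            ]
        );
    }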
if let Some((alloc, vreg)) = action.def_alloc { - log::debug!( - " -> converted to DefAlloc: alloc {} vreg {}", - alloc, - vreg - ); - self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); - } - } - } - - for m in &self_moves { - log::debug!( - "self move at pos {:?} prio {:?}: {} -> {} to_vreg {:?}", - pos, - prio, - m.from_alloc, - m.to_alloc, - m.to_vreg - ); - let action = redundant_moves.process_move(m.from_alloc, m.to_alloc, m.to_vreg); - assert!(action.elide); - if let Some((alloc, vreg)) = action.def_alloc { - log::debug!(" -> DefAlloc: alloc {} vreg {}", alloc, vreg); - self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); - } - } - } - - // Add edits to describe blockparam locations too. This is - // required by the checker. This comes after any edge-moves. - self.blockparam_allocs - .sort_unstable_by_key(|&(block, idx, _, _)| (block, idx)); - self.stats.blockparam_allocs_count = self.blockparam_allocs.len(); - let mut i = 0; - while i < self.blockparam_allocs.len() { - let start = i; - let block = self.blockparam_allocs[i].0; - while i < self.blockparam_allocs.len() && self.blockparam_allocs[i].0 == block { - i += 1; - } - let params = &self.blockparam_allocs[start..i]; - let vregs = params - .iter() - .map(|(_, _, vreg_idx, _)| self.vreg_regs[vreg_idx.index()]) - .collect::>(); - let allocs = params - .iter() - .map(|(_, _, _, alloc)| *alloc) - .collect::>(); - assert_eq!(vregs.len(), self.func.block_params(block).len()); - assert_eq!(allocs.len(), self.func.block_params(block).len()); - self.add_edit( - self.cfginfo.block_entry[block.index()], - InsertMovePrio::BlockParam, - Edit::BlockParams { vregs, allocs }, - ); - } - - // Ensure edits are in sorted ProgPoint order. N.B.: this must - // be a stable sort! We have to keep the order produced by the - // parallel-move resolver for all moves within a single sort - // key. - self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio)); - self.stats.edits_count = self.edits.len(); - - // Add debug annotations. - if self.annotations_enabled { - for i in 0..self.edits.len() { - let &(pos, _, ref edit) = &self.edits[i]; - match edit { - &Edit::Move { from, to, to_vreg } => { - self.annotate( - ProgPoint::from_index(pos), - format!("move {} -> {} ({:?})", from, to, to_vreg), - ); - } - &Edit::BlockParams { - ref vregs, - ref allocs, - } => { - let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); - self.annotate(ProgPoint::from_index(pos), s); - } - &Edit::DefAlloc { alloc, vreg } => { - let s = format!("defalloc {:?} := {:?}", alloc, vreg); - self.annotate(ProgPoint::from_index(pos), s); - } - } - } - } - } - - fn add_edit(&mut self, pos: ProgPoint, prio: InsertMovePrio, edit: Edit) { - match &edit { - &Edit::Move { from, to, to_vreg } if from == to && to_vreg.is_none() => return, - &Edit::Move { from, to, .. } if from.is_reg() && to.is_reg() => { - assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class()); - } - _ => {} - } - - self.edits.push((pos.to_index(), prio, edit)); - } - - fn compute_stackmaps(&mut self) { - // For each ref-typed vreg, iterate through ranges and find - // safepoints in-range. Add the SpillSlot to the stackmap. - - if self.func.reftype_vregs().is_empty() { - return; - } - - // Given `safepoints_per_vreg` from the liveness computation, - // all we have to do is, for each vreg in this map, step - // through the LiveRanges along with a sorted list of - // safepoints; and for each safepoint in the current range, - // emit the allocation into the `safepoint_slots` list. 
- - log::debug!("safepoints_per_vreg = {:?}", self.safepoints_per_vreg); - - for vreg in self.func.reftype_vregs() { - log::debug!("generating safepoint info for vreg {}", vreg); - let vreg = VRegIndex::new(vreg.vreg()); - let mut safepoints: Vec = self - .safepoints_per_vreg - .get(&vreg.index()) - .unwrap() - .iter() - .map(|&inst| ProgPoint::before(inst)) - .collect(); - safepoints.sort_unstable(); - log::debug!(" -> live over safepoints: {:?}", safepoints); - - let mut safepoint_idx = 0; - for entry in &self.vregs[vreg.index()].ranges { - let range = entry.range; - let alloc = self.get_alloc_for_range(entry.index); - log::debug!(" -> range {:?}: alloc {}", range, alloc); - while safepoint_idx < safepoints.len() && safepoints[safepoint_idx] < range.to { - if safepoints[safepoint_idx] < range.from { - safepoint_idx += 1; - continue; - } - log::debug!(" -> covers safepoint {:?}", safepoints[safepoint_idx]); - - let slot = alloc - .as_stack() - .expect("Reference-typed value not in spillslot at safepoint"); - self.safepoint_slots.push((safepoints[safepoint_idx], slot)); - safepoint_idx += 1; - } - } - } - - self.safepoint_slots.sort_unstable(); - log::debug!("final safepoint slots info: {:?}", self.safepoint_slots); - } - - pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { - self.create_pregs_and_vregs(); - self.compute_liveness()?; - self.merge_vreg_bundles(); - self.queue_bundles(); - if log::log_enabled!(log::Level::Debug) { - self.dump_state(); - } - Ok(()) + Ok(()) } pub(crate) fn run(&mut self) -> Result<(), RegAllocError> { @@ -5031,95 +110,6 @@ impl<'a, F: Function> Env<'a, F> { self.compute_stackmaps(); Ok(()) } - - fn annotate(&mut self, progpoint: ProgPoint, s: String) { - if self.annotations_enabled { - self.debug_annotations - .entry(progpoint) - .or_insert_with(|| vec![]) - .push(s); - } - } - - fn dump_results(&self) { - log::info!("=== REGALLOC RESULTS ==="); - for block in 0..self.func.blocks() { - let block = Block::new(block); - log::info!( - "block{}: [succs {:?} preds {:?}]", - block.index(), - self.func - .block_succs(block) - .iter() - .map(|b| b.index()) - .collect::>(), - self.func - .block_preds(block) - .iter() - .map(|b| b.index()) - .collect::>() - ); - for inst in self.func.block_insns(block).iter() { - for annotation in self - .debug_annotations - .get(&ProgPoint::before(inst)) - .map(|v| &v[..]) - .unwrap_or(&[]) - { - log::info!(" inst{}-pre: {}", inst.index(), annotation); - } - let ops = self - .func - .inst_operands(inst) - .iter() - .map(|op| format!("{}", op)) - .collect::>(); - let clobbers = self - .func - .inst_clobbers(inst) - .iter() - .map(|preg| format!("{}", preg)) - .collect::>(); - let allocs = (0..ops.len()) - .map(|i| format!("{}", self.get_alloc(inst, i))) - .collect::>(); - let opname = if self.func.is_branch(inst) { - "br" - } else if self.func.is_call(inst) { - "call" - } else if self.func.is_ret(inst) { - "ret" - } else { - "op" - }; - let args = ops - .iter() - .zip(allocs.iter()) - .map(|(op, alloc)| format!("{} [{}]", op, alloc)) - .collect::>(); - let clobbers = if clobbers.is_empty() { - "".to_string() - } else { - format!(" [clobber: {}]", clobbers.join(", ")) - }; - log::info!( - " inst{}: {} {}{}", - inst.index(), - opname, - args.join(", "), - clobbers - ); - for annotation in self - .debug_annotations - .get(&ProgPoint::after(inst)) - .map(|v| &v[..]) - .unwrap_or(&[]) - { - log::info!(" inst{}-post: {}", inst.index(), annotation); - } - } - } - } } pub fn run( diff --git a/src/ion/moves.rs b/src/ion/moves.rs new 
file mode 100644 index 00000000..18edd21d --- /dev/null +++ b/src/ion/moves.rs @@ -0,0 +1,1167 @@ +/* + * The following license applies to this file, which was initially + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Since the initial port, the design has been substantially evolved + * and optimized. + */ + +//! Move resolution. + +use super::{ + Env, InsertMovePrio, InsertedMove, LiveRangeFlag, LiveRangeIndex, RedundantMoveEliminator, + VRegIndex, SLOT_NONE, +}; + +use crate::moves::ParallelMoves; +use crate::{ + Allocation, Block, Edit, Function, Inst, InstPosition, OperandKind, OperandPolicy, OperandPos, + ProgPoint, RegClass, VReg, +}; +use log::debug; +use smallvec::{smallvec, SmallVec}; +use std::fmt::Debug; + +impl<'a, F: Function> Env<'a, F> { + pub fn is_start_of_block(&self, pos: ProgPoint) -> bool { + let block = self.cfginfo.insn_block[pos.inst().index()]; + pos == self.cfginfo.block_entry[block.index()] + } + pub fn is_end_of_block(&self, pos: ProgPoint) -> bool { + let block = self.cfginfo.insn_block[pos.inst().index()]; + pos == self.cfginfo.block_exit[block.index()] + } + + pub fn insert_move( + &mut self, + pos: ProgPoint, + prio: InsertMovePrio, + from_alloc: Allocation, + to_alloc: Allocation, + to_vreg: Option, + ) { + debug!( + "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}", + pos, prio, from_alloc, to_alloc + ); + match (from_alloc.as_reg(), to_alloc.as_reg()) { + (Some(from), Some(to)) => { + assert_eq!(from.class(), to.class()); + } + _ => {} + } + self.inserted_moves.push(InsertedMove { + pos, + prio, + from_alloc, + to_alloc, + to_vreg, + }); + } + + pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation { + let inst_allocs = &self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; + inst_allocs[slot] + } + + pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) { + let inst_allocs = &mut self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; + inst_allocs[slot] = alloc; + } + + pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { + log::debug!("get_alloc_for_range: {:?}", range); + let bundle = self.ranges[range.index()].bundle; + log::debug!(" -> bundle: {:?}", bundle); + let bundledata = &self.bundles[bundle.index()]; + log::debug!(" -> allocation {:?}", bundledata.allocation); + if bundledata.allocation != Allocation::none() { + bundledata.allocation + } else { + log::debug!(" -> spillset {:?}", bundledata.spillset); + log::debug!( + " -> spill slot {:?}", + self.spillsets[bundledata.spillset.index()].slot + ); + self.spillslots[self.spillsets[bundledata.spillset.index()].slot.index()].alloc + } + } + + pub fn apply_allocations_and_insert_moves(&mut self) { + log::debug!("apply_allocations_and_insert_moves"); + log::debug!("blockparam_ins: {:?}", self.blockparam_ins); + log::debug!("blockparam_outs: {:?}", self.blockparam_outs); + + // Now that all splits are done, we can pay the cost once to + // sort VReg range lists and update with the final ranges. 
+ for vreg in &mut self.vregs { + for entry in &mut vreg.ranges { + entry.range = self.ranges[entry.index.index()].range; + } + vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); + } + + /// We create "half-moves" in order to allow a single-scan + /// strategy with a subsequent sort. Basically, the key idea + /// is that as our single scan through a range for a vreg hits + /// upon the source or destination of an edge-move, we emit a + /// "half-move". These half-moves are carefully keyed in a + /// particular sort order (the field order below is + /// significant!) so that all half-moves on a given (from, to) + /// block-edge appear contiguously, and then all moves from a + /// given vreg appear contiguously. Within a given from-vreg, + /// pick the first `Source` (there should only be one, but + /// imprecision in liveranges due to loop handling sometimes + /// means that a blockparam-out is also recognized as a normal-out), + /// and then for each `Dest`, copy the source-alloc to that + /// dest-alloc. + #[derive(Clone, Debug, PartialEq, Eq)] + struct HalfMove { + key: u64, + alloc: Allocation, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] + #[repr(u8)] + enum HalfMoveKind { + Source = 0, + Dest = 1, + } + fn half_move_key( + from_block: Block, + to_block: Block, + to_vreg: VRegIndex, + kind: HalfMoveKind, + ) -> u64 { + assert!(from_block.index() < 1 << 21); + assert!(to_block.index() < 1 << 21); + assert!(to_vreg.index() < 1 << 21); + ((from_block.index() as u64) << 43) + | ((to_block.index() as u64) << 22) + | ((to_vreg.index() as u64) << 1) + | (kind as u8 as u64) + } + impl HalfMove { + fn from_block(&self) -> Block { + Block::new(((self.key >> 43) & ((1 << 21) - 1)) as usize) + } + fn to_block(&self) -> Block { + Block::new(((self.key >> 22) & ((1 << 21) - 1)) as usize) + } + fn to_vreg(&self) -> VRegIndex { + VRegIndex::new(((self.key >> 1) & ((1 << 21) - 1)) as usize) + } + fn kind(&self) -> HalfMoveKind { + if self.key & 1 == 1 { + HalfMoveKind::Dest + } else { + HalfMoveKind::Source + } + } + } + + let mut half_moves: Vec = Vec::with_capacity(6 * self.func.insts()); + let mut reuse_input_insts = Vec::with_capacity(self.func.insts() / 2); + + let mut blockparam_in_idx = 0; + let mut blockparam_out_idx = 0; + let mut prog_move_src_idx = 0; + let mut prog_move_dst_idx = 0; + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + + let pinned_alloc = if self.vregs[vreg.index()].is_pinned { + self.func.is_pinned_vreg(self.vreg_regs[vreg.index()]) + } else { + None + }; + + // For each range in each vreg, insert moves or + // half-moves. We also scan over `blockparam_ins` and + // `blockparam_outs`, which are sorted by (block, vreg), + // and over program-move srcs/dsts to fill in allocations. 
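A standalone sketch of the key packing used by `half_move_key` above, written against plain `u64` values instead of the crate's index types. It shows why a single unstable sort is enough: all half-moves for a given (from-block, to-block, to-vreg) tuple become contiguous, with the Source (kind bit = 0) immediately preceding its Dests (kind bit = 1).

// Illustrative only: mirrors the 21/21/21/1-bit layout of `half_move_key`.
fn key(from_block: u64, to_block: u64, to_vreg: u64, is_dest: bool) -> u64 {
    assert!(from_block < (1u64 << 21) && to_block < (1u64 << 21) && to_vreg < (1u64 << 21));
    (from_block << 43) | (to_block << 22) | (to_vreg << 1) | (is_dest as u64)
}

fn main() {
    let mut keys = vec![
        key(3, 7, 42, true),  // Dest for edge block3->block7, vreg 42
        key(3, 7, 42, false), // Source for the same edge and vreg
        key(3, 7, 41, true),  // Dest for a different vreg on the same edge
        key(2, 7, 42, false), // Source on a different edge
    ];
    keys.sort_unstable();
    // Sorted: edge 2->7 first, then edge 3->7 grouped by vreg, and within
    // (3->7, v42) the Source comes right before its Dest.
    assert_eq!(
        keys,
        vec![
            key(2, 7, 42, false),
            key(3, 7, 41, true),
            key(3, 7, 42, false),
            key(3, 7, 42, true),
        ]
    );
}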
+ let mut prev = LiveRangeIndex::invalid(); + for range_idx in 0..self.vregs[vreg.index()].ranges.len() { + let entry = self.vregs[vreg.index()].ranges[range_idx]; + let alloc = pinned_alloc + .map(|preg| Allocation::reg(preg)) + .unwrap_or_else(|| self.get_alloc_for_range(entry.index)); + let range = entry.range; + log::debug!( + "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?} (pinned {:?})", + vreg, + entry.index, + range, + alloc, + pinned_alloc, + ); + debug_assert!(alloc != Allocation::none()); + + if self.annotations_enabled { + self.annotate( + range.from, + format!( + " <<< start v{} in {} (range{}) (bundle{})", + vreg.index(), + alloc, + entry.index.index(), + self.ranges[entry.index.index()].bundle.raw_u32(), + ), + ); + self.annotate( + range.to, + format!( + " end v{} in {} (range{}) (bundle{}) >>>", + vreg.index(), + alloc, + entry.index.index(), + self.ranges[entry.index.index()].bundle.raw_u32(), + ), + ); + } + + // Does this range follow immediately after a prior + // range in the same block? If so, insert a move (if + // the allocs differ). We do this directly rather than + // with half-moves because we eagerly know both sides + // already (and also, half-moves are specific to + // inter-block transfers). + // + // Note that we do *not* do this if there is also a + // def as the first use in the new range: it's + // possible that an old liverange covers the Before + // pos of an inst, a new liverange covers the After + // pos, and the def also happens at After. In this + // case we don't want to an insert a move after the + // instruction copying the old liverange. + // + // Note also that we assert that the new range has to + // start at the Before-point of an instruction; we + // can't insert a move that logically happens just + // before After (i.e. in the middle of a single + // instruction). + // + // Also note that this case is not applicable to + // pinned vregs (because they are always in one PReg). + if pinned_alloc.is_none() && prev.is_valid() { + let prev_alloc = self.get_alloc_for_range(prev); + let prev_range = self.ranges[prev.index()].range; + let first_is_def = + self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef); + debug_assert!(prev_alloc != Allocation::none()); + + if prev_range.to == range.from + && !self.is_start_of_block(range.from) + && !first_is_def + { + log::debug!( + "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", + prev.index(), + entry.index.index(), + prev_alloc, + alloc, + vreg.index() + ); + assert_eq!(range.from.pos(), InstPosition::Before); + self.insert_move( + range.from, + InsertMovePrio::Regular, + prev_alloc, + alloc, + Some(self.vreg_regs[vreg.index()]), + ); + } + } + + // The block-to-block edge-move logic is not + // applicable to pinned vregs, which are always in one + // PReg (so never need moves within their own vreg + // ranges). + if pinned_alloc.is_none() { + // Scan over blocks whose ends are covered by this + // range. For each, for each successor that is not + // already in this range (hence guaranteed to have the + // same allocation) and if the vreg is live, add a + // Source half-move. 
+ let mut block = self.cfginfo.insn_block[range.from.inst().index()]; + while block.is_valid() && block.index() < self.func.blocks() { + if range.to < self.cfginfo.block_exit[block.index()].next() { + break; + } + log::debug!("examining block with end in range: block{}", block.index()); + for &succ in self.func.block_succs(block) { + log::debug!( + " -> has succ block {} with entry {:?}", + succ.index(), + self.cfginfo.block_entry[succ.index()] + ); + if range.contains_point(self.cfginfo.block_entry[succ.index()]) { + continue; + } + log::debug!(" -> out of this range, requires half-move if live"); + if self.is_live_in(succ, vreg) { + log::debug!(" -> live at input to succ, adding halfmove"); + half_moves.push(HalfMove { + key: half_move_key(block, succ, vreg, HalfMoveKind::Source), + alloc, + }); + } + } + + // Scan forward in `blockparam_outs`, adding all + // half-moves for outgoing values to blockparams + // in succs. + log::debug!( + "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", + vreg.index(), + block.index(), + blockparam_out_idx, + ); + while blockparam_out_idx < self.blockparam_outs.len() { + let (from_vreg, from_block, to_block, to_vreg) = + self.blockparam_outs[blockparam_out_idx]; + if (from_vreg, from_block) > (vreg, block) { + break; + } + if (from_vreg, from_block) == (vreg, block) { + log::debug!( + " -> found: from v{} block{} to v{} block{}", + from_vreg.index(), + from_block.index(), + to_vreg.index(), + to_vreg.index() + ); + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Source, + ), + alloc, + }); + + if self.annotations_enabled { + self.annotate( + self.cfginfo.block_exit[block.index()], + format!( + "blockparam-out: block{} to block{}: v{} to v{} in {}", + from_block.index(), + to_block.index(), + from_vreg.index(), + to_vreg.index(), + alloc + ), + ); + } + } + + blockparam_out_idx += 1; + } + + block = block.next(); + } + + // Scan over blocks whose beginnings are covered by + // this range and for which the vreg is live at the + // start of the block. For each, for each predecessor, + // add a Dest half-move. + let mut block = self.cfginfo.insn_block[range.from.inst().index()]; + if self.cfginfo.block_entry[block.index()] < range.from { + block = block.next(); + } + while block.is_valid() && block.index() < self.func.blocks() { + if self.cfginfo.block_entry[block.index()] >= range.to { + break; + } + + // Add half-moves for blockparam inputs. 
+ log::debug!( + "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", + vreg.index(), + block.index(), + blockparam_in_idx + ); + while blockparam_in_idx < self.blockparam_ins.len() { + let (to_vreg, to_block, from_block) = + self.blockparam_ins[blockparam_in_idx]; + if (to_vreg, to_block) > (vreg, block) { + break; + } + if (to_vreg, to_block) == (vreg, block) { + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Dest, + ), + alloc, + }); + log::debug!( + "match: blockparam_in: v{} in block{} from block{} into {}", + to_vreg.index(), + to_block.index(), + from_block.index(), + alloc, + ); + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_entry[block.index()], + format!( + "blockparam-in: block{} to block{}:into v{} in {}", + from_block.index(), + to_block.index(), + to_vreg.index(), + alloc + ), + ); + } + } + } + blockparam_in_idx += 1; + } + + if !self.is_live_in(block, vreg) { + block = block.next(); + continue; + } + + log::debug!( + "scanning preds at vreg {} block {} for ends outside the range", + vreg.index(), + block.index() + ); + + // Now find any preds whose ends are not in the + // same range, and insert appropriate moves. + for &pred in self.func.block_preds(block) { + log::debug!( + "pred block {} has exit {:?}", + pred.index(), + self.cfginfo.block_exit[pred.index()] + ); + if range.contains_point(self.cfginfo.block_exit[pred.index()]) { + continue; + } + log::debug!(" -> requires half-move"); + half_moves.push(HalfMove { + key: half_move_key(pred, block, vreg, HalfMoveKind::Dest), + alloc, + }); + } + + block = block.next(); + } + + // If this is a blockparam vreg and the start of block + // is in this range, add to blockparam_allocs. + let (blockparam_block, blockparam_idx) = + self.cfginfo.vreg_def_blockparam[vreg.index()]; + if blockparam_block.is_valid() + && range.contains_point(self.cfginfo.block_entry[blockparam_block.index()]) + { + self.blockparam_allocs.push(( + blockparam_block, + blockparam_idx, + vreg, + alloc, + )); + } + } + + // Scan over def/uses and apply allocations. + for use_idx in 0..self.ranges[entry.index.index()].uses.len() { + let usedata = self.ranges[entry.index.index()].uses[use_idx]; + log::debug!("applying to use: {:?}", usedata); + debug_assert!(range.contains_point(usedata.pos)); + let inst = usedata.pos.inst(); + let slot = usedata.slot; + let operand = usedata.operand; + // Safepoints add virtual uses with no slots; + // avoid these. + if slot != SLOT_NONE { + self.set_alloc(inst, slot as usize, alloc); + } + if let OperandPolicy::Reuse(_) = operand.policy() { + reuse_input_insts.push(inst); + } + } + + // Scan over program move srcs/dsts to fill in allocations. + + // Move srcs happen at `After` of a given + // inst. Compute [from, to) semi-inclusive range of + // inst indices for which we should fill in the source + // with this LR's allocation. + // + // range from inst-Before or inst-After covers cur + // inst's After; so includes move srcs from inst. + let move_src_start = (vreg, range.from.inst()); + // range to (exclusive) inst-Before or inst-After + // covers only prev inst's After; so includes move + // srcs to (exclusive) inst. 
+ let move_src_end = (vreg, range.to.inst()); + log::debug!( + "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", + vreg, + range, + move_src_start, + move_src_end + ); + while prog_move_src_idx < self.prog_move_srcs.len() + && self.prog_move_srcs[prog_move_src_idx].0 < move_src_start + { + log::debug!(" -> skipping idx {}", prog_move_src_idx); + prog_move_src_idx += 1; + } + while prog_move_src_idx < self.prog_move_srcs.len() + && self.prog_move_srcs[prog_move_src_idx].0 < move_src_end + { + log::debug!( + " -> setting idx {} ({:?}) to alloc {:?}", + prog_move_src_idx, + self.prog_move_srcs[prog_move_src_idx].0, + alloc + ); + self.prog_move_srcs[prog_move_src_idx].1 = alloc; + prog_move_src_idx += 1; + } + + // move dsts happen at Before point. + // + // Range from inst-Before includes cur inst, while inst-After includes only next inst. + let move_dst_start = if range.from.pos() == InstPosition::Before { + (vreg, range.from.inst()) + } else { + (vreg, range.from.inst().next()) + }; + // Range to (exclusive) inst-Before includes prev + // inst, so to (exclusive) cur inst; range to + // (exclusive) inst-After includes cur inst, so to + // (exclusive) next inst. + let move_dst_end = if range.to.pos() == InstPosition::Before { + (vreg, range.to.inst()) + } else { + (vreg, range.to.inst().next()) + }; + log::debug!( + "vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}", + vreg, + range, + move_dst_start, + move_dst_end + ); + while prog_move_dst_idx < self.prog_move_dsts.len() + && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_start + { + log::debug!(" -> skipping idx {}", prog_move_dst_idx); + prog_move_dst_idx += 1; + } + while prog_move_dst_idx < self.prog_move_dsts.len() + && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_end + { + log::debug!( + " -> setting idx {} ({:?}) to alloc {:?}", + prog_move_dst_idx, + self.prog_move_dsts[prog_move_dst_idx].0, + alloc + ); + self.prog_move_dsts[prog_move_dst_idx].1 = alloc; + prog_move_dst_idx += 1; + } + + prev = entry.index; + } + } + + // Sort the half-moves list. For each (from, to, + // from-vreg) tuple, find the from-alloc and all the + // to-allocs, and insert moves on the block edge. + half_moves.sort_unstable_by_key(|h| h.key); + log::debug!("halfmoves: {:?}", half_moves); + self.stats.halfmoves_count = half_moves.len(); + + let mut i = 0; + while i < half_moves.len() { + // Find a Source. + while i < half_moves.len() && half_moves[i].kind() != HalfMoveKind::Source { + i += 1; + } + if i >= half_moves.len() { + break; + } + let src = &half_moves[i]; + i += 1; + + // Find all Dests. + let dest_key = src.key | 1; + let first_dest = i; + while i < half_moves.len() && half_moves[i].key == dest_key { + i += 1; + } + let last_dest = i; + + log::debug!( + "halfmove match: src {:?} dests {:?}", + src, + &half_moves[first_dest..last_dest] + ); + + // Determine the ProgPoint where moves on this (from, to) + // edge should go: + // - If there is more than one in-edge to `to`, then + // `from` must have only one out-edge; moves go at tail of + // `from` just before last Branch/Ret. + // - Otherwise, there must be at most one in-edge to `to`, + // and moves go at start of `to`. 
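A minimal sketch of the placement rule stated just above, using plain edge counts; the enum, function name, and error text are illustrative, not the crate's API.

#[derive(Debug, PartialEq, Eq)]
enum EdgeMovePos {
    EndOfFrom, // just before the terminator of the predecessor
    StartOfTo, // at the entry of the successor
}

fn edge_move_pos(from_out_edges: usize, to_in_edges: usize) -> Result<EdgeMovePos, &'static str> {
    if to_in_edges > 1 && from_out_edges <= 1 {
        // Merge point: the predecessor has a single successor, so its tail
        // is a safe place for the edge moves.
        Ok(EdgeMovePos::EndOfFrom)
    } else if to_in_edges <= 1 {
        // Simple edge or split point: the successor has a single
        // predecessor, so its head is a safe place for the edge moves.
        Ok(EdgeMovePos::StartOfTo)
    } else {
        // Both endpoints have multiple edges: a critical edge the client
        // was required to split before allocation.
        Err("critical edge: moves cannot be placed on either side")
    }
}

fn main() {
    assert_eq!(edge_move_pos(1, 3), Ok(EdgeMovePos::EndOfFrom));
    assert_eq!(edge_move_pos(2, 1), Ok(EdgeMovePos::StartOfTo));
    assert!(edge_move_pos(2, 2).is_err());
}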
+ let from_last_insn = self.func.block_insns(src.from_block()).last(); + let to_first_insn = self.func.block_insns(src.to_block()).first(); + let from_is_ret = self.func.is_ret(from_last_insn); + let to_is_entry = self.func.entry_block() == src.to_block(); + let from_outs = + self.func.block_succs(src.from_block()).len() + if from_is_ret { 1 } else { 0 }; + let to_ins = + self.func.block_preds(src.to_block()).len() + if to_is_entry { 1 } else { 0 }; + + let (insertion_point, prio) = if to_ins > 1 && from_outs <= 1 { + ( + // N.B.: though semantically the edge moves happen + // after the branch, we must insert them before + // the branch because otherwise, of course, they + // would never execute. This is correct even in + // the presence of branches that read register + // inputs (e.g. conditional branches on some RISCs + // that branch on reg zero/not-zero, or any + // indirect branch), but for a very subtle reason: + // all cases of such branches will (or should) + // have multiple successors, and thus due to + // critical-edge splitting, their successors will + // have only the single predecessor, and we prefer + // to insert at the head of the successor in that + // case (rather than here). We make this a + // requirement, in fact: the user of this library + // shall not read registers in a branch + // instruction of there is only one successor per + // the given CFG information. + ProgPoint::before(from_last_insn), + InsertMovePrio::OutEdgeMoves, + ) + } else if to_ins <= 1 { + ( + ProgPoint::before(to_first_insn), + InsertMovePrio::InEdgeMoves, + ) + } else { + panic!( + "Critical edge: can't insert moves between blocks {:?} and {:?}", + src.from_block(), + src.to_block() + ); + }; + + let mut last = None; + for dest in first_dest..last_dest { + let dest = &half_moves[dest]; + if last == Some(dest.alloc) { + continue; + } + self.insert_move( + insertion_point, + prio, + src.alloc, + dest.alloc, + Some(self.vreg_regs[dest.to_vreg().index()]), + ); + last = Some(dest.alloc); + } + } + + // Handle multi-fixed-reg constraints by copying. + for (progpoint, from_preg, to_preg, slot) in + std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) + { + log::debug!( + "multi-fixed-move constraint at {:?} from p{} to p{}", + progpoint, + from_preg.index(), + to_preg.index() + ); + self.insert_move( + progpoint, + InsertMovePrio::MultiFixedReg, + Allocation::reg(self.pregs[from_preg.index()].reg), + Allocation::reg(self.pregs[to_preg.index()].reg), + None, + ); + self.set_alloc( + progpoint.inst(), + slot, + Allocation::reg(self.pregs[to_preg.index()].reg), + ); + } + + // Handle outputs that reuse inputs: copy beforehand, then set + // input's alloc to output's. + // + // Note that the output's allocation may not *actually* be + // valid until InstPosition::After, but the reused input may + // occur at InstPosition::Before. This may appear incorrect, + // but we make it work by ensuring that all *other* inputs are + // extended to InstPosition::After so that the def will not + // interfere. (The liveness computation code does this -- we + // do not require the user to do so.) + // + // One might ask: why not insist that input-reusing defs occur + // at InstPosition::Before? this would be correct, but would + // mean that the reused input and the reusing output + // interfere, *guaranteeing* that every such case would + // require a move. This is really bad on ISAs (like x86) where + // reused inputs are ubiquitous. 
+ // + // Another approach might be to put the def at Before, and + // trim the reused input's liverange back to the previous + // instruction's After. This is kind of OK until (i) a block + // boundary occurs between the prior inst and this one, or + // (ii) any moves/spills/reloads occur between the two + // instructions. We really do need the input to be live at + // this inst's Before. + // + // In principle what we really need is a "BeforeBefore" + // program point, but we don't want to introduce that + // everywhere and pay the cost of twice as many ProgPoints + // throughout the allocator. + // + // Or we could introduce a separate move instruction -- this + // is the approach that regalloc.rs takes with "mod" operands + // -- but that is also costly. + // + // So we take this approach (invented by IonMonkey -- somewhat + // hard to discern, though see [0] for a comment that makes + // this slightly less unclear) to avoid interference between + // the actual reused input and reusing output, ensure + // interference (hence no incorrectness) between other inputs + // and the reusing output, and not require a separate explicit + // move instruction. + // + // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110 + for inst in reuse_input_insts { + let mut input_reused: SmallVec<[usize; 4]> = smallvec![]; + for output_idx in 0..self.func.inst_operands(inst).len() { + let operand = self.func.inst_operands(inst)[output_idx]; + if let OperandPolicy::Reuse(input_idx) = operand.policy() { + debug_assert!(!input_reused.contains(&input_idx)); + debug_assert_eq!(operand.pos(), OperandPos::After); + input_reused.push(input_idx); + let input_alloc = self.get_alloc(inst, input_idx); + let output_alloc = self.get_alloc(inst, output_idx); + log::debug!( + "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}", + inst, + output_idx, + output_alloc, + input_idx, + input_alloc + ); + if input_alloc != output_alloc { + #[cfg(debug)] + { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::before(inst), + format!( + " reuse-input-copy: {} -> {}", + input_alloc, output_alloc + ), + ); + } + } + let input_operand = self.func.inst_operands(inst)[input_idx]; + self.insert_move( + ProgPoint::before(inst), + InsertMovePrio::ReusedInput, + input_alloc, + output_alloc, + Some(input_operand.vreg()), + ); + self.set_alloc(inst, input_idx, output_alloc); + } + } + } + } + + // Sort the prog-moves lists and insert moves to reify the + // input program's move operations. 
+ self.prog_move_srcs + .sort_unstable_by_key(|((_, inst), _)| *inst); + self.prog_move_dsts + .sort_unstable_by_key(|((_, inst), _)| inst.prev()); + let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]); + let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]); + assert_eq!(prog_move_srcs.len(), prog_move_dsts.len()); + for (&((_, from_inst), from_alloc), &((to_vreg, to_inst), to_alloc)) in + prog_move_srcs.iter().zip(prog_move_dsts.iter()) + { + log::debug!( + "program move at inst {:?}: alloc {:?} -> {:?} (v{})", + from_inst, + from_alloc, + to_alloc, + to_vreg.index(), + ); + assert!(!from_alloc.is_none()); + assert!(!to_alloc.is_none()); + assert_eq!(from_inst, to_inst.prev()); + // N.B.: these moves happen with the *same* priority as + // LR-to-LR moves, because they work just like them: they + // connect a use at one progpoint (move-After) with a def + // at an adjacent progpoint (move+1-Before), so they must + // happen in parallel with all other LR-to-LR moves. + self.insert_move( + ProgPoint::before(to_inst), + InsertMovePrio::Regular, + from_alloc, + to_alloc, + Some(self.vreg_regs[to_vreg.index()]), + ); + } + } + + pub fn resolve_inserted_moves(&mut self) { + // For each program point, gather all moves together. Then + // resolve (see cases below). + let mut i = 0; + self.inserted_moves + .sort_unstable_by_key(|m| (m.pos.to_index(), m.prio)); + + // Redundant-move elimination state tracker. + let mut redundant_moves = RedundantMoveEliminator::default(); + + fn redundant_move_process_side_effects<'a, F: Function>( + this: &Env<'a, F>, + redundant_moves: &mut RedundantMoveEliminator, + from: ProgPoint, + to: ProgPoint, + ) { + // If any safepoints in range, clear and return. + // Also, if we cross a block boundary, clear and return. + if this.cfginfo.insn_block[from.inst().index()] + != this.cfginfo.insn_block[to.inst().index()] + { + redundant_moves.clear(); + return; + } + for inst in from.inst().index()..=to.inst().index() { + if this.func.is_safepoint(Inst::new(inst)) { + redundant_moves.clear(); + return; + } + } + + let start_inst = if from.pos() == InstPosition::Before { + from.inst() + } else { + from.inst().next() + }; + let end_inst = if to.pos() == InstPosition::Before { + to.inst() + } else { + to.inst().next() + }; + for inst in start_inst.index()..end_inst.index() { + let inst = Inst::new(inst); + for (i, op) in this.func.inst_operands(inst).iter().enumerate() { + match op.kind() { + OperandKind::Def | OperandKind::Mod => { + let alloc = this.get_alloc(inst, i); + redundant_moves.clear_alloc(alloc); + } + _ => {} + } + } + for reg in this.func.inst_clobbers(inst) { + redundant_moves.clear_alloc(Allocation::reg(*reg)); + } + } + } + + let mut last_pos = ProgPoint::before(Inst::new(0)); + + while i < self.inserted_moves.len() { + let start = i; + let pos = self.inserted_moves[i].pos; + let prio = self.inserted_moves[i].prio; + while i < self.inserted_moves.len() + && self.inserted_moves[i].pos == pos + && self.inserted_moves[i].prio == prio + { + i += 1; + } + let moves = &self.inserted_moves[start..i]; + + redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos); + last_pos = pos; + + // Gather all the moves with Int class and Float class + // separately. These cannot interact, so it is safe to + // have two separate ParallelMove instances. They need to + // be separate because moves between the two classes are + // impossible. 
(We could enhance ParallelMoves to + // understand register classes and take multiple scratch + // regs, but this seems simpler.) + let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; + let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; + let mut self_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; + + for m in moves { + if m.from_alloc.is_reg() && m.to_alloc.is_reg() { + assert_eq!(m.from_alloc.class(), m.to_alloc.class()); + } + if m.from_alloc == m.to_alloc { + if m.to_vreg.is_some() { + self_moves.push(m.clone()); + } + continue; + } + match m.from_alloc.class() { + RegClass::Int => { + int_moves.push(m.clone()); + } + RegClass::Float => { + float_moves.push(m.clone()); + } + } + } + + for &(regclass, moves) in + &[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)] + { + // All moves in `moves` semantically happen in + // parallel. Let's resolve these to a sequence of moves + // that can be done one at a time. + let scratch = self.env.scratch_by_class[regclass as u8 as usize]; + let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch)); + log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); + for m in moves { + if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() { + log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); + parallel_moves.add(m.from_alloc, m.to_alloc, m.to_vreg); + } + } + + let resolved = parallel_moves.resolve(); + + // If (i) the scratch register is used, and (ii) a + // stack-to-stack move exists, then we need to + // allocate an additional scratch spillslot to which + // we can temporarily spill the scratch reg when we + // lower the stack-to-stack move to a + // stack-to-scratch-to-stack sequence. + let scratch_used = resolved.iter().any(|&(src, dst, _)| { + src == Allocation::reg(scratch) || dst == Allocation::reg(scratch) + }); + let stack_stack_move = resolved + .iter() + .any(|&(src, dst, _)| src.is_stack() && dst.is_stack()); + let extra_slot = if scratch_used && stack_stack_move { + if self.extra_spillslot[regclass as u8 as usize].is_none() { + let slot = self.allocate_spillslot(regclass); + self.extra_spillslot[regclass as u8 as usize] = Some(slot); + } + self.extra_spillslot[regclass as u8 as usize] + } else { + None + }; + + let mut scratch_used_yet = false; + for (src, dst, to_vreg) in resolved { + log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); + let action = redundant_moves.process_move(src, dst, to_vreg); + if !action.elide { + if dst == Allocation::reg(scratch) { + scratch_used_yet = true; + } + if src.is_stack() && dst.is_stack() { + if !scratch_used_yet { + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: Allocation::reg(scratch), + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: dst, + to_vreg, + }, + ); + } else { + assert!(extra_slot.is_some()); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: extra_slot.unwrap(), + to_vreg: None, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: Allocation::reg(scratch), + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: dst, + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: extra_slot.unwrap(), + to: Allocation::reg(scratch), + to_vreg: None, + }, + ); + } + } else { + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: dst, + to_vreg, + }, + ); + } + } else { + log::debug!(" -> redundant move elided"); + } + 
if let Some((alloc, vreg)) = action.def_alloc { + log::debug!( + " -> converted to DefAlloc: alloc {} vreg {}", + alloc, + vreg + ); + self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); + } + } + } + + for m in &self_moves { + log::debug!( + "self move at pos {:?} prio {:?}: {} -> {} to_vreg {:?}", + pos, + prio, + m.from_alloc, + m.to_alloc, + m.to_vreg + ); + let action = redundant_moves.process_move(m.from_alloc, m.to_alloc, m.to_vreg); + assert!(action.elide); + if let Some((alloc, vreg)) = action.def_alloc { + log::debug!(" -> DefAlloc: alloc {} vreg {}", alloc, vreg); + self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); + } + } + } + + // Add edits to describe blockparam locations too. This is + // required by the checker. This comes after any edge-moves. + self.blockparam_allocs + .sort_unstable_by_key(|&(block, idx, _, _)| (block, idx)); + self.stats.blockparam_allocs_count = self.blockparam_allocs.len(); + let mut i = 0; + while i < self.blockparam_allocs.len() { + let start = i; + let block = self.blockparam_allocs[i].0; + while i < self.blockparam_allocs.len() && self.blockparam_allocs[i].0 == block { + i += 1; + } + let params = &self.blockparam_allocs[start..i]; + let vregs = params + .iter() + .map(|(_, _, vreg_idx, _)| self.vreg_regs[vreg_idx.index()]) + .collect::>(); + let allocs = params + .iter() + .map(|(_, _, _, alloc)| *alloc) + .collect::>(); + assert_eq!(vregs.len(), self.func.block_params(block).len()); + assert_eq!(allocs.len(), self.func.block_params(block).len()); + self.add_edit( + self.cfginfo.block_entry[block.index()], + InsertMovePrio::BlockParam, + Edit::BlockParams { vregs, allocs }, + ); + } + + // Ensure edits are in sorted ProgPoint order. N.B.: this must + // be a stable sort! We have to keep the order produced by the + // parallel-move resolver for all moves within a single sort + // key. + self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio)); + self.stats.edits_count = self.edits.len(); + + // Add debug annotations. + if self.annotations_enabled { + for i in 0..self.edits.len() { + let &(pos, _, ref edit) = &self.edits[i]; + match edit { + &Edit::Move { from, to, to_vreg } => { + self.annotate( + ProgPoint::from_index(pos), + format!("move {} -> {} ({:?})", from, to, to_vreg), + ); + } + &Edit::BlockParams { + ref vregs, + ref allocs, + } => { + let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); + self.annotate(ProgPoint::from_index(pos), s); + } + &Edit::DefAlloc { alloc, vreg } => { + let s = format!("defalloc {:?} := {:?}", alloc, vreg); + self.annotate(ProgPoint::from_index(pos), s); + } + } + } + } + } + + pub fn add_edit(&mut self, pos: ProgPoint, prio: InsertMovePrio, edit: Edit) { + match &edit { + &Edit::Move { from, to, to_vreg } if from == to && to_vreg.is_none() => return, + &Edit::Move { from, to, .. } if from.is_reg() && to.is_reg() => { + assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class()); + } + _ => {} + } + + self.edits.push((pos.to_index(), prio, edit)); + } +} diff --git a/src/ion/process.rs b/src/ion/process.rs new file mode 100644 index 00000000..0b272493 --- /dev/null +++ b/src/ion/process.rs @@ -0,0 +1,1057 @@ +/* + * The following license applies to this file, which was initially + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Since the initial port, the design has been substantially evolved + * and optimized. + */ + +//! Main allocation loop that processes bundles. + +use super::{ + spill_weight_from_policy, CodeRange, Env, LiveBundleIndex, LiveBundleVec, LiveRangeFlag, + LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry, PRegIndex, RegTraversalIter, + Requirement, UseList, +}; +use crate::{ + Allocation, Function, Inst, InstPosition, OperandKind, OperandPolicy, PReg, ProgPoint, + RegAllocError, +}; +use fxhash::FxHashSet; +use smallvec::smallvec; +use std::fmt::Debug; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum AllocRegResult { + Allocated(Allocation), + Conflict(LiveBundleVec, ProgPoint), + ConflictWithFixed(u32, ProgPoint), + ConflictHighCost, +} + +impl<'a, F: Function> Env<'a, F> { + pub fn process_bundles(&mut self) -> Result<(), RegAllocError> { + let mut count = 0; + while let Some((bundle, reg_hint)) = self.allocation_queue.pop() { + self.stats.process_bundle_count += 1; + self.process_bundle(bundle, reg_hint)?; + count += 1; + if count > self.func.insts() * 50 { + self.dump_state(); + panic!("Infinite loop!"); + } + } + self.stats.final_liverange_count = self.ranges.len(); + self.stats.final_bundle_count = self.bundles.len(); + self.stats.spill_bundle_count = self.spilled_bundles.len(); + + Ok(()) + } + + pub fn try_to_allocate_bundle_to_reg( + &mut self, + bundle: LiveBundleIndex, + reg: PRegIndex, + // if the max bundle weight in the conflict set exceeds this + // cost (if provided), just return + // `AllocRegResult::ConflictHighCost`. + max_allowable_cost: Option, + ) -> AllocRegResult { + log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); + let mut conflicts = smallvec![]; + let mut conflict_set = FxHashSet::default(); + let mut max_conflict_weight = 0; + // Traverse the BTreeMap in order by requesting the whole + // range spanned by the bundle and iterating over that + // concurrently with our ranges. Because our ranges are in + // order, and the BTreeMap is as well, this allows us to have + // an overall O(n log n) + O(b) complexity, where the PReg has + // n current ranges and the bundle has b ranges, rather than + // O(b * n log n) with the simple probe-for-each-bundle-range + // approach. + // + // Note that the comparator function on a CodeRange tests for + // *overlap*, so we are checking whether the BTree contains + // any preg range that *overlaps* with range `range`, not + // literally the range `range`. + let bundle_ranges = &self.bundles[bundle.index()].ranges; + let from_key = LiveRangeKey::from_range(&CodeRange { + from: bundle_ranges.first().unwrap().range.from, + to: bundle_ranges.first().unwrap().range.from, + }); + let mut preg_range_iter = self.pregs[reg.index()] + .allocations + .btree + .range(from_key..) + .peekable(); + log::debug!( + "alloc map for {:?} in range {:?}..: {:?}", + reg, + from_key, + self.pregs[reg.index()].allocations.btree + ); + let mut first_conflict: Option = None; + + 'ranges: for entry in bundle_ranges { + log::debug!(" -> range LR {:?}: {:?}", entry.index, entry.range); + let key = LiveRangeKey::from_range(&entry.range); + + let mut skips = 0; + 'alloc: loop { + log::debug!(" -> PReg range {:?}", preg_range_iter.peek()); + + // Advance our BTree traversal until it is >= this bundle + // range (i.e., skip PReg allocations in the BTree that + // are completely before this bundle range). 
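A self-contained sketch of the overlap-keyed BTree idea noted a few lines above (the crate's `LiveRangeKey` plays this role). The key type here is simplified to integer endpoints, and the trick only behaves sensibly because the allocator never stores two overlapping keys in the same map.

use std::cmp::Ordering;
use std::collections::BTreeMap;

#[derive(Clone, Copy, Debug)]
struct OverlapKey {
    from: u32,
    to: u32, // exclusive
}

impl PartialEq for OverlapKey {
    // Two keys are "equal" exactly when their ranges overlap.
    fn eq(&self, other: &Self) -> bool {
        self.from < other.to && other.from < self.to
    }
}
impl Eq for OverlapKey {}
impl PartialOrd for OverlapKey {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for OverlapKey {
    fn cmp(&self, other: &Self) -> Ordering {
        if self.to <= other.from {
            Ordering::Less
        } else if other.to <= self.from {
            Ordering::Greater
        } else {
            Ordering::Equal
        }
    }
}

fn main() {
    let mut allocations: BTreeMap<OverlapKey, &'static str> = BTreeMap::new();
    allocations.insert(OverlapKey { from: 0, to: 10 }, "bundle A");
    allocations.insert(OverlapKey { from: 20, to: 30 }, "bundle B");
    // A probe for [8, 12) overlaps [0, 10), so the lookup reports a conflict.
    assert!(allocations.contains_key(&OverlapKey { from: 8, to: 12 }));
    // [10, 15) touches neither reservation (ends are exclusive): no conflict.
    assert!(!allocations.contains_key(&OverlapKey { from: 10, to: 15 }));
}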
+ + if preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { + log::debug!( + "Skipping PReg range {:?}", + preg_range_iter.peek().unwrap().0 + ); + preg_range_iter.next(); + skips += 1; + if skips >= 16 { + let from_pos = entry.range.from; + let from_key = LiveRangeKey::from_range(&CodeRange { + from: from_pos, + to: from_pos, + }); + preg_range_iter = self.pregs[reg.index()] + .allocations + .btree + .range(from_key..) + .peekable(); + skips = 0; + } + continue 'alloc; + } + skips = 0; + + // If there are no more PReg allocations, we're done! + if preg_range_iter.peek().is_none() { + log::debug!(" -> no more PReg allocations; so no conflict possible!"); + break 'ranges; + } + + // If the current PReg range is beyond this range, there is no conflict; continue. + if *preg_range_iter.peek().unwrap().0 > key { + log::debug!( + " -> next PReg allocation is at {:?}; moving to next VReg range", + preg_range_iter.peek().unwrap().0 + ); + break 'alloc; + } + + // Otherwise, there is a conflict. + let preg_key = *preg_range_iter.peek().unwrap().0; + assert_eq!(preg_key, key); // Assert that this range overlaps. + let preg_range = preg_range_iter.next().unwrap().1; + + log::debug!(" -> btree contains range {:?} that overlaps", preg_range); + if preg_range.is_valid() { + log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); + // range from an allocated bundle: find the bundle and add to + // conflicts list. + let conflict_bundle = self.ranges[preg_range.index()].bundle; + log::debug!(" -> conflict bundle {:?}", conflict_bundle); + if !conflict_set.contains(&conflict_bundle) { + conflicts.push(conflict_bundle); + conflict_set.insert(conflict_bundle); + max_conflict_weight = std::cmp::max( + max_conflict_weight, + self.bundles[conflict_bundle.index()].cached_spill_weight(), + ); + if max_allowable_cost.is_some() + && max_conflict_weight > max_allowable_cost.unwrap() + { + log::debug!(" -> reached high cost, retrying early"); + return AllocRegResult::ConflictHighCost; + } + } + + if first_conflict.is_none() { + first_conflict = Some(ProgPoint::from_index(std::cmp::max( + preg_key.from, + key.from, + ))); + } + } else { + log::debug!(" -> conflict with fixed reservation"); + // range from a direct use of the PReg (due to clobber). + return AllocRegResult::ConflictWithFixed( + max_conflict_weight, + ProgPoint::from_index(preg_key.from), + ); + } + } + } + + if conflicts.len() > 0 { + return AllocRegResult::Conflict(conflicts, first_conflict.unwrap()); + } + + // We can allocate! Add our ranges to the preg's BTree. + let preg = self.pregs[reg.index()].reg; + log::debug!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); + self.bundles[bundle.index()].allocation = Allocation::reg(preg); + for entry in &self.bundles[bundle.index()].ranges { + self.pregs[reg.index()] + .allocations + .btree + .insert(LiveRangeKey::from_range(&entry.range), entry.index); + } + + AllocRegResult::Allocated(Allocation::reg(preg)) + } + + pub fn evict_bundle(&mut self, bundle: LiveBundleIndex) { + log::debug!( + "evicting bundle {:?}: alloc {:?}", + bundle, + self.bundles[bundle.index()].allocation + ); + let preg = match self.bundles[bundle.index()].allocation.as_reg() { + Some(preg) => preg, + None => { + log::debug!( + " -> has no allocation! 
{:?}", + self.bundles[bundle.index()].allocation + ); + return; + } + }; + let preg_idx = PRegIndex::new(preg.index()); + self.bundles[bundle.index()].allocation = Allocation::none(); + for entry in &self.bundles[bundle.index()].ranges { + log::debug!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); + self.pregs[preg_idx.index()] + .allocations + .btree + .remove(&LiveRangeKey::from_range(&entry.range)); + } + let prio = self.bundles[bundle.index()].prio; + log::debug!(" -> prio {}; back into queue", prio); + self.allocation_queue + .insert(bundle, prio as usize, PReg::invalid()); + } + + pub fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { + self.bundles[bundle.index()].cached_spill_weight() + } + + pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { + log::debug!("maximum_spill_weight_in_bundle_set: {:?}", bundles); + let m = bundles + .iter() + .map(|&b| { + let w = self.bundles[b.index()].cached_spill_weight(); + log::debug!("bundle{}: {}", b.index(), w); + w + }) + .max() + .unwrap_or(0); + log::debug!(" -> max: {}", m); + m + } + + pub fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { + log::debug!("recompute bundle properties: bundle {:?}", bundle); + + let minimal; + let mut fixed = false; + let mut stack = false; + let bundledata = &self.bundles[bundle.index()]; + let first_range = bundledata.ranges[0].index; + let first_range_data = &self.ranges[first_range.index()]; + + self.bundles[bundle.index()].prio = self.compute_bundle_prio(bundle); + + if first_range_data.vreg.is_invalid() { + log::debug!(" -> no vreg; minimal and fixed"); + minimal = true; + fixed = true; + } else { + for u in &first_range_data.uses { + log::debug!(" -> use: {:?}", u); + if let OperandPolicy::FixedReg(_) = u.operand.policy() { + log::debug!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); + fixed = true; + } + if let OperandPolicy::Stack = u.operand.policy() { + log::debug!(" -> stack use at {:?}: {:?}", u.pos, u.operand); + stack = true; + } + if stack && fixed { + break; + } + } + // Minimal if the range covers only one instruction. Note + // that it could cover just one ProgPoint, + // i.e. X.Before..X.After, or two ProgPoints, + // i.e. X.Before..X+1.Before. 
+ log::debug!(" -> first range has range {:?}", first_range_data.range); + let bundle_start = self.bundles[bundle.index()] + .ranges + .first() + .unwrap() + .range + .from; + let bundle_end = self.bundles[bundle.index()].ranges.last().unwrap().range.to; + minimal = bundle_start.inst() == bundle_end.prev().inst(); + log::debug!(" -> minimal: {}", minimal); + } + + let spill_weight = if minimal { + if fixed { + log::debug!(" -> fixed and minimal: spill weight 2000000"); + 2_000_000 + } else { + log::debug!(" -> non-fixed and minimal: spill weight 1000000"); + 1_000_000 + } + } else { + let mut total = 0; + for entry in &self.bundles[bundle.index()].ranges { + let range_data = &self.ranges[entry.index.index()]; + log::debug!( + " -> uses spill weight: +{}", + range_data.uses_spill_weight() + ); + total += range_data.uses_spill_weight(); + } + + if self.bundles[bundle.index()].prio > 0 { + log::debug!( + " -> dividing by prio {}; final weight {}", + self.bundles[bundle.index()].prio, + total / self.bundles[bundle.index()].prio + ); + total / self.bundles[bundle.index()].prio + } else { + 0 + } + }; + + self.bundles[bundle.index()].set_cached_spill_weight_and_props( + spill_weight, + minimal, + fixed, + stack, + ); + } + + pub fn minimal_bundle(&self, bundle: LiveBundleIndex) -> bool { + self.bundles[bundle.index()].cached_minimal() + } + + pub fn recompute_range_properties(&mut self, range: LiveRangeIndex) { + let rangedata = &mut self.ranges[range.index()]; + let mut w = 0; + for u in &rangedata.uses { + w += u.weight as u32; + log::debug!("range{}: use {:?}", range.index(), u); + } + rangedata.set_uses_spill_weight(w); + if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { + // Note that we *set* the flag here, but we never *clear* + // it: it may be set by a progmove as well (which does not + // create an explicit use or def), and we want to preserve + // that. We will never split or trim ranges in a way that + // removes a def at the front and requires the flag to be + // cleared. + rangedata.set_flag(LiveRangeFlag::StartsAtDef); + } + } + + pub fn get_or_create_spill_bundle( + &mut self, + bundle: LiveBundleIndex, + create_if_absent: bool, + ) -> Option { + let ssidx = self.bundles[bundle.index()].spillset; + let idx = self.spillsets[ssidx.index()].spill_bundle; + if idx.is_valid() { + Some(idx) + } else if create_if_absent { + let idx = self.create_bundle(); + self.spillsets[ssidx.index()].spill_bundle = idx; + self.bundles[idx.index()].spillset = ssidx; + self.spilled_bundles.push(idx); + Some(idx) + } else { + None + } + } + + pub fn split_and_requeue_bundle( + &mut self, + bundle: LiveBundleIndex, + mut split_at: ProgPoint, + reg_hint: PReg, + ) { + self.stats.splits += 1; + log::debug!( + "split bundle {:?} at {:?} and requeue with reg hint (for first part) {:?}", + bundle, + split_at, + reg_hint, + ); + + // Split `bundle` at `split_at`, creating new LiveRanges and + // bundles (and updating vregs' linked lists appropriately), + // and enqueue the new bundles. + + let spillset = self.bundles[bundle.index()].spillset; + + assert!(!self.bundles[bundle.index()].ranges.is_empty()); + // Split point *at* start is OK; this means we peel off + // exactly one use to create a minimal bundle. 
+ let bundle_start = self.bundles[bundle.index()] + .ranges + .first() + .unwrap() + .range + .from; + assert!(split_at >= bundle_start); + let bundle_end = self.bundles[bundle.index()].ranges.last().unwrap().range.to; + assert!(split_at < bundle_end); + + // Is the split point *at* the start? If so, peel off the + // first use: set the split point just after it, or just + // before it if it comes after the start of the bundle. + if split_at == bundle_start { + // Find any uses; if none, just chop off one instruction. + let mut first_use = None; + 'outer: for entry in &self.bundles[bundle.index()].ranges { + for u in &self.ranges[entry.index.index()].uses { + first_use = Some(u.pos); + break 'outer; + } + } + log::debug!(" -> first use loc is {:?}", first_use); + split_at = match first_use { + Some(pos) => { + if pos.inst() == bundle_start.inst() { + ProgPoint::before(pos.inst().next()) + } else { + ProgPoint::before(pos.inst()) + } + } + None => ProgPoint::before( + self.bundles[bundle.index()] + .ranges + .first() + .unwrap() + .range + .from + .inst() + .next(), + ), + }; + log::debug!( + "split point is at bundle start; advancing to {:?}", + split_at + ); + } else { + // Don't split in the middle of an instruction -- this could + // create impossible moves (we cannot insert a move between an + // instruction's uses and defs). + if split_at.pos() == InstPosition::After { + split_at = split_at.next(); + } + if split_at >= bundle_end { + split_at = split_at.prev().prev(); + } + } + + assert!(split_at > bundle_start && split_at < bundle_end); + + // We need to find which LRs fall on each side of the split, + // which LR we need to split down the middle, then update the + // current bundle, create a new one, and (re)-queue both. + + log::debug!(" -> LRs: {:?}", self.bundles[bundle.index()].ranges); + + let mut last_lr_in_old_bundle_idx = 0; // last LR-list index in old bundle + let mut first_lr_in_new_bundle_idx = 0; // first LR-list index in new bundle + for (i, entry) in self.bundles[bundle.index()].ranges.iter().enumerate() { + if split_at > entry.range.from { + last_lr_in_old_bundle_idx = i; + first_lr_in_new_bundle_idx = i; + } + if split_at < entry.range.to { + first_lr_in_new_bundle_idx = i; + break; + } + } + + log::debug!( + " -> last LR in old bundle: LR {:?}", + self.bundles[bundle.index()].ranges[last_lr_in_old_bundle_idx] + ); + log::debug!( + " -> first LR in new bundle: LR {:?}", + self.bundles[bundle.index()].ranges[first_lr_in_new_bundle_idx] + ); + + // Take the sublist of LRs that will go in the new bundle. + let mut new_lr_list: LiveRangeList = self.bundles[bundle.index()] + .ranges + .iter() + .cloned() + .skip(first_lr_in_new_bundle_idx) + .collect(); + self.bundles[bundle.index()] + .ranges + .truncate(last_lr_in_old_bundle_idx + 1); + + // If the first entry in `new_lr_list` is a LR that is split + // down the middle, replace it with a new LR and chop off the + // end of the same LR in the original list. 
+ if split_at > new_lr_list[0].range.from { + assert_eq!(last_lr_in_old_bundle_idx, first_lr_in_new_bundle_idx); + let orig_lr = new_lr_list[0].index; + let new_lr = self.create_liverange(CodeRange { + from: split_at, + to: new_lr_list[0].range.to, + }); + self.ranges[new_lr.index()].vreg = self.ranges[orig_lr.index()].vreg; + log::debug!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); + let first_use = self.ranges[orig_lr.index()] + .uses + .iter() + .position(|u| u.pos >= split_at) + .unwrap_or(self.ranges[orig_lr.index()].uses.len()); + let rest_uses: UseList = self.ranges[orig_lr.index()] + .uses + .iter() + .cloned() + .skip(first_use) + .collect(); + self.ranges[new_lr.index()].uses = rest_uses; + self.ranges[orig_lr.index()].uses.truncate(first_use); + self.recompute_range_properties(orig_lr); + self.recompute_range_properties(new_lr); + new_lr_list[0].index = new_lr; + new_lr_list[0].range = self.ranges[new_lr.index()].range; + self.ranges[orig_lr.index()].range.to = split_at; + self.bundles[bundle.index()].ranges[last_lr_in_old_bundle_idx].range = + self.ranges[orig_lr.index()].range; + + // Perform a lazy split in the VReg data. We just + // append the new LR and its range; we will sort by + // start of range, and fix up range ends, once when we + // iterate over the VReg's ranges after allocation + // completes (this is the only time when order + // matters). + self.vregs[self.ranges[new_lr.index()].vreg.index()] + .ranges + .push(LiveRangeListEntry { + range: self.ranges[new_lr.index()].range, + index: new_lr, + }); + } + + let new_bundle = self.create_bundle(); + log::debug!(" -> creating new bundle {:?}", new_bundle); + self.bundles[new_bundle.index()].spillset = spillset; + for entry in &new_lr_list { + self.ranges[entry.index.index()].bundle = new_bundle; + } + self.bundles[new_bundle.index()].ranges = new_lr_list; + + // Finally, handle moving LRs to the spill bundle when + // appropriate: If the first range in `new_bundle` or last + // range in `bundle` has "empty space" beyond the first or + // last use (respectively), trim it and put an empty LR into + // the spill bundle. (We are careful to treat the "starts at + // def" flag as an implicit first def even if no def-type Use + // is present.) 
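A minimal sketch of the trailing-empty-space trim described above, reduced to plain integer positions; the real code walks `LiveRangeListEntry`s and re-parents the empty piece onto the spill bundle.

// If the range extends past the point just after its last use, the remainder
// carries no uses and can be parked in the spill bundle.
fn trailing_empty_space(range_to: u32, last_use: u32) -> Option<(u32, u32)> {
    let split = last_use + 1;
    if split < range_to {
        Some((split, range_to))
    } else {
        None
    }
}

fn main() {
    // Range ending at 30 whose last use is at 14: [15, 30) has no uses.
    assert_eq!(trailing_empty_space(30, 14), Some((15, 30)));
    // Last use right at the end of the range: nothing to trim.
    assert_eq!(trailing_empty_space(15, 14), None);
}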
+ while let Some(entry) = self.bundles[bundle.index()].ranges.last().cloned() { + let end = entry.range.to; + let vreg = self.ranges[entry.index.index()].vreg; + let last_use = self.ranges[entry.index.index()].uses.last().map(|u| u.pos); + if last_use.is_none() { + let spill = self + .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) + .unwrap(); + log::debug!( + " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", + bundle, + entry.index, + spill + ); + self.bundles[spill.index()].ranges.push(entry); + self.bundles[bundle.index()].ranges.pop(); + self.ranges[entry.index.index()].bundle = spill; + continue; + } + let last_use = last_use.unwrap(); + let split = ProgPoint::before(last_use.inst().next()); + if split < end { + let spill = self + .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) + .unwrap(); + self.bundles[bundle.index()] + .ranges + .last_mut() + .unwrap() + .range + .to = split; + self.ranges[self.bundles[bundle.index()] + .ranges + .last() + .unwrap() + .index + .index()] + .range + .to = split; + let range = CodeRange { + from: split, + to: end, + }; + let empty_lr = self.create_liverange(range); + self.bundles[spill.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + self.ranges[empty_lr.index()].bundle = spill; + self.vregs[vreg.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + log::debug!( + " -> bundle {:?} range {:?}: last use implies split point {:?}", + bundle, + entry.index, + split + ); + log::debug!( + " -> moving trailing empty region to new spill bundle {:?} with new LR {:?}", + spill, + empty_lr + ); + } + break; + } + while let Some(entry) = self.bundles[new_bundle.index()].ranges.first().cloned() { + if self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef) { + break; + } + let start = entry.range.from; + let vreg = self.ranges[entry.index.index()].vreg; + let first_use = self.ranges[entry.index.index()].uses.first().map(|u| u.pos); + if first_use.is_none() { + let spill = self + .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) + .unwrap(); + log::debug!( + " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", + new_bundle, + entry.index, + spill + ); + self.bundles[spill.index()].ranges.push(entry); + self.bundles[new_bundle.index()].ranges.drain(..1); + self.ranges[entry.index.index()].bundle = spill; + continue; + } + let first_use = first_use.unwrap(); + let split = ProgPoint::before(first_use.inst()); + if split > start { + let spill = self + .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) + .unwrap(); + self.bundles[new_bundle.index()] + .ranges + .first_mut() + .unwrap() + .range + .from = split; + self.ranges[self.bundles[new_bundle.index()] + .ranges + .first() + .unwrap() + .index + .index()] + .range + .from = split; + let range = CodeRange { + from: start, + to: split, + }; + let empty_lr = self.create_liverange(range); + self.bundles[spill.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + self.ranges[empty_lr.index()].bundle = spill; + self.vregs[vreg.index()].ranges.push(LiveRangeListEntry { + range, + index: empty_lr, + }); + log::debug!( + " -> bundle {:?} range {:?}: first use implies split point {:?}", + bundle, + entry.index, + first_use, + ); + log::debug!( + " -> moving leading empty region to new spill bundle {:?} with new LR {:?}", + spill, + empty_lr + ); + } + break; + } + + if self.bundles[bundle.index()].ranges.len() > 0 { + 
self.recompute_bundle_properties(bundle); + let prio = self.bundles[bundle.index()].prio; + self.allocation_queue + .insert(bundle, prio as usize, reg_hint); + } + if self.bundles[new_bundle.index()].ranges.len() > 0 { + self.recompute_bundle_properties(new_bundle); + let prio = self.bundles[new_bundle.index()].prio; + self.allocation_queue + .insert(new_bundle, prio as usize, reg_hint); + } + } + + pub fn process_bundle( + &mut self, + bundle: LiveBundleIndex, + reg_hint: PReg, + ) -> Result<(), RegAllocError> { + let req = self.compute_requirement(bundle); + // Grab a hint from either the queue or our spillset, if any. + let hint_reg = if reg_hint != PReg::invalid() { + reg_hint + } else { + self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint + }; + log::debug!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); + + if let Requirement::Conflict = req { + // We have to split right away. + assert!( + !self.minimal_bundle(bundle), + "Minimal bundle with conflict!" + ); + let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; + self.split_and_requeue_bundle( + bundle, + /* split_at_point = */ bundle_start, + reg_hint, + ); + return Ok(()); + } + + // If no requirement at all (because no uses), and *if* a + // spill bundle is already present, then move the LRs over to + // the spill bundle right away. + match req { + Requirement::Unknown | Requirement::Any(_) => { + if let Some(spill) = + self.get_or_create_spill_bundle(bundle, /* create_if_absent = */ false) + { + let mut list = + std::mem::replace(&mut self.bundles[bundle.index()].ranges, smallvec![]); + for entry in &list { + self.ranges[entry.index.index()].bundle = spill; + } + self.bundles[spill.index()].ranges.extend(list.drain(..)); + return Ok(()); + } + } + _ => {} + } + + // Try to allocate! + let mut attempts = 0; + loop { + attempts += 1; + log::debug!("attempt {}, req {:?}", attempts, req); + debug_assert!(attempts < 100 * self.func.insts()); + + let (class, fixed_preg) = match req { + Requirement::Fixed(preg) => (preg.class(), Some(preg)), + Requirement::Register(class) => (class, None), + Requirement::Stack(_) => { + // If we must be on the stack, mark our spillset + // as required immediately. + self.spillsets[self.bundles[bundle.index()].spillset.index()].required = true; + return Ok(()); + } + + Requirement::Any(_) | Requirement::Unknown => { + self.spilled_bundles.push(bundle); + return Ok(()); + } + + Requirement::Conflict => { + unreachable!() + } + }; + // Scan all pregs, or the one fixed preg, and attempt to allocate. + + let mut lowest_cost_evict_conflict_set: Option = None; + let mut lowest_cost_evict_conflict_cost: Option = None; + + let mut lowest_cost_split_conflict_cost: Option = None; + let mut lowest_cost_split_conflict_point = ProgPoint::before(Inst::new(0)); + let mut lowest_cost_split_conflict_reg = PReg::invalid(); + + // Heuristic: start the scan for an available + // register at an offset influenced both by our + // location in the code and by the bundle we're + // considering. This has the effect of spreading + // demand more evenly across registers. 
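A standalone sketch of this spreading heuristic (implemented just below via `scan_offset` and `RegTraversalIter`); the register list and offsets here are illustrative only.

// Rotate the probe order by an offset derived from the code position and the
// bundle index, so different bundles start their scans at different registers.
fn probe_order(regs: &[u8], scan_offset: usize) -> Vec<u8> {
    (0..regs.len())
        .map(|i| regs[(i + scan_offset) % regs.len()])
        .collect()
}

fn main() {
    let regs = [0u8, 1, 2, 3];
    assert_eq!(probe_order(&regs, 0), vec![0, 1, 2, 3]);
    assert_eq!(probe_order(&regs, 6), vec![2, 3, 0, 1]); // 6 % 4 == 2
}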
+ let scan_offset = self.ranges[self.bundles[bundle.index()].ranges[0].index.index()] + .range + .from + .inst() + .index() + + bundle.index(); + + self.stats.process_bundle_reg_probe_start_any += 1; + for preg in RegTraversalIter::new( + self.env, + class, + hint_reg, + PReg::invalid(), + scan_offset, + fixed_preg, + ) { + self.stats.process_bundle_reg_probes_any += 1; + let preg_idx = PRegIndex::new(preg.index()); + log::debug!("trying preg {:?}", preg_idx); + + let scan_limit_cost = match ( + lowest_cost_evict_conflict_cost, + lowest_cost_split_conflict_cost, + ) { + (Some(a), Some(b)) => Some(std::cmp::max(a, b)), + _ => None, + }; + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, scan_limit_cost) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_any += 1; + log::debug!(" -> allocated to any {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint = + alloc.as_reg().unwrap(); + return Ok(()); + } + AllocRegResult::Conflict(bundles, first_conflict_point) => { + log::debug!( + " -> conflict with bundles {:?}, first conflict at {:?}", + bundles, + first_conflict_point + ); + + let conflict_cost = self.maximum_spill_weight_in_bundle_set(&bundles); + + if lowest_cost_evict_conflict_cost.is_none() + || conflict_cost < lowest_cost_evict_conflict_cost.unwrap() + { + lowest_cost_evict_conflict_cost = Some(conflict_cost); + lowest_cost_evict_conflict_set = Some(bundles); + } + + let loop_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[first_conflict_point.inst().index()].index()]; + let move_cost = spill_weight_from_policy( + OperandPolicy::Reg, + loop_depth as usize, + /* is_def = */ true, + ); + if lowest_cost_split_conflict_cost.is_none() + || (conflict_cost + move_cost) + < lowest_cost_split_conflict_cost.unwrap() + { + lowest_cost_split_conflict_cost = Some(conflict_cost + move_cost); + lowest_cost_split_conflict_point = first_conflict_point; + lowest_cost_split_conflict_reg = preg; + } + } + AllocRegResult::ConflictWithFixed(max_cost, point) => { + log::debug!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); + + let loop_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[point.inst().index()].index()]; + let move_cost = spill_weight_from_policy( + OperandPolicy::Reg, + loop_depth as usize, + /* is_def = */ true, + ); + + if lowest_cost_split_conflict_cost.is_none() + || (max_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() + { + lowest_cost_split_conflict_cost = Some(max_cost + move_cost); + lowest_cost_split_conflict_point = point; + lowest_cost_split_conflict_reg = preg; + } + } + AllocRegResult::ConflictHighCost => { + // Simply don't consider -- we already have + // a lower-cost conflict bundle option + // to evict. + continue; + } + } + } + + // Otherwise, we *require* a register, but didn't fit into + // any with current bundle assignments. Hence, we will need + // to either split or attempt to evict some bundles. + + log::debug!( + " -> lowest cost evict: set {:?}, cost {:?}", + lowest_cost_evict_conflict_set, + lowest_cost_evict_conflict_cost, + ); + log::debug!( + " -> lowest cost split: cost {:?}, point {:?}, reg {:?}", + lowest_cost_split_conflict_cost, + lowest_cost_split_conflict_point, + lowest_cost_split_conflict_reg + ); + + // If we reach here, we *must* have an option either to split or evict. 
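+            // (Each Conflict probe recorded an eviction candidate and a
+            // split candidate, each ConflictWithFixed probe recorded a split
+            // candidate, and ConflictHighCost only arises when a cheaper
+            // eviction candidate already exists, so at least one option is
+            // recorded by now.)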
+ assert!( + lowest_cost_split_conflict_cost.is_some() + || lowest_cost_evict_conflict_cost.is_some() + ); + + let our_spill_weight = self.bundle_spill_weight(bundle); + log::debug!(" -> our spill weight: {}", our_spill_weight); + + // We detect the "too-many-live-registers" case here and + // return an error cleanly, rather than panicking, because + // the regalloc.rs fuzzer depends on the register + // allocator to correctly reject impossible-to-allocate + // programs in order to discard invalid test cases. + if self.minimal_bundle(bundle) + && (attempts >= 2 + || lowest_cost_evict_conflict_cost.is_none() + || lowest_cost_evict_conflict_cost.unwrap() >= our_spill_weight) + { + if let Requirement::Register(class) = req { + // Check if this is a too-many-live-registers situation. + let range = self.bundles[bundle.index()].ranges[0].range; + log::debug!("checking for too many live regs"); + let mut min_bundles_assigned = 0; + let mut fixed_assigned = 0; + let mut total_regs = 0; + for preg in self.env.preferred_regs_by_class[class as u8 as usize] + .iter() + .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) + { + log::debug!(" -> PR {:?}", preg); + let start = LiveRangeKey::from_range(&CodeRange { + from: range.from.prev(), + to: range.from.prev(), + }); + for (key, lr) in self.pregs[preg.index()].allocations.btree.range(start..) { + let preg_range = key.to_range(); + if preg_range.to <= range.from { + continue; + } + if preg_range.from >= range.to { + break; + } + if lr.is_valid() { + if self.minimal_bundle(self.ranges[lr.index()].bundle) { + log::debug!(" -> min bundle {:?}", lr); + min_bundles_assigned += 1; + } else { + log::debug!(" -> non-min bundle {:?}", lr); + } + } else { + log::debug!(" -> fixed bundle"); + fixed_assigned += 1; + } + } + total_regs += 1; + } + log::debug!( + " -> total {}, fixed {}, min {}", + total_regs, + fixed_assigned, + min_bundles_assigned + ); + if min_bundles_assigned + fixed_assigned >= total_regs { + return Err(RegAllocError::TooManyLiveRegs); + } + } + + panic!("Could not allocate minimal bundle, but the allocation problem should be possible to solve"); + } + + // If our bundle's weight is less than or equal to(*) the + // evict cost, choose to split. Also pick splitting if + // we're on our second or more attempt and we didn't + // allocate. Also pick splitting if the conflict set is + // empty, meaning a fixed conflict that can't be evicted. + // + // (*) the "equal to" part is very important: it prevents + // an infinite loop where two bundles with equal spill + // cost continually evict each other in an infinite + // allocation loop. In such a case, the first bundle in + // wins, and the other splits. + // + // Note that we don't split if the bundle is minimal. + if !self.minimal_bundle(bundle) + && (attempts >= 2 + || lowest_cost_evict_conflict_cost.is_none() + || our_spill_weight <= lowest_cost_evict_conflict_cost.unwrap()) + { + log::debug!( + " -> deciding to split: our spill weight is {}", + self.bundle_spill_weight(bundle) + ); + let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; + let mut split_at_point = + std::cmp::max(lowest_cost_split_conflict_point, bundle_start); + let requeue_with_reg = lowest_cost_split_conflict_reg; + + // Adjust `split_at_point` if it is within a deeper loop + // than the bundle start -- hoist it to just before the + // first loop header it encounters. 
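+                // (Otherwise the connecting moves created by the split would
+                // sit at a deeper loop depth, and hence at a higher cost,
+                // than the bundle start itself.)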
+ let bundle_start_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[bundle_start.inst().index()].index()]; + let split_at_depth = self.cfginfo.approx_loop_depth + [self.cfginfo.insn_block[split_at_point.inst().index()].index()]; + if split_at_depth > bundle_start_depth { + for block in (self.cfginfo.insn_block[bundle_start.inst().index()].index() + 1) + ..=self.cfginfo.insn_block[split_at_point.inst().index()].index() + { + if self.cfginfo.approx_loop_depth[block] > bundle_start_depth { + split_at_point = self.cfginfo.block_entry[block]; + break; + } + } + } + + self.split_and_requeue_bundle(bundle, split_at_point, requeue_with_reg); + return Ok(()); + } else { + // Evict all bundles in `conflicting bundles` and try again. + self.stats.evict_bundle_event += 1; + for &bundle in &lowest_cost_evict_conflict_set.unwrap() { + log::debug!(" -> evicting {:?}", bundle); + self.evict_bundle(bundle); + self.stats.evict_bundle_count += 1; + } + } + } + } +} diff --git a/src/ion/redundant_moves.rs b/src/ion/redundant_moves.rs new file mode 100644 index 00000000..4367a47c --- /dev/null +++ b/src/ion/redundant_moves.rs @@ -0,0 +1,142 @@ +//! Redundant-move elimination. + +use crate::{Allocation, VReg}; +use fxhash::FxHashMap; +use smallvec::{smallvec, SmallVec}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum RedundantMoveState { + Copy(Allocation, Option), + Orig(VReg), + None, +} +#[derive(Clone, Debug, Default)] +pub struct RedundantMoveEliminator { + allocs: FxHashMap, + reverse_allocs: FxHashMap>, +} +#[derive(Copy, Clone, Debug)] +pub struct RedundantMoveAction { + pub elide: bool, + pub def_alloc: Option<(Allocation, VReg)>, +} + +impl RedundantMoveEliminator { + pub fn process_move( + &mut self, + from: Allocation, + to: Allocation, + to_vreg: Option, + ) -> RedundantMoveAction { + // Look up the src and dest. + let from_state = self + .allocs + .get(&from) + .map(|&p| p) + .unwrap_or(RedundantMoveState::None); + let to_state = self + .allocs + .get(&to) + .map(|&p| p) + .unwrap_or(RedundantMoveState::None); + + log::debug!( + " -> redundant move tracker: from {} to {} to_vreg {:?}", + from, + to, + to_vreg + ); + log::debug!( + " -> from_state {:?} to_state {:?}", + from_state, + to_state + ); + + if from == to && to_vreg.is_some() { + self.clear_alloc(to); + self.allocs + .insert(to, RedundantMoveState::Orig(to_vreg.unwrap())); + return RedundantMoveAction { + elide: true, + def_alloc: Some((to, to_vreg.unwrap())), + }; + } + + let src_vreg = match from_state { + RedundantMoveState::Copy(_, opt_r) => opt_r, + RedundantMoveState::Orig(r) => Some(r), + _ => None, + }; + log::debug!(" -> src_vreg {:?}", src_vreg); + let dst_vreg = to_vreg.or(src_vreg); + log::debug!(" -> dst_vreg {:?}", dst_vreg); + let existing_dst_vreg = match to_state { + RedundantMoveState::Copy(_, opt_r) => opt_r, + RedundantMoveState::Orig(r) => Some(r), + _ => None, + }; + log::debug!(" -> existing_dst_vreg {:?}", existing_dst_vreg); + + let elide = match (from_state, to_state) { + (_, RedundantMoveState::Copy(orig_alloc, _)) if orig_alloc == from => true, + (RedundantMoveState::Copy(new_alloc, _), _) if new_alloc == to => true, + _ => false, + }; + log::debug!(" -> elide {}", elide); + + let def_alloc = if dst_vreg != existing_dst_vreg && dst_vreg.is_some() { + Some((to, dst_vreg.unwrap())) + } else { + None + }; + log::debug!(" -> def_alloc {:?}", def_alloc); + + // Invalidate all existing copies of `to` if `to` actually changed value. 
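+        // For example, if an earlier move recorded that some other
+        // allocation `a2` is a copy of `to`, overwriting `to` here must
+        // drop both `to`'s own state and `a2`'s derivation from `to`;
+        // `clear_alloc` handles both via the reverse map.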
+ if !elide { + self.clear_alloc(to); + } + + // Set up forward and reverse mapping. Don't track stack-to-stack copies. + if from.is_reg() || to.is_reg() { + self.allocs + .insert(to, RedundantMoveState::Copy(from, dst_vreg)); + log::debug!( + " -> create mapping {} -> {:?}", + to, + RedundantMoveState::Copy(from, dst_vreg) + ); + self.reverse_allocs + .entry(from) + .or_insert_with(|| smallvec![]) + .push(to); + } + + RedundantMoveAction { elide, def_alloc } + } + + pub fn clear(&mut self) { + log::debug!(" redundant move eliminator cleared"); + self.allocs.clear(); + self.reverse_allocs.clear(); + } + + pub fn clear_alloc(&mut self, alloc: Allocation) { + log::debug!(" redundant move eliminator: clear {:?}", alloc); + if let Some(ref mut existing_copies) = self.reverse_allocs.get_mut(&alloc) { + for to_inval in existing_copies.iter() { + log::debug!(" -> clear existing copy: {:?}", to_inval); + if let Some(val) = self.allocs.get_mut(to_inval) { + match val { + RedundantMoveState::Copy(_, Some(vreg)) => { + *val = RedundantMoveState::Orig(*vreg); + } + _ => *val = RedundantMoveState::None, + } + } + self.allocs.remove(to_inval); + } + existing_copies.clear(); + } + self.allocs.remove(&alloc); + } +} diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs new file mode 100644 index 00000000..0b457cba --- /dev/null +++ b/src/ion/reg_traversal.rs @@ -0,0 +1,123 @@ +use crate::{MachineEnv, PReg, RegClass}; + +/// This iterator represents a traversal through all allocatable +/// registers of a given class, in a certain order designed to +/// minimize allocation contention. +/// +/// The order in which we try registers is somewhat complex: +/// - First, if there is a hint, we try that. +/// - Then, we try registers in a traversal order that is based on an +/// "offset" (usually the bundle index) spreading pressure evenly +/// among registers to reduce commitment-map contention. +/// - Within that scan, we try registers in two groups: first, +/// prferred registers; then, non-preferred registers. (In normal +/// usage, these consist of caller-save and callee-save registers +/// respectively, to minimize clobber-saves; but they need not.) 
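+///
+/// Usage sketch: `RegTraversalIter::new(env, class, hint, PReg::invalid(),
+/// offset, None)` yields `hint` first, then the preferred registers of
+/// `class` starting at index `offset % preferred.len()`, then the
+/// non-preferred registers likewise, skipping any register already yielded
+/// as a hint; passing `Some(preg)` as the final argument yields only `preg`.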
+ +pub struct RegTraversalIter<'a> { + env: &'a MachineEnv, + class: usize, + hints: [Option; 2], + hint_idx: usize, + pref_idx: usize, + non_pref_idx: usize, + offset_pref: usize, + offset_non_pref: usize, + is_fixed: bool, + fixed: Option, +} + +impl<'a> RegTraversalIter<'a> { + pub fn new( + env: &'a MachineEnv, + class: RegClass, + hint_reg: PReg, + hint2_reg: PReg, + offset: usize, + fixed: Option, + ) -> Self { + let mut hint_reg = if hint_reg != PReg::invalid() { + Some(hint_reg) + } else { + None + }; + let mut hint2_reg = if hint2_reg != PReg::invalid() { + Some(hint2_reg) + } else { + None + }; + + if hint_reg.is_none() { + hint_reg = hint2_reg; + hint2_reg = None; + } + let hints = [hint_reg, hint2_reg]; + let class = class as u8 as usize; + let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { + offset % env.preferred_regs_by_class[class].len() + } else { + 0 + }; + let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { + offset % env.non_preferred_regs_by_class[class].len() + } else { + 0 + }; + Self { + env, + class, + hints, + hint_idx: 0, + pref_idx: 0, + non_pref_idx: 0, + offset_pref, + offset_non_pref, + is_fixed: fixed.is_some(), + fixed, + } + } +} + +impl<'a> std::iter::Iterator for RegTraversalIter<'a> { + type Item = PReg; + + fn next(&mut self) -> Option { + if self.is_fixed { + let ret = self.fixed; + self.fixed = None; + return ret; + } + + fn wrap(idx: usize, limit: usize) -> usize { + if idx >= limit { + idx - limit + } else { + idx + } + } + if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { + let h = self.hints[self.hint_idx]; + self.hint_idx += 1; + return h; + } + while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { + let arr = &self.env.preferred_regs_by_class[self.class][..]; + let r = arr[wrap(self.pref_idx + self.offset_pref, arr.len())]; + self.pref_idx += 1; + if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + continue; + } + return Some(r); + } + while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { + let arr = &self.env.non_preferred_regs_by_class[self.class][..]; + let r = arr[wrap(self.non_pref_idx + self.offset_non_pref, arr.len())]; + self.non_pref_idx += 1; + if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + continue; + } + return Some(r); + } + None + } +} diff --git a/src/ion/requirement.rs b/src/ion/requirement.rs new file mode 100644 index 00000000..2517f147 --- /dev/null +++ b/src/ion/requirement.rs @@ -0,0 +1,92 @@ +//! Requirements computation. 
+ +use super::{Env, LiveBundleIndex}; +use crate::{Function, Operand, OperandPolicy, PReg, RegClass}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Requirement { + Unknown, + Fixed(PReg), + Register(RegClass), + Stack(RegClass), + Any(RegClass), + Conflict, +} +impl Requirement { + #[inline(always)] + pub fn class(self) -> RegClass { + match self { + Requirement::Unknown => panic!("No class for unknown Requirement"), + Requirement::Fixed(preg) => preg.class(), + Requirement::Register(class) | Requirement::Any(class) | Requirement::Stack(class) => { + class + } + Requirement::Conflict => panic!("No class for conflicted Requirement"), + } + } + #[inline(always)] + pub fn merge(self, other: Requirement) -> Requirement { + match (self, other) { + (Requirement::Unknown, other) | (other, Requirement::Unknown) => other, + (Requirement::Conflict, _) | (_, Requirement::Conflict) => Requirement::Conflict, + (other, Requirement::Any(rc)) | (Requirement::Any(rc), other) => { + if other.class() == rc { + other + } else { + Requirement::Conflict + } + } + (Requirement::Stack(rc1), Requirement::Stack(rc2)) => { + if rc1 == rc2 { + self + } else { + Requirement::Conflict + } + } + (Requirement::Register(rc), Requirement::Fixed(preg)) + | (Requirement::Fixed(preg), Requirement::Register(rc)) => { + if rc == preg.class() { + Requirement::Fixed(preg) + } else { + Requirement::Conflict + } + } + (Requirement::Register(rc1), Requirement::Register(rc2)) => { + if rc1 == rc2 { + self + } else { + Requirement::Conflict + } + } + (Requirement::Fixed(a), Requirement::Fixed(b)) if a == b => self, + _ => Requirement::Conflict, + } + } + #[inline(always)] + pub fn from_operand(op: Operand) -> Requirement { + match op.policy() { + OperandPolicy::FixedReg(preg) => Requirement::Fixed(preg), + OperandPolicy::Reg | OperandPolicy::Reuse(_) => Requirement::Register(op.class()), + OperandPolicy::Stack => Requirement::Stack(op.class()), + _ => Requirement::Any(op.class()), + } + } +} + +impl<'a, F: Function> Env<'a, F> { + pub fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { + let mut req = Requirement::Unknown; + log::debug!("compute_requirement: {:?}", bundle); + for entry in &self.bundles[bundle.index()].ranges { + log::debug!(" -> LR {:?}", entry.index); + for u in &self.ranges[entry.index.index()].uses { + log::debug!(" -> use {:?}", u); + let r = Requirement::from_operand(u.operand); + req = req.merge(r); + log::debug!(" -> req {:?}", req); + } + } + log::debug!(" -> final: {:?}", req); + req + } +} diff --git a/src/ion/spill.rs b/src/ion/spill.rs new file mode 100644 index 00000000..2cc9b545 --- /dev/null +++ b/src/ion/spill.rs @@ -0,0 +1,218 @@ +/* + * The following license applies to this file, which was initially + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Since the initial port, the design has been substantially evolved + * and optimized. + */ + +//! Spillslot allocation. 
+ +use super::{ + AllocRegResult, Env, LiveRangeKey, LiveRangeSet, PReg, PRegIndex, RegClass, RegTraversalIter, + SpillSetIndex, SpillSlotData, SpillSlotIndex, SpillSlotList, +}; +use crate::{Allocation, Function, SpillSlot}; + +impl<'a, F: Function> Env<'a, F> { + pub fn try_allocating_regs_for_spilled_bundles(&mut self) { + log::debug!("allocating regs for spilled bundles"); + for i in 0..self.spilled_bundles.len() { + let bundle = self.spilled_bundles[i]; // don't borrow self + + let class = self.spillsets[self.bundles[bundle.index()].spillset.index()].class; + let hint = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; + + // This may be an empty-range bundle whose ranges are not + // sorted; sort all range-lists again here. + self.bundles[bundle.index()] + .ranges + .sort_unstable_by_key(|entry| entry.range.from); + + let mut success = false; + self.stats.spill_bundle_reg_probes += 1; + for preg in + RegTraversalIter::new(self.env, class, hint, PReg::invalid(), bundle.index(), None) + { + log::debug!("trying bundle {:?} to preg {:?}", bundle, preg); + let preg_idx = PRegIndex::new(preg.index()); + if let AllocRegResult::Allocated(_) = + self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) + { + self.stats.spill_bundle_reg_success += 1; + success = true; + break; + } + } + if !success { + log::debug!( + "spilling bundle {:?}: marking spillset {:?} as required", + bundle, + self.bundles[bundle.index()].spillset + ); + self.spillsets[self.bundles[bundle.index()].spillset.index()].required = true; + } + } + } + + pub fn spillslot_can_fit_spillset( + &mut self, + spillslot: SpillSlotIndex, + spillset: SpillSetIndex, + ) -> bool { + for &vreg in &self.spillsets[spillset.index()].vregs { + for entry in &self.vregs[vreg.index()].ranges { + if self.spillslots[spillslot.index()] + .ranges + .btree + .contains_key(&LiveRangeKey::from_range(&entry.range)) + { + return false; + } + } + } + true + } + + pub fn allocate_spillset_to_spillslot( + &mut self, + spillset: SpillSetIndex, + spillslot: SpillSlotIndex, + ) { + self.spillsets[spillset.index()].slot = spillslot; + for i in 0..self.spillsets[spillset.index()].vregs.len() { + // don't borrow self + let vreg = self.spillsets[spillset.index()].vregs[i]; + log::debug!( + "spillslot {:?} alloc'ed to spillset {:?}: vreg {:?}", + spillslot, + spillset, + vreg, + ); + for entry in &self.vregs[vreg.index()].ranges { + log::debug!( + "spillslot {:?} getting range {:?} from LR {:?} from vreg {:?}", + spillslot, + entry.range, + entry.index, + vreg, + ); + self.spillslots[spillslot.index()] + .ranges + .btree + .insert(LiveRangeKey::from_range(&entry.range), entry.index); + } + } + } + + pub fn allocate_spillslots(&mut self) { + for spillset in 0..self.spillsets.len() { + log::debug!("allocate spillslot: {}", spillset); + let spillset = SpillSetIndex::new(spillset); + if !self.spillsets[spillset.index()].required { + continue; + } + // Get or create the spillslot list for this size. + let size = self.spillsets[spillset.index()].size as usize; + if size >= self.slots_by_size.len() { + self.slots_by_size.resize( + size + 1, + SpillSlotList { + first_spillslot: SpillSlotIndex::invalid(), + last_spillslot: SpillSlotIndex::invalid(), + }, + ); + } + // Try a few existing spillslots. 
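+            // We probe at most 10 existing slots; a slot that does not fit
+            // is rotated to the back of the per-size list so persistently
+            // full slots are not re-probed first next time.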
+ let mut spillslot_iter = self.slots_by_size[size].first_spillslot; + let mut first_slot = SpillSlotIndex::invalid(); + let mut prev = SpillSlotIndex::invalid(); + let mut success = false; + for _attempt in 0..10 { + if spillslot_iter.is_invalid() { + break; + } + if spillslot_iter == first_slot { + // We've started looking at slots we placed at the end; end search. + break; + } + if first_slot.is_invalid() { + first_slot = spillslot_iter; + } + + if self.spillslot_can_fit_spillset(spillslot_iter, spillset) { + self.allocate_spillset_to_spillslot(spillset, spillslot_iter); + success = true; + break; + } + // Remove the slot and place it at the end of the respective list. + let next = self.spillslots[spillslot_iter.index()].next_spillslot; + if prev.is_valid() { + self.spillslots[prev.index()].next_spillslot = next; + } else { + self.slots_by_size[size].first_spillslot = next; + } + if !next.is_valid() { + self.slots_by_size[size].last_spillslot = prev; + } + + let last = self.slots_by_size[size].last_spillslot; + if last.is_valid() { + self.spillslots[last.index()].next_spillslot = spillslot_iter; + } else { + self.slots_by_size[size].first_spillslot = spillslot_iter; + } + self.slots_by_size[size].last_spillslot = spillslot_iter; + + prev = spillslot_iter; + spillslot_iter = next; + } + + if !success { + // Allocate a new spillslot. + let spillslot = SpillSlotIndex::new(self.spillslots.len()); + let next = self.slots_by_size[size].first_spillslot; + self.spillslots.push(SpillSlotData { + ranges: LiveRangeSet::new(), + next_spillslot: next, + alloc: Allocation::none(), + class: self.spillsets[spillset.index()].class, + }); + self.slots_by_size[size].first_spillslot = spillslot; + if !next.is_valid() { + self.slots_by_size[size].last_spillslot = spillslot; + } + + self.allocate_spillset_to_spillslot(spillset, spillslot); + } + } + + // Assign actual slot indices to spillslots. + for i in 0..self.spillslots.len() { + self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].class); + } + + log::debug!("spillslot allocator done"); + } + + pub fn allocate_spillslot(&mut self, class: RegClass) -> Allocation { + let size = self.func.spillslot_size(class) as u32; + let mut offset = self.num_spillslots; + // Align up to `size`. + debug_assert!(size.is_power_of_two()); + offset = (offset + size - 1) & !(size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + size - 1 + } else { + offset + }; + offset += size; + self.num_spillslots = offset; + Allocation::stack(SpillSlot::new(slot as usize, class)) + } +} diff --git a/src/ion/stackmap.rs b/src/ion/stackmap.rs new file mode 100644 index 00000000..56b2b5d9 --- /dev/null +++ b/src/ion/stackmap.rs @@ -0,0 +1,73 @@ +/* + * The following license applies to this file, which was initially + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Since the initial port, the design has been substantially evolved + * and optimized. + */ + +//! Stackmap computation. + +use super::{Env, ProgPoint, VRegIndex}; +use crate::Function; + +impl<'a, F: Function> Env<'a, F> { + pub fn compute_stackmaps(&mut self) { + // For each ref-typed vreg, iterate through ranges and find + // safepoints in-range. Add the SpillSlot to the stackmap. 
+ + if self.func.reftype_vregs().is_empty() { + return; + } + + // Given `safepoints_per_vreg` from the liveness computation, + // all we have to do is, for each vreg in this map, step + // through the LiveRanges along with a sorted list of + // safepoints; and for each safepoint in the current range, + // emit the allocation into the `safepoint_slots` list. + + log::debug!("safepoints_per_vreg = {:?}", self.safepoints_per_vreg); + + for vreg in self.func.reftype_vregs() { + log::debug!("generating safepoint info for vreg {}", vreg); + let vreg = VRegIndex::new(vreg.vreg()); + let mut safepoints: Vec = self + .safepoints_per_vreg + .get(&vreg.index()) + .unwrap() + .iter() + .map(|&inst| ProgPoint::before(inst)) + .collect(); + safepoints.sort_unstable(); + log::debug!(" -> live over safepoints: {:?}", safepoints); + + let mut safepoint_idx = 0; + for entry in &self.vregs[vreg.index()].ranges { + let range = entry.range; + let alloc = self.get_alloc_for_range(entry.index); + log::debug!(" -> range {:?}: alloc {}", range, alloc); + while safepoint_idx < safepoints.len() && safepoints[safepoint_idx] < range.to { + if safepoints[safepoint_idx] < range.from { + safepoint_idx += 1; + continue; + } + log::debug!(" -> covers safepoint {:?}", safepoints[safepoint_idx]); + + let slot = alloc + .as_stack() + .expect("Reference-typed value not in spillslot at safepoint"); + self.safepoint_slots.push((safepoints[safepoint_idx], slot)); + safepoint_idx += 1; + } + } + } + + self.safepoint_slots.sort_unstable(); + log::debug!("final safepoint slots info: {:?}", self.safepoint_slots); + } +} From 36975b8b6fa5a477452f0174d3361c3564786ef9 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 19 Jun 2021 11:34:05 -0700 Subject: [PATCH 122/155] Add doc-comment note on Edit that stack-to-stack moves are never generated. --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 3a6ecb45..92dd1f59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -892,7 +892,8 @@ impl ProgPoint { #[derive(Clone, Debug)] pub enum Edit { /// Move one allocation to another. Each allocation may be a - /// register or a stack slot (spillslot). + /// register or a stack slot (spillslot). However, stack-to-stack + /// moves will never be generated. /// /// `to_vreg`, if defined, is useful as metadata: it indicates /// that the moved value is a def of a new vreg. From caf7274efd6f4b39f3a64709a9d347a51479b988 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 19 Jun 2021 11:48:54 -0700 Subject: [PATCH 123/155] Remove unused pred_pos --- src/cfg.rs | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/src/cfg.rs b/src/cfg.rs index 65638e7c..dc046ffc 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -26,15 +26,6 @@ pub struct CFGInfo { pub block_entry: Vec, /// For each block, the last instruction. pub block_exit: Vec, - /// For each block, what is its position in its successor's preds, - /// if it has a single successor? - /// - /// (Because we require split critical edges, we always either have a single - /// successor (which itself may have multiple preds), or we have multiple - /// successors but each successor itself has only one pred; so we can store - /// just one value per block and always know any block's position in its - /// successors' preds lists.) - pub pred_pos: Vec, /// For each block, what is the approximate loop depth? 
/// /// This measure is fully precise iff the input CFG is reducible @@ -60,7 +51,6 @@ impl CFGInfo { let mut vreg_def_blockparam = vec![(Block::invalid(), 0); f.num_vregs()]; let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; - let mut pred_pos = vec![0; f.blocks()]; let mut backedge_in = vec![0; f.blocks()]; let mut backedge_out = vec![0; f.blocks()]; @@ -88,12 +78,11 @@ impl CFGInfo { // (this block). let preds = f.block_preds(block).len() + if block == f.entry_block() { 1 } else { 0 }; if preds > 1 { - for (i, &pred) in f.block_preds(block).iter().enumerate() { + for &pred in f.block_preds(block) { let succs = f.block_succs(pred).len(); if succs > 1 { return Err(RegAllocError::CritEdge(pred, block)); } - pred_pos[pred.index()] = i; } } @@ -153,7 +142,6 @@ impl CFGInfo { vreg_def_blockparam, block_entry, block_exit, - pred_pos, approx_loop_depth, }) } @@ -161,14 +149,4 @@ impl CFGInfo { pub fn dominates(&self, a: Block, b: Block) -> bool { domtree::dominates(&self.domtree[..], a, b) } - - /// Return the position of this block in its successor's predecessor list. - /// - /// Because the CFG must have split critical edges, we actually do not need - /// to know *which* successor: if there is more than one, then each - /// successor has only one predecessor (that's this block), so the answer is - /// `0` no matter which successor we are considering. - pub fn pred_position(&self, block: Block) -> usize { - self.pred_pos[block.index()] - } } From 50eb6fc42f3c6a178f9feb79ca7157a2b4a5f805 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 19 Jun 2021 12:08:37 -0700 Subject: [PATCH 124/155] Keep internal modules private, but re-export under fuzzing feature flag --- fuzz/fuzz_targets/domtree.rs | 15 +++++++++------ fuzz/fuzz_targets/ion.rs | 2 +- fuzz/fuzz_targets/ion_checker.rs | 4 ++-- fuzz/fuzz_targets/moves.rs | 2 +- fuzz/fuzz_targets/ssagen.rs | 4 ++-- src/fuzzing/mod.rs | 24 ++++++++++++++++++++++++ src/lib.rs | 16 ++++++++-------- 7 files changed, 47 insertions(+), 20 deletions(-) diff --git a/fuzz/fuzz_targets/domtree.rs b/fuzz/fuzz_targets/domtree.rs index 06885782..4cd8d79c 100644 --- a/fuzz/fuzz_targets/domtree.rs +++ b/fuzz/fuzz_targets/domtree.rs @@ -8,7 +8,10 @@ use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; use std::collections::HashSet; -use regalloc2::{domtree, postorder, Block}; +use regalloc2::{ + fuzzing::{domtree, postorder}, + Block, +}; #[derive(Clone, Debug)] struct CFG { @@ -96,7 +99,10 @@ fn check_idom_violations(idom: &[Block], path: &Path) { // and false for every other block. 
for domblock in 0..idom.len() { let domblock = Block::new(domblock); - assert_eq!(domset.contains(&domblock), domtree::dominates(idom, domblock, *block)); + assert_eq!( + domset.contains(&domblock), + domtree::dominates(idom, domblock, *block) + ); } visited.insert(*block); } @@ -112,10 +118,7 @@ impl Arbitrary for TestCase { fn arbitrary(u: &mut Unstructured) -> Result { let cfg = CFG::arbitrary(u)?; let path = Path::choose_from_cfg(&cfg, u)?; - Ok(TestCase { - cfg, - path, - }) + Ok(TestCase { cfg, path }) } } diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs index 288b8fb5..82aee653 100644 --- a/fuzz/fuzz_targets/ion.rs +++ b/fuzz/fuzz_targets/ion.rs @@ -12,5 +12,5 @@ fuzz_target!(|func: Func| { let _ = env_logger::try_init(); log::debug!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); - let _out = regalloc2::ion::run(&func, &env, false).expect("regalloc did not succeed"); + let _out = regalloc2::fuzzing::ion::run(&func, &env, false).expect("regalloc did not succeed"); }); diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs index f6e8cd5a..950d4d7c 100644 --- a/fuzz/fuzz_targets/ion_checker.rs +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -7,7 +7,7 @@ use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; -use regalloc2::checker::Checker; +use regalloc2::fuzzing::checker::Checker; use regalloc2::fuzzing::func::{Func, Options}; #[derive(Clone, Debug)] @@ -40,7 +40,7 @@ fuzz_target!(|testcase: TestCase| { let _ = env_logger::try_init(); log::debug!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); - let out = regalloc2::ion::run(&func, &env, true).expect("regalloc did not succeed"); + let out = regalloc2::fuzzing::ion::run(&func, &env, true).expect("regalloc did not succeed"); let mut checker = Checker::new(&func); checker.prepare(&out); diff --git a/fuzz/fuzz_targets/moves.rs b/fuzz/fuzz_targets/moves.rs index 040c3e14..e62342f4 100644 --- a/fuzz/fuzz_targets/moves.rs +++ b/fuzz/fuzz_targets/moves.rs @@ -7,7 +7,7 @@ use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; -use regalloc2::moves::ParallelMoves; +use regalloc2::fuzzing::moves::ParallelMoves; use regalloc2::{Allocation, PReg, RegClass}; use std::collections::HashSet; diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs index d68d672f..bed2253c 100644 --- a/fuzz/fuzz_targets/ssagen.rs +++ b/fuzz/fuzz_targets/ssagen.rs @@ -7,9 +7,9 @@ use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; use libfuzzer_sys::fuzz_target; -use regalloc2::cfg::CFGInfo; +use regalloc2::fuzzing::cfg::CFGInfo; use regalloc2::fuzzing::func::{Func, Options}; -use regalloc2::ssa::validate_ssa; +use regalloc2::fuzzing::ssa::validate_ssa; #[derive(Debug)] struct TestCase { diff --git a/src/fuzzing/mod.rs b/src/fuzzing/mod.rs index ae548d9f..e6a225e6 100644 --- a/src/fuzzing/mod.rs +++ b/src/fuzzing/mod.rs @@ -6,3 +6,27 @@ //! Utilities for fuzzing. pub mod func; + +// Re-exports for fuzz targets. 
+ +pub mod domtree { + pub use crate::domtree::*; +} +pub mod postorder { + pub use crate::postorder::*; +} +pub mod moves { + pub use crate::moves::*; +} +pub mod cfg { + pub use crate::cfg::*; +} +pub mod ssa { + pub use crate::ssa::*; +} +pub mod ion { + pub use crate::ion::*; +} +pub mod checker { + pub use crate::checker::*; +} diff --git a/src/lib.rs b/src/lib.rs index 92dd1f59..ee5afb2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,16 +12,16 @@ #![allow(dead_code)] -pub mod bitvec; -pub mod cfg; -pub mod domtree; -pub mod ion; -pub mod moves; -pub mod postorder; -pub mod ssa; +pub(crate) mod bitvec; +pub(crate) mod cfg; +pub(crate) mod domtree; +pub(crate) mod ion; +pub(crate) mod moves; +pub(crate) mod postorder; +pub(crate) mod ssa; #[macro_use] -pub mod index; +mod index; pub use index::{Block, Inst, InstRange, InstRangeIter}; pub mod checker; From 736f636c36954fb4e86701da9d5cd5a44f5ea7ed Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 19 Jun 2021 12:17:18 -0700 Subject: [PATCH 125/155] Add fixed-non-allocatable operand support. --- src/ion/dump.rs | 2 +- src/ion/liveranges.rs | 12 ++++++++++++ src/lib.rs | 30 ++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/ion/dump.rs b/src/ion/dump.rs index c2912532..bb1729bb 100644 --- a/src/ion/dump.rs +++ b/src/ion/dump.rs @@ -1,7 +1,7 @@ //! Debugging output. use super::Env; -use crate::{Function, ProgPoint, Block}; +use crate::{Block, Function, ProgPoint}; impl<'a, F: Function> Env<'a, F> { pub fn dump_state(&self) { diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 41895025..8bf48e4a 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -272,6 +272,9 @@ impl<'a, F: Function> Env<'a, F> { for pos in &[OperandPos::After, OperandPos::Before] { for op in self.func.inst_operands(inst) { + if op.as_fixed().is_some() { + continue; + } if op.pos() == *pos { let was_live = live.get(op.vreg().vreg()); log::debug!("op {:?} was_live = {}", op, was_live); @@ -868,6 +871,15 @@ impl<'a, F: Function> Env<'a, F> { operand ); + // If this is a "fixed non-allocatable + // register" operand, set the alloc + // immediately and then ignore the operand + // hereafter. + if let Some(preg) = operand.as_fixed() { + self.set_alloc(inst, i, Allocation::reg(preg)); + continue; + } + match operand.kind() { OperandKind::Def | OperandKind::Mod => { log::debug!("Def of {} at {:?}", operand.vreg(), pos); diff --git a/src/lib.rs b/src/lib.rs index ee5afb2d..5535e8d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -144,6 +144,15 @@ impl VReg { pub fn invalid() -> Self { VReg::new(Self::MAX, RegClass::Int) } + + #[inline(always)] + pub fn is_valid(self) -> bool { + self != Self::invalid() + } + #[inline(always)] + pub fn is_invalid(self) -> bool { + self == Self::invalid() + } } impl std::fmt::Debug for VReg { @@ -328,6 +337,19 @@ impl Operand { ) } + /// Create an Operand that always results in an assignment to the + /// given fixed `preg`, *without* tracking liveranges in that + /// `preg`. Must only be used for non-allocatable registers. 
+ #[inline(always)] + pub fn fixed(preg: PReg) -> Self { + Operand::new( + VReg::invalid(), + OperandPolicy::FixedReg(preg), + OperandKind::Use, // doesn't matter + OperandPos::Before, // doesn't matter + ) + } + #[inline(always)] pub fn vreg(self) -> VReg { let vreg_idx = ((self.bits as usize) & VReg::MAX) as usize; @@ -379,6 +401,14 @@ impl Operand { } } + #[inline(always)] + pub fn as_fixed(self) -> Option { + match (self.vreg().is_invalid(), self.policy()) { + (true, OperandPolicy::FixedReg(preg)) => Some(preg), + _ => None, + } + } + #[inline(always)] pub fn bits(self) -> u32 { self.bits From 22eed0a6aee67cbff1d433fe970a6fc922d9950c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 19 Jun 2021 12:47:02 -0700 Subject: [PATCH 126/155] Make bitvec public; it is used by regalloc.rs shim too. --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 5535e8d1..1396cec1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ #![allow(dead_code)] -pub(crate) mod bitvec; +pub mod bitvec; pub(crate) mod cfg; pub(crate) mod domtree; pub(crate) mod ion; From a58d36fd945e90323e15ebe93a279db419ae1c50 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 19 Jun 2021 13:20:11 -0700 Subject: [PATCH 127/155] TODO update: make note on idea for large-input support --- doc/TODO | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/TODO b/doc/TODO index aa76ce82..88d82008 100644 --- a/doc/TODO +++ b/doc/TODO @@ -1,5 +1,14 @@ # Features +- Large-input support (> 1M vregs, > 1M blocks) + - Two operand impls: u64-based and u32-based. Always accept + u64-based `Operand` publicly (do not expose this in interface). + - Trait to generalize over them and support both internally + (parameterize the whole allocator impl) + - On data-structure init, choose one or the other based on max vreg + index + - Update halfmove keys: u128 rather than u64 + - Rematerialization - Stack-location constraints that place operands in user-defined stack locations (distinct from SpillSlots) (e.g., stack args) From 21fb233809949cc9c2edd0a81cf41a1159412509 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 19 Jun 2021 13:41:26 -0700 Subject: [PATCH 128/155] reduce nesting level in DESIGN.md to make pandoc-to-pdf happy --- doc/DESIGN.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/DESIGN.md b/doc/DESIGN.md index 32b207d6..82f36af4 100644 --- a/doc/DESIGN.md +++ b/doc/DESIGN.md @@ -568,16 +568,16 @@ For each instruction, we process its effects on the scan state: preg. - If not a move: - - for each program point [after, before]: - - for each operand at this point(\*): - - if a def or mod: - - if not currently live, this is a dead def; create an empty - LR. - - if a def: - - set the start of the LR for this vreg to this point. - - set as dead. - - if a use: - - create LR if not live, with start at beginning of block. + - for each program point [after, before], for each operand at + this point(\*): + - if a def or mod: + - if not currently live, this is a dead def; create an empty + LR. + - if a def: + - set the start of the LR for this vreg to this point. + - set as dead. + - if a use: + - create LR if not live, with start at beginning of block. - Else, if a move: - simple case (no pinned vregs): From 245c21228901dc540701c771d408f3301582ae50 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 20 Jun 2021 23:03:44 -0700 Subject: [PATCH 129/155] Revert "Add fixed-non-allocatable operand support." 
This feature needs more thought; for now we will of course continue to support pinned vregs, but perhaps we can do better for "pass-through-and-forget" operands that are given non-allocatable registers. This reverts commit 736f636c36954fb4e86701da9d5cd5a44f5ea7ed. --- src/ion/dump.rs | 2 +- src/ion/liveranges.rs | 12 ------------ src/lib.rs | 30 ------------------------------ 3 files changed, 1 insertion(+), 43 deletions(-) diff --git a/src/ion/dump.rs b/src/ion/dump.rs index bb1729bb..c2912532 100644 --- a/src/ion/dump.rs +++ b/src/ion/dump.rs @@ -1,7 +1,7 @@ //! Debugging output. use super::Env; -use crate::{Block, Function, ProgPoint}; +use crate::{Function, ProgPoint, Block}; impl<'a, F: Function> Env<'a, F> { pub fn dump_state(&self) { diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 8bf48e4a..41895025 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -272,9 +272,6 @@ impl<'a, F: Function> Env<'a, F> { for pos in &[OperandPos::After, OperandPos::Before] { for op in self.func.inst_operands(inst) { - if op.as_fixed().is_some() { - continue; - } if op.pos() == *pos { let was_live = live.get(op.vreg().vreg()); log::debug!("op {:?} was_live = {}", op, was_live); @@ -871,15 +868,6 @@ impl<'a, F: Function> Env<'a, F> { operand ); - // If this is a "fixed non-allocatable - // register" operand, set the alloc - // immediately and then ignore the operand - // hereafter. - if let Some(preg) = operand.as_fixed() { - self.set_alloc(inst, i, Allocation::reg(preg)); - continue; - } - match operand.kind() { OperandKind::Def | OperandKind::Mod => { log::debug!("Def of {} at {:?}", operand.vreg(), pos); diff --git a/src/lib.rs b/src/lib.rs index 1396cec1..31349553 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -144,15 +144,6 @@ impl VReg { pub fn invalid() -> Self { VReg::new(Self::MAX, RegClass::Int) } - - #[inline(always)] - pub fn is_valid(self) -> bool { - self != Self::invalid() - } - #[inline(always)] - pub fn is_invalid(self) -> bool { - self == Self::invalid() - } } impl std::fmt::Debug for VReg { @@ -337,19 +328,6 @@ impl Operand { ) } - /// Create an Operand that always results in an assignment to the - /// given fixed `preg`, *without* tracking liveranges in that - /// `preg`. Must only be used for non-allocatable registers. - #[inline(always)] - pub fn fixed(preg: PReg) -> Self { - Operand::new( - VReg::invalid(), - OperandPolicy::FixedReg(preg), - OperandKind::Use, // doesn't matter - OperandPos::Before, // doesn't matter - ) - } - #[inline(always)] pub fn vreg(self) -> VReg { let vreg_idx = ((self.bits as usize) & VReg::MAX) as usize; @@ -401,14 +379,6 @@ impl Operand { } } - #[inline(always)] - pub fn as_fixed(self) -> Option { - match (self.vreg().is_invalid(), self.policy()) { - (true, OperandPolicy::FixedReg(preg)) => Some(preg), - _ => None, - } - } - #[inline(always)] pub fn bits(self) -> u32 { self.bits From f27abc9c48c6f76a9833a33ecec42debf0fd7908 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 22 Jun 2021 12:06:12 -0700 Subject: [PATCH 130/155] Remove infinite-loop check: it is not a high enough bound in some pathological cases (e.g., `gc::many_live_refs` test in wasmtime), and it has served its purpose in testing. We can rely on more detailed assertions, e.g. that splits actually shrink bundles and that bundles evict only lower-priority bundles, instead. 
--- src/ion/data_structures.rs | 2 ++ src/ion/mod.rs | 1 + src/ion/process.rs | 6 ------ 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs index ce95f7a0..5a1ae838 100644 --- a/src/ion/data_structures.rs +++ b/src/ion/data_structures.rs @@ -337,6 +337,8 @@ pub struct Env<'a, F: Function> { pub num_spillslots: u32, pub safepoint_slots: Vec<(ProgPoint, SpillSlot)>, + pub allocated_bundle_count: usize, + pub stats: Stats, // For debug output only: a list of textual annotations at every diff --git a/src/ion/mod.rs b/src/ion/mod.rs index e78a0f59..abbed9ef 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -68,6 +68,7 @@ impl<'a, F: Function> Env<'a, F> { spilled_bundles: vec![], spillslots: vec![], slots_by_size: vec![], + allocated_bundle_count: 0, extra_spillslot: vec![None, None], diff --git a/src/ion/process.rs b/src/ion/process.rs index 0b272493..426e7fcc 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -36,15 +36,9 @@ pub enum AllocRegResult { impl<'a, F: Function> Env<'a, F> { pub fn process_bundles(&mut self) -> Result<(), RegAllocError> { - let mut count = 0; while let Some((bundle, reg_hint)) = self.allocation_queue.pop() { self.stats.process_bundle_count += 1; self.process_bundle(bundle, reg_hint)?; - count += 1; - if count > self.func.insts() * 50 { - self.dump_state(); - panic!("Infinite loop!"); - } } self.stats.final_liverange_count = self.ranges.len(); self.stats.final_bundle_count = self.bundles.len(); From 66d6821c7b40380290f91dbd7de2ba77a25f2bd0 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 22 Jun 2021 14:06:59 -0700 Subject: [PATCH 131/155] Fix perf issue with many safepoints. In wasmtime's `gc::many_live_refs` unit-test, approximately ~1K vregs are live over ~1K safepoints (actually, each vreg is live over half the safepoints on average, in a LIFO sort of arrangement). This causes a huge slowdown with the current heuristics. Basically, each vreg had a `Conflict` requirement because it had both stack uses (safepoints) and register uses (the actual def and normal use). The action in this case when processing the vreg's bundle is to split off the first use -- a conservative-but-correct approach that will always eventually split bundles far enough to get non-conflicting-requirement pieces. However, because each vreg had N stack uses followed by one register use, this meant that each had to be split N times (!) -- so we had O(n^2) splits and O(n^2) bundles by the end of the allocation. This instead implements another simple heuristic that is much better: when the requirements are conflicting, scan forward and find the exact point at which the requirements become conflicting, such that the prefix (first half prior to the split) still has no conflict, and split there. This turns the above test-case into an O(n)-bundle / O(n)-split situation. 
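As a standalone illustration of the scan (using a simplified requirement
lattice and a plain position list, not the allocator's actual
`Requirement`/`Use` types):

    #[derive(Clone, Copy, PartialEq, Debug)]
    enum Req { Unknown, Reg, Stack, Conflict }

    fn merge(a: Req, b: Req) -> Req {
        match (a, b) {
            (Req::Unknown, x) | (x, Req::Unknown) => x,
            (Req::Conflict, _) | (_, Req::Conflict) => Req::Conflict,
            (x, y) if x == y => x,
            _ => Req::Conflict,
        }
    }

    // Position of the first use at which the merged requirement becomes
    // Conflict, i.e. the split point that keeps the prefix conflict-free.
    fn first_conflict_pos(uses: &[(u32, Req)]) -> Option<u32> {
        let mut req = Req::Unknown;
        for &(pos, r) in uses {
            req = merge(req, r);
            if req == Req::Conflict {
                return Some(pos);
            }
        }
        None
    }

    fn main() {
        // Stack (safepoint) uses followed by a single Reg use: the merge
        // first becomes Conflict at the Reg use, so one split there leaves
        // a conflict-free Stack prefix.
        let uses = [(10, Req::Stack), (20, Req::Stack), (30, Req::Stack), (40, Req::Reg)];
        assert_eq!(first_conflict_pos(&uses), Some(40));
    }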
--- src/ion/liveranges.rs | 2 ++ src/ion/process.rs | 31 ++++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 41895025..04ca41ee 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -955,9 +955,11 @@ impl<'a, F: Function> Env<'a, F> { } if self.func.is_safepoint(inst) { + log::debug!("inst{} is safepoint", inst.index()); self.safepoints.push(inst); for vreg in live.iter() { if let Some(safepoints) = self.safepoints_per_vreg.get_mut(&vreg) { + log::debug!("vreg v{} live at safepoint inst{}", vreg, inst.index()); safepoints.insert(inst); } } diff --git a/src/ion/process.rs b/src/ion/process.rs index 426e7fcc..ef72956b 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -363,6 +363,29 @@ impl<'a, F: Function> Env<'a, F> { } } + pub fn find_conflict_split_point(&self, bundle: LiveBundleIndex) -> ProgPoint { + // Find the first use whose requirement causes the merge up to + // this point to go to Conflict. + let mut req = Requirement::Unknown; + for entry in &self.bundles[bundle.index()].ranges { + for u in &self.ranges[entry.index.index()].uses { + let this_req = Requirement::from_operand(u.operand); + req = req.merge(this_req); + if req == Requirement::Conflict { + return u.pos; + } + } + } + + // Fallback: start of bundle. + self.bundles[bundle.index()] + .ranges + .first() + .unwrap() + .range + .from + } + pub fn get_or_create_spill_bundle( &mut self, bundle: LiveBundleIndex, @@ -734,15 +757,17 @@ impl<'a, F: Function> Env<'a, F> { log::debug!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); if let Requirement::Conflict = req { - // We have to split right away. + // We have to split right away. We'll find a point to + // split that would allow at least the first half of the + // split to be conflict-free. assert!( !self.minimal_bundle(bundle), "Minimal bundle with conflict!" ); - let bundle_start = self.bundles[bundle.index()].ranges[0].range.from; + let split_point = self.find_conflict_split_point(bundle); self.split_and_requeue_bundle( bundle, - /* split_at_point = */ bundle_start, + /* split_at_point = */ split_point, reg_hint, ); return Ok(()); From 4c193a94991a3cf5c53a9afbebbdc26c7fec8b53 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 28 Jul 2021 12:37:32 -0700 Subject: [PATCH 132/155] Fix heuristic-cost function overflow with high loop depth (found by @Amanieu). --- src/ion/dump.rs | 2 +- src/ion/liveranges.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ion/dump.rs b/src/ion/dump.rs index c2912532..bb1729bb 100644 --- a/src/ion/dump.rs +++ b/src/ion/dump.rs @@ -1,7 +1,7 @@ //! Debugging output. use super::Env; -use crate::{Function, ProgPoint, Block}; +use crate::{Block, Function, ProgPoint}; impl<'a, F: Function> Env<'a, F> { pub fn dump_state(&self) { diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 04ca41ee..231920d8 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -32,7 +32,9 @@ use std::convert::TryFrom; pub fn spill_weight_from_policy(policy: OperandPolicy, loop_depth: usize, is_def: bool) -> u32 { // A bonus of 1000 for one loop level, 4000 for two loop levels, // 16000 for three loop levels, etc. Avoids exponentiation. - let hot_bonus = std::cmp::min(16000, 1000 * (1 << (2 * loop_depth))); + // Bound `loop_depth` at 2 so that `hot_bonus` is at most 16000. 
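+    // (That is: loop_depth 0 -> bonus 1000, 1 -> 4000, 2 or deeper -> 16000.)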
+ let loop_depth = std::cmp::min(2, loop_depth); + let hot_bonus = 1000 * (1 << (2 * loop_depth)); let def_bonus = if is_def { 2000 } else { 0 }; let policy_bonus = match policy { OperandPolicy::Any => 1000, From c9e8a87ceaefb63023039ab88e3c76db8969022b Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 11:17:37 -0700 Subject: [PATCH 133/155] Update TODO with new items from private feedback from @julian-seward1. --- doc/TODO | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/TODO b/doc/TODO index 88d82008..a6eb0b56 100644 --- a/doc/TODO +++ b/doc/TODO @@ -9,6 +9,8 @@ index - Update halfmove keys: u128 rather than u64 +- Support allocation of register pairs (or overlapping registers generally) + - Rematerialization - Stack-location constraints that place operands in user-defined stack locations (distinct from SpillSlots) (e.g., stack args) @@ -19,6 +21,9 @@ - Investigate more principled cost functions and split locations, especially around loop nests +- Investigate ways to improve bundle-merging; e.g., merge moves before + other types of connections + # Cleanup - Remove support for non-SSA code once no longer necessary \ No newline at end of file From 84285c26fbf431a9202624942f409cdd3f797202 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 11:17:52 -0700 Subject: [PATCH 134/155] Rename OperandPolicy to OperandConstraint as per feedback from @julian-seward1. --- src/checker.rs | 26 ++++++++--------- src/fuzzing/func.rs | 20 ++++++------- src/ion/liveranges.rs | 60 +++++++++++++++++++------------------- src/ion/merge.rs | 10 +++---- src/ion/moves.rs | 6 ++-- src/ion/process.rs | 16 +++++----- src/ion/requirement.rs | 10 +++---- src/lib.rs | 66 +++++++++++++++++++++--------------------- 8 files changed, 107 insertions(+), 107 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index fa638cc6..858e35ad 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -17,7 +17,7 @@ //! conceptually generates a symbolic value "Vn" when storing to (or //! modifying) a virtual register. //! -//! Operand policies (fixed register, register, any) are also checked +//! Operand constraints (fixed register, register, any) are also checked //! at each operand. //! //! The dataflow analysis state at each program point is: @@ -67,7 +67,7 @@ use crate::{ Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, Operand, OperandKind, - OperandPolicy, OperandPos, Output, PReg, ProgPoint, SpillSlot, VReg, + OperandConstraint, OperandPos, Output, PReg, ProgPoint, SpillSlot, VReg, }; use std::collections::{HashMap, HashSet, VecDeque}; @@ -106,7 +106,7 @@ pub enum CheckerError { alloc: Allocation, actual: VReg, }, - PolicyViolated { + ConstraintViolated { inst: Inst, op: Operand, alloc: Allocation, @@ -266,7 +266,7 @@ impl CheckerState { _ => {} } - self.check_policy(inst, op, alloc, allocs)?; + self.check_constraint(inst, op, alloc, allocs)?; Ok(()) } @@ -287,14 +287,14 @@ impl CheckerState { // happens early. let has_reused_input = operands .iter() - .any(|op| matches!(op.policy(), OperandPolicy::Reuse(_))); + .any(|op| matches!(op.constraint(), OperandConstraint::Reuse(_))); if has_reused_input && pos == InstPosition::After { return Ok(()); } // For each operand, check (i) that the allocation // contains the expected vreg, and (ii) that it meets - // the requirements of the OperandPolicy. + // the requirements of the OperandConstraint. 
for (op, alloc) in operands.iter().zip(allocs.iter()) { let is_here = match (op.pos(), pos) { (OperandPos::Before, InstPosition::Before) => true, @@ -413,31 +413,31 @@ impl CheckerState { } } - fn check_policy( + fn check_constraint( &self, inst: Inst, op: Operand, alloc: Allocation, allocs: &[Allocation], ) -> Result<(), CheckerError> { - match op.policy() { - OperandPolicy::Any => {} - OperandPolicy::Reg => { + match op.constraint() { + OperandConstraint::Any => {} + OperandConstraint::Reg => { if alloc.kind() != AllocationKind::Reg { return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); } } - OperandPolicy::Stack => { + OperandConstraint::Stack => { if alloc.kind() != AllocationKind::Stack { return Err(CheckerError::AllocationIsNotStack { inst, op, alloc }); } } - OperandPolicy::FixedReg(preg) => { + OperandConstraint::FixedReg(preg) => { if alloc != Allocation::reg(preg) { return Err(CheckerError::AllocationIsNotFixedReg { inst, op, alloc }); } } - OperandPolicy::Reuse(idx) => { + OperandConstraint::Reuse(idx) => { if alloc.kind() != AllocationKind::Reg { return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); } diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index d89df600..facb276e 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -5,7 +5,7 @@ use crate::{ domtree, postorder, Allocation, Block, Function, Inst, InstRange, MachineEnv, Operand, - OperandKind, OperandPolicy, OperandPos, PReg, RegClass, VReg, + OperandKind, OperandConstraint, OperandPos, PReg, RegClass, VReg, }; use arbitrary::Result as ArbitraryResult; @@ -230,9 +230,9 @@ impl FuncBuilder { } } -impl Arbitrary for OperandPolicy { +impl Arbitrary for OperandConstraint { fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { - Ok(*u.choose(&[OperandPolicy::Any, OperandPolicy::Reg])?) + Ok(*u.choose(&[OperandConstraint::Any, OperandConstraint::Reg])?) } } @@ -401,13 +401,13 @@ impl Func { let mut avail = block_params[block].clone(); let mut remaining_nonlocal_uses = u.int_in_range(0..=3)?; while let Some(vreg) = vregs_by_block_to_be_defined[block].pop() { - let def_policy = OperandPolicy::arbitrary(u)?; + let def_constraint = OperandConstraint::arbitrary(u)?; let def_pos = if bool::arbitrary(u)? { OperandPos::Before } else { OperandPos::After }; - let mut operands = vec![Operand::new(vreg, def_policy, OperandKind::Def, def_pos)]; + let mut operands = vec![Operand::new(vreg, def_constraint, OperandKind::Def, def_pos)]; let mut allocations = vec![Allocation::none()]; for _ in 0..u.int_in_range(0..=3)? { let vreg = if avail.len() > 0 @@ -433,10 +433,10 @@ impl Func { } else { break; }; - let use_policy = OperandPolicy::arbitrary(u)?; + let use_constraint = OperandConstraint::arbitrary(u)?; operands.push(Operand::new( vreg, - use_policy, + use_constraint, OperandKind::Use, OperandPos::Before, )); @@ -450,14 +450,14 @@ impl Func { let reused = u.int_in_range(1..=(operands.len() - 1))?; operands[0] = Operand::new( op.vreg(), - OperandPolicy::Reuse(reused), + OperandConstraint::Reuse(reused), op.kind(), OperandPos::After, ); // Make sure reused input is a Reg. let op = operands[reused]; operands[reused] = - Operand::new(op.vreg(), OperandPolicy::Reg, op.kind(), OperandPos::Before); + Operand::new(op.vreg(), OperandConstraint::Reg, op.kind(), OperandPos::Before); } else if opts.fixed_regs && bool::arbitrary(u)? { let mut fixed = vec![]; for _ in 0..u.int_in_range(0..=operands.len() - 1)? 
{ @@ -471,7 +471,7 @@ impl Func { let op = operands[i]; operands[i] = Operand::new( op.vreg(), - OperandPolicy::FixedReg(fixed_reg), + OperandConstraint::FixedReg(fixed_reg), op.kind(), op.pos(), ); diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 231920d8..a8837c44 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -20,7 +20,7 @@ use super::{ }; use crate::bitvec::BitVec; use crate::{ - Allocation, Block, Function, Inst, InstPosition, Operand, OperandKind, OperandPolicy, + Allocation, Block, Function, Inst, InstPosition, Operand, OperandKind, OperandConstraint, OperandPos, PReg, ProgPoint, RegAllocError, VReg, }; use fxhash::FxHashSet; @@ -29,19 +29,19 @@ use std::collections::{HashSet, VecDeque}; use std::convert::TryFrom; #[inline(always)] -pub fn spill_weight_from_policy(policy: OperandPolicy, loop_depth: usize, is_def: bool) -> u32 { +pub fn spill_weight_from_constraint(constraint: OperandConstraint, loop_depth: usize, is_def: bool) -> u32 { // A bonus of 1000 for one loop level, 4000 for two loop levels, // 16000 for three loop levels, etc. Avoids exponentiation. // Bound `loop_depth` at 2 so that `hot_bonus` is at most 16000. let loop_depth = std::cmp::min(2, loop_depth); let hot_bonus = 1000 * (1 << (2 * loop_depth)); let def_bonus = if is_def { 2000 } else { 0 }; - let policy_bonus = match policy { - OperandPolicy::Any => 1000, - OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000, + let constraint_bonus = match constraint { + OperandConstraint::Any => 1000, + OperandConstraint::Reg | OperandConstraint::FixedReg(_) => 2000, _ => 0, }; - hot_bonus + def_bonus + policy_bonus + hot_bonus + def_bonus + constraint_bonus } impl<'a, F: Function> Env<'a, F> { @@ -184,11 +184,11 @@ impl<'a, F: Function> Env<'a, F> { pub fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { let operand = u.operand; - let policy = operand.policy(); + let constraint = operand.constraint(); let block = self.cfginfo.insn_block[u.pos.inst().index()]; let loop_depth = self.cfginfo.approx_loop_depth[block.index()] as usize; let weight = - spill_weight_from_policy(policy, loop_depth, operand.kind() != OperandKind::Use); + spill_weight_from_constraint(constraint, loop_depth, operand.kind() != OperandKind::Use); u.weight = u16::try_from(weight).expect("weight too large for u16 field"); log::debug!( @@ -415,7 +415,7 @@ impl<'a, F: Function> Env<'a, F> { // proper interference wrt other inputs. let mut reused_input = None; for op in self.func.inst_operands(inst) { - if let OperandPolicy::Reuse(i) = op.policy() { + if let OperandConstraint::Reuse(i) = op.constraint() { reused_input = Some(i); break; } @@ -465,12 +465,12 @@ impl<'a, F: Function> Env<'a, F> { ); } - let src_preg = match src.policy() { - OperandPolicy::FixedReg(r) => r, + let src_preg = match src.constraint() { + OperandConstraint::FixedReg(r) => r, _ => unreachable!(), }; - let dst_preg = match dst.policy() { - OperandPolicy::FixedReg(r) => r, + let dst_preg = match dst.constraint() { + OperandConstraint::FixedReg(r) => r, _ => unreachable!(), }; self.insert_move( @@ -484,7 +484,7 @@ impl<'a, F: Function> Env<'a, F> { // If exactly one of source and dest (but not // both) is a pinned-vreg, convert this into a // ghost use on the other vreg with a FixedReg - // policy. + // constraint. 
else if self.vregs[src.vreg().vreg()].is_pinned || self.vregs[dst.vreg().vreg()].is_pinned { @@ -513,20 +513,20 @@ impl<'a, F: Function> Env<'a, F> { ProgPoint::after(inst), ) }; - let policy = OperandPolicy::FixedReg(preg); - let operand = Operand::new(vreg, policy, kind, pos); + let constraint = OperandConstraint::FixedReg(preg); + let operand = Operand::new(vreg, constraint, kind, pos); log::debug!( concat!( " -> preg {:?} vreg {:?} kind {:?} ", - "pos {:?} progpoint {:?} policy {:?} operand {:?}" + "pos {:?} progpoint {:?} constraint {:?} operand {:?}" ), preg, vreg, kind, pos, progpoint, - policy, + constraint, operand ); @@ -701,23 +701,23 @@ impl<'a, F: Function> Env<'a, F> { // positions of After and Before respectively // (see note below), and to have Any // constraints if they were originally Reg. - let src_policy = match src.policy() { - OperandPolicy::Reg => OperandPolicy::Any, + let src_constraint = match src.constraint() { + OperandConstraint::Reg => OperandConstraint::Any, x => x, }; - let dst_policy = match dst.policy() { - OperandPolicy::Reg => OperandPolicy::Any, + let dst_constraint = match dst.constraint() { + OperandConstraint::Reg => OperandConstraint::Any, x => x, }; let src = Operand::new( src.vreg(), - src_policy, + src_constraint, OperandKind::Use, OperandPos::After, ); let dst = Operand::new( dst.vreg(), - dst_policy, + dst_constraint, OperandKind::Def, OperandPos::Before, ); @@ -728,9 +728,9 @@ impl<'a, F: Function> Env<'a, F> { format!( " prog-move v{} ({:?}) -> v{} ({:?})", src.vreg().vreg(), - src_policy, + src_constraint, dst.vreg().vreg(), - dst_policy, + dst_constraint, ), ); } @@ -1049,7 +1049,7 @@ impl<'a, F: Function> Env<'a, F> { let pos = ProgPoint::before(self.safepoints[safepoint_idx]); let operand = Operand::new( self.vreg_regs[vreg.index()], - OperandPolicy::Stack, + OperandConstraint::Stack, OperandKind::Use, OperandPos::Before, ); @@ -1116,7 +1116,7 @@ impl<'a, F: Function> Env<'a, F> { } last_point = Some(pos); - if let OperandPolicy::FixedReg(preg) = op.policy() { + if let OperandConstraint::FixedReg(preg) = op.constraint() { let vreg_idx = VRegIndex::new(op.vreg().vreg()); let preg_idx = PRegIndex::new(preg.index()); log::debug!( @@ -1129,11 +1129,11 @@ impl<'a, F: Function> Env<'a, F> { { let orig_preg = first_preg[idx]; if orig_preg != preg_idx { - log::debug!(" -> duplicate; switching to policy Reg"); + log::debug!(" -> duplicate; switching to constraint Reg"); fixups.push((pos, orig_preg, preg_idx, slot)); *op = Operand::new( op.vreg(), - OperandPolicy::Reg, + OperandConstraint::Reg, op.kind(), op.pos(), ); diff --git a/src/ion/merge.rs b/src/ion/merge.rs index d0219a3c..d908f6e0 100644 --- a/src/ion/merge.rs +++ b/src/ion/merge.rs @@ -17,7 +17,7 @@ use super::{ Env, LiveBundleIndex, LiveRangeIndex, LiveRangeKey, Requirement, SpillSet, SpillSetIndex, SpillSlotIndex, VRegIndex, }; -use crate::{Function, Inst, OperandPolicy, PReg}; +use crate::{Function, Inst, OperandConstraint, PReg}; use smallvec::smallvec; impl<'a, F: Function> Env<'a, F> { @@ -269,10 +269,10 @@ impl<'a, F: Function> Env<'a, F> { let mut stack = false; for entry in &self.bundles[bundle.index()].ranges { for u in &self.ranges[entry.index.index()].uses { - if let OperandPolicy::FixedReg(_) = u.operand.policy() { + if let OperandConstraint::FixedReg(_) = u.operand.constraint() { fixed = true; } - if let OperandPolicy::Stack = u.operand.policy() { + if let OperandConstraint::Stack = u.operand.constraint() { stack = true; } if fixed && stack { @@ -306,10 +306,10 @@ impl<'a, 
F: Function> Env<'a, F> { for inst in 0..self.func.insts() { let inst = Inst::new(inst); - // Attempt to merge Reuse-policy operand outputs with the + // Attempt to merge Reuse-constraint operand outputs with the // corresponding inputs. for op in self.func.inst_operands(inst) { - if let OperandPolicy::Reuse(reuse_idx) = op.policy() { + if let OperandConstraint::Reuse(reuse_idx) = op.constraint() { let src_vreg = op.vreg(); let dst_vreg = self.func.inst_operands(inst)[reuse_idx].vreg(); if self.vregs[src_vreg.vreg()].is_pinned diff --git a/src/ion/moves.rs b/src/ion/moves.rs index 18edd21d..bf8bea82 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -20,7 +20,7 @@ use super::{ use crate::moves::ParallelMoves; use crate::{ - Allocation, Block, Edit, Function, Inst, InstPosition, OperandKind, OperandPolicy, OperandPos, + Allocation, Block, Edit, Function, Inst, InstPosition, OperandKind, OperandConstraint, OperandPos, ProgPoint, RegClass, VReg, }; use log::debug; @@ -489,7 +489,7 @@ impl<'a, F: Function> Env<'a, F> { if slot != SLOT_NONE { self.set_alloc(inst, slot as usize, alloc); } - if let OperandPolicy::Reuse(_) = operand.policy() { + if let OperandConstraint::Reuse(_) = operand.constraint() { reuse_input_insts.push(inst); } } @@ -755,7 +755,7 @@ impl<'a, F: Function> Env<'a, F> { let mut input_reused: SmallVec<[usize; 4]> = smallvec![]; for output_idx in 0..self.func.inst_operands(inst).len() { let operand = self.func.inst_operands(inst)[output_idx]; - if let OperandPolicy::Reuse(input_idx) = operand.policy() { + if let OperandConstraint::Reuse(input_idx) = operand.constraint() { debug_assert!(!input_reused.contains(&input_idx)); debug_assert_eq!(operand.pos(), OperandPos::After); input_reused.push(input_idx); diff --git a/src/ion/process.rs b/src/ion/process.rs index ef72956b..e501b508 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -14,12 +14,12 @@ //! Main allocation loop that processes bundles. 
use super::{ - spill_weight_from_policy, CodeRange, Env, LiveBundleIndex, LiveBundleVec, LiveRangeFlag, + spill_weight_from_constraint, CodeRange, Env, LiveBundleIndex, LiveBundleVec, LiveRangeFlag, LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry, PRegIndex, RegTraversalIter, Requirement, UseList, }; use crate::{ - Allocation, Function, Inst, InstPosition, OperandKind, OperandPolicy, PReg, ProgPoint, + Allocation, Function, Inst, InstPosition, OperandKind, OperandConstraint, PReg, ProgPoint, RegAllocError, }; use fxhash::FxHashSet; @@ -273,11 +273,11 @@ impl<'a, F: Function> Env<'a, F> { } else { for u in &first_range_data.uses { log::debug!(" -> use: {:?}", u); - if let OperandPolicy::FixedReg(_) = u.operand.policy() { + if let OperandConstraint::FixedReg(_) = u.operand.constraint() { log::debug!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); fixed = true; } - if let OperandPolicy::Stack = u.operand.policy() { + if let OperandConstraint::Stack = u.operand.constraint() { log::debug!(" -> stack use at {:?}: {:?}", u.pos, u.operand); stack = true; } @@ -886,8 +886,8 @@ impl<'a, F: Function> Env<'a, F> { let loop_depth = self.cfginfo.approx_loop_depth [self.cfginfo.insn_block[first_conflict_point.inst().index()].index()]; - let move_cost = spill_weight_from_policy( - OperandPolicy::Reg, + let move_cost = spill_weight_from_constraint( + OperandConstraint::Reg, loop_depth as usize, /* is_def = */ true, ); @@ -905,8 +905,8 @@ impl<'a, F: Function> Env<'a, F> { let loop_depth = self.cfginfo.approx_loop_depth [self.cfginfo.insn_block[point.inst().index()].index()]; - let move_cost = spill_weight_from_policy( - OperandPolicy::Reg, + let move_cost = spill_weight_from_constraint( + OperandConstraint::Reg, loop_depth as usize, /* is_def = */ true, ); diff --git a/src/ion/requirement.rs b/src/ion/requirement.rs index 2517f147..f6be76c2 100644 --- a/src/ion/requirement.rs +++ b/src/ion/requirement.rs @@ -1,7 +1,7 @@ //! Requirements computation. use super::{Env, LiveBundleIndex}; -use crate::{Function, Operand, OperandPolicy, PReg, RegClass}; +use crate::{Function, Operand, OperandConstraint, PReg, RegClass}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Requirement { @@ -64,10 +64,10 @@ impl Requirement { } #[inline(always)] pub fn from_operand(op: Operand) -> Requirement { - match op.policy() { - OperandPolicy::FixedReg(preg) => Requirement::Fixed(preg), - OperandPolicy::Reg | OperandPolicy::Reuse(_) => Requirement::Register(op.class()), - OperandPolicy::Stack => Requirement::Stack(op.class()), + match op.constraint() { + OperandConstraint::FixedReg(preg) => Requirement::Fixed(preg), + OperandConstraint::Reg | OperandConstraint::Reuse(_) => Requirement::Register(op.class()), + OperandConstraint::Stack => Requirement::Stack(op.class()), _ => Requirement::Any(op.class()), } } diff --git a/src/lib.rs b/src/lib.rs index 31349553..bdcef7cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -210,34 +210,34 @@ impl std::fmt::Display for SpillSlot { } /// An `Operand` encodes everything about a mention of a register in -/// an instruction: virtual register number, and any constraint/policy -/// that applies to the register at this program point. +/// an instruction: virtual register number, and any constraint that +/// applies to the register at this program point. /// /// An Operand may be a use or def (this corresponds to `LUse` and /// `LAllocation` in Ion). #[derive(Clone, Copy, PartialEq, Eq)] pub struct Operand { - /// Bit-pack into 32 bits. 
Note that `policy` overlaps with `kind` + /// Bit-pack into 32 bits. Note that `constraint` overlaps with `kind` /// in `Allocation` and we use mutually disjoint tag-value ranges /// so that clients, if they wish, can track just one `u32` per /// register slot and edit it in-place after allocation. /// - /// policy:3 kind:2 pos:1 class:1 preg:5 vreg:20 + /// constraint:3 kind:2 pos:1 class:1 preg:5 vreg:20 bits: u32, } impl Operand { #[inline(always)] - pub fn new(vreg: VReg, policy: OperandPolicy, kind: OperandKind, pos: OperandPos) -> Self { - let (preg_field, policy_field): (u32, u32) = match policy { - OperandPolicy::Any => (0, 0), - OperandPolicy::Reg => (0, 1), - OperandPolicy::Stack => (0, 2), - OperandPolicy::FixedReg(preg) => { + pub fn new(vreg: VReg, constraint: OperandConstraint, kind: OperandKind, pos: OperandPos) -> Self { + let (preg_field, constraint_field): (u32, u32) = match constraint { + OperandConstraint::Any => (0, 0), + OperandConstraint::Reg => (0, 1), + OperandConstraint::Stack => (0, 2), + OperandConstraint::FixedReg(preg) => { assert_eq!(preg.class(), vreg.class()); (preg.hw_enc() as u32, 3) } - OperandPolicy::Reuse(which) => { + OperandConstraint::Reuse(which) => { assert!(which <= PReg::MAX); (which as u32, 4) } @@ -251,7 +251,7 @@ impl Operand { | (class_field << 25) | (pos_field << 26) | (kind_field << 27) - | (policy_field << 29), + | (constraint_field << 29), } } @@ -259,7 +259,7 @@ impl Operand { pub fn reg_use(vreg: VReg) -> Self { Operand::new( vreg, - OperandPolicy::Reg, + OperandConstraint::Reg, OperandKind::Use, OperandPos::Before, ) @@ -268,7 +268,7 @@ impl Operand { pub fn reg_use_at_end(vreg: VReg) -> Self { Operand::new( vreg, - OperandPolicy::Reg, + OperandConstraint::Reg, OperandKind::Use, OperandPos::After, ) @@ -277,7 +277,7 @@ impl Operand { pub fn reg_def(vreg: VReg) -> Self { Operand::new( vreg, - OperandPolicy::Reg, + OperandConstraint::Reg, OperandKind::Def, OperandPos::After, ) @@ -286,7 +286,7 @@ impl Operand { pub fn reg_def_at_start(vreg: VReg) -> Self { Operand::new( vreg, - OperandPolicy::Reg, + OperandConstraint::Reg, OperandKind::Def, OperandPos::Before, ) @@ -295,7 +295,7 @@ impl Operand { pub fn reg_temp(vreg: VReg) -> Self { Operand::new( vreg, - OperandPolicy::Reg, + OperandConstraint::Reg, OperandKind::Def, OperandPos::Before, ) @@ -304,7 +304,7 @@ impl Operand { pub fn reg_reuse_def(vreg: VReg, idx: usize) -> Self { Operand::new( vreg, - OperandPolicy::Reuse(idx), + OperandConstraint::Reuse(idx), OperandKind::Def, OperandPos::After, ) @@ -313,7 +313,7 @@ impl Operand { pub fn reg_fixed_use(vreg: VReg, preg: PReg) -> Self { Operand::new( vreg, - OperandPolicy::FixedReg(preg), + OperandConstraint::FixedReg(preg), OperandKind::Use, OperandPos::Before, ) @@ -322,7 +322,7 @@ impl Operand { pub fn reg_fixed_def(vreg: VReg, preg: PReg) -> Self { Operand::new( vreg, - OperandPolicy::FixedReg(preg), + OperandConstraint::FixedReg(preg), OperandKind::Def, OperandPos::After, ) @@ -366,15 +366,15 @@ impl Operand { } #[inline(always)] - pub fn policy(self) -> OperandPolicy { - let policy_field = (self.bits >> 29) & 7; + pub fn constraint(self) -> OperandConstraint { + let constraint_field = (self.bits >> 29) & 7; let preg_field = ((self.bits >> 20) as usize) & PReg::MAX; - match policy_field { - 0 => OperandPolicy::Any, - 1 => OperandPolicy::Reg, - 2 => OperandPolicy::Stack, - 3 => OperandPolicy::FixedReg(PReg::new(preg_field, self.class())), - 4 => OperandPolicy::Reuse(preg_field), + match constraint_field { + 0 => 
OperandConstraint::Any, + 1 => OperandConstraint::Reg, + 2 => OperandConstraint::Stack, + 3 => OperandConstraint::FixedReg(PReg::new(preg_field, self.class())), + 4 => OperandConstraint::Reuse(preg_field), _ => unreachable!(), } } @@ -409,13 +409,13 @@ impl std::fmt::Display for Operand { RegClass::Int => "i", RegClass::Float => "f", }, - self.policy() + self.constraint() ) } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum OperandPolicy { +pub enum OperandConstraint { /// Any location is fine (register or stack slot). Any, /// Operand must be in a register. Register is read-only for Uses. @@ -428,7 +428,7 @@ pub enum OperandPolicy { Reuse(usize), } -impl std::fmt::Display for OperandPolicy { +impl std::fmt::Display for OperandConstraint { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::Any => write!(f, "any"), @@ -458,7 +458,7 @@ pub enum OperandPos { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Allocation { /// Bit-pack in 32 bits. Note that `kind` overlaps with the - /// `policy` field in `Operand`, and we are careful to use + /// `constraint` field in `Operand`, and we are careful to use /// disjoint ranges of values in this field for each type. We also /// leave the def-or-use bit (`kind` for `Operand`) unused here so /// that we can use it below in `OperandOrAllocation` to record @@ -570,7 +570,7 @@ impl Allocation { } // N.B.: These values must be *disjoint* with the values used to -// encode `OperandPolicy`, because they share a 3-bit field. +// encode `OperandConstraint`, because they share a 3-bit field. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u8)] pub enum AllocationKind { From a591535fe99fd78b3f8079b8c617d8e92d530ca0 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 11:23:44 -0700 Subject: [PATCH 135/155] Use "vector" or "sequence" in lieu of "list" in DESIGN.md, except when referring to linked lists. From feedback from @julian-seward1. I had used "list" in a more generic sense, meaning ordered sequence of elements, while in a Rust context it can sometimes be confused with "linked list" specifically. These alternative terms are more precise. --- doc/DESIGN.md | 170 +++++++++++++++++++++++++------------------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/doc/DESIGN.md b/doc/DESIGN.md index 82f36af4..6553e734 100644 --- a/doc/DESIGN.md +++ b/doc/DESIGN.md @@ -15,23 +15,23 @@ The toplevel API to regalloc2 consists of a single entry point `run()` that takes a register environment, which specifies all physical registers, and the input program. The function returns either an error or an `Output` struct that provides allocations for each operand and a -list of additional instructions (moves, loads, stores) to insert. +vector of additional instructions (moves, loads, stores) to insert. ## Register Environment The allocator takes a `MachineEnv` which specifies, for each of the -two register classes `Int` and `Float`, a list of `PReg`s by index. A +two register classes `Int` and `Float`, a vector of `PReg`s by index. A `PReg` is nothing more than the class and index within the class; the allocator does not need to know anything more. -The `MachineEnv` provides a list of preferred and non-preferred -physical registers per class. Any register not on either list will not -be allocated. Usually, registers that do not need to be saved in the -prologue if used (i.e., caller-save registers) are given in the -"preferred" list. 
The environment also provides exactly one scratch +The `MachineEnv` provides a vector of preferred and non-preferred +physical registers per class. Any register not in either vector will +not be allocated. Usually, registers that do not need to be saved in +the prologue if used (i.e., caller-save registers) are given in the +"preferred" vector. The environment also provides exactly one scratch register per class. This register must not be in the preferred or -non-preferred lists, and is used whenever a set of moves that need to -occur logically in parallel have a cycle (for a simple example, +non-preferred vectors, and is used whenever a set of moves that need +to occur logically in parallel have a cycle (for a simple example, consider a swap `r0, r1 := r1, r0`). With some more work, we could potentially remove the need for the @@ -41,17 +41,17 @@ the client ("swap"), but we have not pursued this. ## CFG and Instructions The allocator operates on an input program that is in a standard CFG -representation: the function body is a list of basic blocks, and each -block has a list of instructions and zero or more successors. The -allocator also requires the client to provide predecessors for each -block, and these must be consistent with the successor -lists. +representation: the function body is a sequence of basic blocks, and +each block has a sequence of instructions and zero or more +successors. The allocator also requires the client to provide +predecessors for each block, and these must be consistent with the +successors. Instructions are opaque to the allocator except for a few important bits: (1) `is_ret` (is a return instruction); (2) `is_branch` (is a branch instruction); (3) `is_call` (is a call instruction, for heuristic purposes only), (4) `is_move` (is a move between registers), -and (5) a list of Operands, covered below. Every block must end in a +and (5) a vector of Operands, covered below. Every block must end in a return or branch. Both instructions and blocks are named by indices in contiguous index @@ -248,8 +248,8 @@ removed. However, it is very important for performance at the moment. ## Output The allocator produces two main data structures as output: an array of -`Allocation`s and a list of edits. Some other data, such as stackmap -slot info, is also provided. +`Allocation`s and a sequence of edits. Some other data, such as +stackmap slot info, is also provided. ### Allocations @@ -266,7 +266,7 @@ In order to implement the necessary movement of data between allocations, the allocator needs to insert moves at various program points. -The list of inserted moves contains tuples that name a program point +The vector of inserted moves contains tuples that name a program point and an "edit". The edit is either a move, from one `Allocation` to another, or else a kind of metadata used by the checker to know which VReg is live in a given allocation at any particular time. The latter @@ -304,44 +304,44 @@ standard backward iterative dataflow analysis and are exact; they do not over-approximate (this turns out to be important for performance, and is also necessary for correctness in the case of stackmaps). -### Blockparam Lists: Source-Side and Dest-Side +### Blockparam Vectors: Source-Side and Dest-Side The initialization stage scans the input program and produces two -lists that represent blockparam flows from branches to destination +vectors that represent blockparam flows from branches to destination blocks: `blockparam_ins` and `blockparam_outs`. 
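A rough sketch of the shapes involved (simplified here to plain integer tuples; the real code uses the allocator's index newtypes and packs more state), together with the single bulk sort described next:

```rust
// Illustrative only: one entry per blockparam of every block ("ins"),
// and one per blockparam argument on every branch ("outs").
type BlockparamIn = (u32, u32, u32); // (to-vreg, to-block, from-block)
type BlockparamOut = (u32, u32, u32, u32); // (from-vreg, from-block, to-block, to-vreg)

fn sort_blockparam_vectors(ins: &mut Vec<BlockparamIn>, outs: &mut Vec<BlockparamOut>) {
    // Entries are appended in whatever order the initialization scan
    // encounters them; one O(n log n) sort afterwards puts each vector in
    // (vreg, block, ...) order, so the later scan over a vreg's
    // allocations in block order can consume both sides in step.
    ins.sort();
    outs.sort();
}
```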
-These two lists are the first instance we will see of a recurring -pattern: the lists contain tuples that are carefully ordered in a way -such that their sort-order is meaningful. "Build a list lazily then -sort" is a common idiom: it batches the O(n log n) cost into one +These two vectors are the first instance we will see of a recurring +pattern: the vectors contain tuples that are carefully ordered in a +way such that their sort-order is meaningful. "Build a vector lazily +then sort" is a common idiom: it batches the O(n log n) cost into one operation that the stdlib has aggressively optimized, it provides dense storage, and it allows for a scan in a certain order that often lines up with a scan over the program. -In this particular case, we will build lists of (vreg, block) points +In this particular case, we will build vectors of (vreg, block) points that are meaningful either at the start or end of a block, so that later, when we scan over a particular vreg's allocations in block -order, we can generate another list of allocations. One side (the +order, we can generate another vector of allocations. One side (the "outs") also contains enough information that it can line up with the other side (the "ins") in a later sort. -To make this work, `blockparam_ins` contains a list of (to-vreg, +To make this work, `blockparam_ins` contains a vector of (to-vreg, to-block, from-block) tuples, and has an entry for every blockparam of every block. Note that we can compute this without actually observing from-blocks; we only need to iterate over `block_preds` at any given block. -Then, `blockparam_outs` contains a list of (from-vreg, from-block, +Then, `blockparam_outs` contains a vector of (from-vreg, from-block, to-block, to-vreg), and has an entry for every parameter on every branch that ends a block. There is exactly one "out" tuple for every "in" tuple. As mentioned above, we will later scan over both to generate moves. -### Program-Move Lists: Source-Side and Dest-Side +### Program-Move Vectors: Source-Side and Dest-Side Similar to blockparams, we handle moves specially. In fact, we ingest -all moves in the input program into a set of lists -- "move sources" -and "move dests", analogous to the "ins" and "outs" blockparam lists +all moves in the input program into a set of vectors -- "move sources" +and "move dests", analogous to the "ins" and "outs" blockparam vectors described above -- and then completely ignore the moves in the program thereafter. The semantics of the API are such that all program moves will be recreated with regalloc-inserted edits, and should not still @@ -353,7 +353,7 @@ opaque instructions with a source and dest, and we found that there were many redundant move-chains (A->B, B->C) that are eliminated when everything is handled centrally. -We also construct a `prog_move_merges` list of live-range index pairs +We also construct a `prog_move_merges` vector of live-range index pairs to attempt to merge when we reach that stage of allocation. ## Core Allocation State: Ranges, Uses, Bundles, VRegs, PRegs @@ -370,7 +370,7 @@ A live-range is a contiguous range of program points (half-open, i.e. including `from` and excluding `to`) for which a particular vreg is live with a value. -A live-range contains a list of uses. Each use contains four parts: +A live-range contains a vector of uses. 
Each use contains four parts: the Operand word (directly copied, so there is no need to dereference it); the ProgPoint at which the use occurs; the operand slot on that instruction, if any, that the operand comes from, and the use's @@ -392,14 +392,14 @@ values throughout the allocator. New live-ranges can be created state is bulk-freed at the end. Live-ranges are aggregated into "bundles". A bundle is a collection of -ranges that does not overlap. Each bundle carries: a list (inline +ranges that does not overlap. Each bundle carries: a vector (inline SmallVec) of (range, live-range index) tuples, an allocation (starts as "none"), a "spillset" (more below), and some metadata, including a spill weight (sum of ranges' weights), a priority (sum of ranges' lengths), and three property flags: "minimal", "contains fixed constraints", "contains stack constraints". -VRegs also contain their lists of live-ranges, in the same form as a +VRegs also contain their vectors of live-ranges, in the same form as a bundle does (inline SmallVec that has inline (from, to) range bounds and range indices). @@ -407,14 +407,14 @@ There are two important overlap invariants: (i) no liveranges within a bundle overlap, and (ii) no liveranges within a vreg overlap. These are extremely important and we rely on them implicitly in many places. -The live-range lists in bundles and vregs, and use-lists in ranges, +The live-range vectors in bundles and vregs, and use-vectors in ranges, have various sorting invariants as well. These invariants differ according to the phase of the allocator's computation. First, during live-range construction, live-ranges are placed into vregs in reverse order (because the computation is a reverse scan) and uses into ranges in reverse order; these are sorted into forward order at the end of live-range computation. When bundles are first constructed, their -range lists are sorted, and they remain so for the rest of allocation, +range vectors are sorted, and they remain so for the rest of allocation, as we need for interference testing. However, as ranges are created and split, sortedness of vreg ranges is *not* maintained; they are sorted once more, in bulk, when allocation is done and we start to @@ -464,7 +464,7 @@ create a second-chance spill bundle just for a liverange with an "Any" use; but if it was already forced into existence by splitting and trimming, then we might as well use it. -Note that unlike other bundles, a spill bundle's liverange list +Note that unlike other bundles, a spill bundle's liverange vector remains unsorted until we do the second-chance allocation. This allows quick appends of more liveranges. @@ -502,8 +502,8 @@ is spilled, and we traverse to the spillset then spillslot. ## Other: Fixups, Stats, Debug Annotations -There are a few fixup lists that we will cover in more detail -later. Of particular note is the "multi-fixed-reg fixup list": this +There are a few fixup vectors that we will cover in more detail +later. Of particular note is the "multi-fixed-reg fixup vector": this handles instructions that constrain the same input vreg to multiple, different, fixed registers for different operands at the same program point. The only way to satisfy such a set of constraints is to @@ -550,7 +550,7 @@ For each block, we perform a scan with the following state: - A liveness bitvec, initialized at the start from `liveouts`. - A vector of live-range indices, with one entry per vreg, initially "invalid" (this vector is allocated once and reused at each block). 
-- In-progress list of live-range indices per vreg in the vreg state, +- In-progress vector of live-range indices per vreg in the vreg state, in *reverse* order (we will reverse it when we're done). A vreg is live at the current point in the scan if its bit is set in @@ -630,7 +630,7 @@ pregs' allocation maps. Finally, we need to handle moves specially. With the caveat that "this is a massive hack and I am very very sorry", here is how it works. A move between two pinned vregs is easy: -we add that to the inserted-moves list right away because we know the +we add that to the inserted-moves vector right away because we know the Allocation on both sides. A move from a pinned vreg to a normal vreg is the first interesting case. In this case, we (i) create a ghost def with a fixed-register policy on the normal vreg, doing the other @@ -693,8 +693,8 @@ sees the corner case where it's necessary!) ## Bundle Merging -Once we have built the liverange lists for every vreg, we can reverse -these lists (recall, they were built in strict reverse order) and +Once we have built the liverange vectors for every vreg, we can reverse +these vectors (recall, they were built in strict reverse order) and initially assign one bundle per (non-pinned) vreg. We then try to merge bundles together as long as find pairs of bundles that do not overlap and that (heuristically) make sense to merge. @@ -711,9 +711,9 @@ corresponding output; across program moves; and across blockparam assignments. To merge two bundles, we traverse over both their sorted liverange -lists at once, checking for overlaps. Note that we can do this without +vectors at once, checking for overlaps. Note that we can do this without pointer-chasing to the liverange data; the (from, to) range is in the -liverange list itself. +liverange vector itself. We also check whether the merged bundle would have conflicting requirements (see below for more on requirements). We do a coarse @@ -724,7 +724,7 @@ on both sides, merging, and checking for Conflict (the lattice bottom value). If no conflict, we merge. A performance note: merging is extremely performance-sensitive, and it -turns out that a mergesort-like merge of the liverange lists is too +turns out that a mergesort-like merge of the liverange vectors is too expensive, partly because it requires allocating a separate result vector (in-place merge in mergesort is infamously complex). Instead, we simply append one vector onto the end of the other and invoke @@ -835,10 +835,10 @@ then we *can* use a register (either `Any`, which accepts a register as one of several options, or `Reg`, which must have one, or `Fixed`, which must have a particular one). -We determine the list of physical registers whose allocation maps we -will probe, and in what order. If a particular fixed register is -required, we probe only that register. Otherwise, we probe all -registers in the required class. +We determine which physical registers whose allocation maps we will +probe, and in what order. If a particular fixed register is required, +we probe only that register. Otherwise, we probe all registers in the +required class. The order in which we probe, if we are not constrained to a single register, is carefully chosen. First, if there is a hint register from @@ -846,13 +846,13 @@ the spillset (this is set by the last allocation into a register of any other bundle in this spillset), we probe that. Then, we probe all preferred registers; then all non-preferred registers. 
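A hedged sketch of how such a probe sequence could be produced (the helper below is invented for illustration and is not the allocator's actual `RegTraversalIter`; the offset-based rotation it applies is described next):

```rust
/// Illustrative only: yield the hint first, then the preferred registers,
/// then the non-preferred registers, rotating each group by `offset` so
/// that different bundles start probing at different places.
fn probe_order(
    hint: Option<u32>,
    preferred: &[u32],
    non_preferred: &[u32],
    offset: usize,
) -> Vec<u32> {
    let mut order = Vec::new();
    order.extend(hint);
    for group in [preferred, non_preferred] {
        if group.is_empty() {
            continue;
        }
        let start = offset % group.len();
        // March through the group and wrap around, stopping just before
        // the starting index would come up again.
        for i in 0..group.len() {
            order.push(group[(start + i) % group.len()]);
        }
    }
    // A real implementation would skip registers it has already probed
    // (e.g. the hint) rather than emit duplicates.
    order
}
```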
-For each of the preferred and non-preferred register lists, we probe -in an *offset* manner: we start at some index partway through the -list, determined by some heuristic number that is random and +For each of the preferred and non-preferred register sequences, we +probe in an *offset* manner: we start at some index partway through +the sequence, determined by some heuristic number that is random and well-dstributed. (In practice, we use the sum of the bundle index and the instruction index of the start of the first range in the bundle.) -We then march through the list and wrap around, stopping before we hit -our starting point again. +We then march through the sequence and wrap around, stopping before we +hit our starting point again. The purpose of this offset is to distribute the contention and speed up the allocation process. In the common case where there are enough @@ -863,7 +863,7 @@ order. This has a large allocation performance impact in practice. For each register in probe order, we probe the allocation map, and gather, simultaneously, several results: (i) whether the entire range -is free; (ii) if not, the list of all conflicting bundles, *and* the +is free; (ii) if not, the vector of all conflicting bundles, *and* the highest weight among those bundles; (iii) if not, the *first* conflict point. @@ -915,7 +915,7 @@ track the "lowest cost split option", which is the cost (more below), the point at which to split, and the register for this option. For each register we probe, if there is a conflict but none of the -conflicts are fixed allocations, we receive a list of bundles that +conflicts are fixed allocations, we receive a vector of bundles that conflicted, and also separately, the first conflicting program point. We update the lowest-cost eviction option if the cost (max weight) of the conflicting bundles is less than the current best. We @@ -955,14 +955,14 @@ an inner loop). The actual split procedure is fairly simple. We are given a bundle and a split-point. We create a new bundle to take on the second half -("rest") of the original. We find the point in the liverange list that -corresponds to the split, and distribute appropriately. If the +("rest") of the original. We find the point in the liverange vector +that corresponds to the split, and distribute appropriately. If the split-point lands in the middle of a liverange, then we split that liverange as well. In the case that a new liverange is created, we add the liverange to -the corresponding vreg liverange list as well. Note that, as described -above, the vreg's liverange list is unsorted while splitting is +the corresponding vreg liverange vector as well. Note that, as described +above, the vreg's liverange vector is unsorted while splitting is occurring (because we do not need to traverse it or do any lookups during this phase); so we just append. @@ -1010,14 +1010,14 @@ second-chance allocation). ## Second-Chance Allocation: Spilled Bundles Once the main allocation loop terminates, when all bundles have either -been allocated or punted to the "spilled bundles" list, we do +been allocated or punted to the "spilled bundles" vector, we do second-chance allocation. This is a simpler loop that never evicts and never splits. Instead, each bundle gets one second chance, in which it can probe pregs and attempt to allocate. If it fails, it will actually live on the stack. 
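As an illustration of the shape of this pass (the trait and method names below are invented stand-ins, not the allocator's real internals):

```rust
/// Invented stand-in for the pieces of allocator state this pass needs.
trait SecondChanceEnv {
    fn probe_all_pregs(&mut self, bundle: usize) -> Option<usize>;
    fn assign(&mut self, bundle: usize, preg: usize);
    fn leave_on_stack(&mut self, bundle: usize);
}

fn second_chance<E: SecondChanceEnv>(env: &mut E, spilled_bundles: &[usize]) {
    for &bundle in spilled_bundles {
        // One probe pass over candidate registers; no eviction, no splitting.
        if let Some(preg) = env.probe_all_pregs(bundle) {
            env.assign(bundle, preg);
        } else {
            // No register found: the bundle stays on the stack, in its
            // spillset's spillslot.
            env.leave_on_stack(bundle);
        }
    }
}
```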
This is correct because we are careful to only place bundles on the -spilled-bundles list that are *allowed* to live on the +spilled-bundles vector that are *allowed* to live on the stack. Specifically, only the canonical spill bundles (which will contain only empty ranges) and other bundles that have an "any" or "unknown" requirement are placed here (but *not* "stack" requirements; @@ -1107,7 +1107,7 @@ each, and for each move that comes *to* or *from* this liverange, generate a "half-move". The key idea is that we generate a record for each "side" of the move, and these records are keyed in a way that after a sort, the "from" and "to" ends will be consecutive. We can -sort the list of halfmoves once (this is expensive, but not as +sort the vector of halfmoves once (this is expensive, but not as expensive as many separate pointer-chasing lookups), then scan it again to actually generate the move instructions. @@ -1124,7 +1124,7 @@ of every block covered by a liverange, we can generate "dest" half-moves for blockparams, and at the end of every block covered by a liverange, we can generate "source" half-moves for blockparam args on branches. Incidentally, this is the reason that `blockparam_ins` and -`blockparam_outs` are sorted tuple-lists whose tuples begin with +`blockparam_outs` are sorted tuple-vectors whose tuples begin with (vreg, block, ...): this is the order in which we do the toplevel scan over allocations. @@ -1166,9 +1166,9 @@ happen *in parallel*. For example, if multiple vregs change allocations between two instructions, all of those moves happen as part of one parallel permutation. Similarly, blockparams have parallel-assignment semantics. We thus enqueue all the moves that we -generate at program points and resolve them into lists of sequential -moves that can actually be lowered to move instructions in the machine -code. +generate at program points and resolve them into sequences of +sequential moves that can actually be lowered to move instructions in +the machine code. First, a word on *move priorities*. There are different kinds of moves that are generated between instructions, and we have to ensure that @@ -1198,7 +1198,7 @@ Every move is statically given one of these priorities by the code that generates it. We collect moves with (prog-point, prio) keys, and we short by those -keys. We then have, for each such key, a list of moves that +keys. We then have, for each such key, a set of moves that semantically happen in parallel. We then resolve those moves using a parallel-move resolver, as we now @@ -1212,7 +1212,7 @@ registers that other moves use as sources. We must carefully order moves so that this does not clobber values incorrectly. We first check if such overlap occurs. If it does not (this is -actually the most common case), the list of parallel moves can be +actually the most common case), the sequence of parallel moves can be emitted as sequential moves directly. Done! Otherwise, we have to order the moves carefully. Furthermore, if there @@ -1229,9 +1229,9 @@ move that overwrites its source. (This will be important in a bit!) Our task is now to find an ordering of moves that respects these dependencies. To do so, we perform a depth-first search on the graph -induced by the dependencies, which will generate a list of sequential -moves in reverse order. We keep a stack of moves; we start with any -move that has not been visited yet; in each iteration, if the +induced by the dependencies, which will generate a sequence of +sequential moves in reverse order. 
We keep a stack of moves; we start +with any move that has not been visited yet; in each iteration, if the top-of-stack has no out-edge to another move (does not need to come before any others), then push it to a result vector, followed by all others on the stack (in popped order). If it does have an out-edge and @@ -1257,8 +1257,8 @@ nodes (moves) can be part of the SCC, because every node's single out-edge is already accounted for. This is what allows us to avoid a fully general SCC algorithm. -Once the list of moves in-reverse has been constructed, we reverse it -and return. +Once the vector of moves in-reverse has been constructed, we reverse +it and return. Note that this "move resolver" is fuzzed separately with a simple symbolic move simulator (the `moves` fuzz-target). @@ -1283,7 +1283,7 @@ extra spillslot. ## Redundant-Move Elimination -As a final step before returning the list of program edits to the +As a final step before returning the vector of program edits to the client, we perform one optimization: redundant-move elimination. To understand the need for this, consider what will occur when a vreg @@ -1450,9 +1450,9 @@ Several notable high-level differences are: across blocks by, when reaching one end of a control-flow edge in a scan, doing a lookup of the allocation at the other end. This is in principle a linear lookup (so quadratic overall). We instead - generate a list of "half-moves", keyed on the edge and from/to + generate a vector of "half-moves", keyed on the edge and from/to vregs, with each holding one of the allocations. By sorting and then - scanning this list, we can generate all edge moves in one linear + scanning this vector, we can generate all edge moves in one linear scan. There are a number of other examples of simplifications: for example, we handle multiple conflicting physical-register-constrained uses of a vreg in a single instruction @@ -1513,7 +1513,7 @@ number of general principles: cache-efficient. As another example, a side-effect of the precise liveness was that we could then process operands within blocks in actual instruction order (in reverse), which allowed us to simply - append liveranges to in-progress vreg liverange lists and then + append liveranges to in-progress vreg liverange vectors and then reverse at the end. The expensive part is a single pass; only the bitset computation is a fixpoint loop. @@ -1551,11 +1551,11 @@ of the function; two separate chunks will cover that. We tried a number of other designs as well. Initially we used a simple dense bitvec, but this was prohibitively expensive: O(n^2) space when the real need is closer to O(n) (i.e., a classic sparse matrix). We -also tried a hybrid scheme that kept a list of indices when small and -used either a bitvec or a hashset when large. This did not perform as -well because (i) it was less memory-efficient (the chunking helps with -this) and (ii) insertions are more expensive when they always require -a full hashset/hashmap insert. +also tried a hybrid scheme that kept a vector of indices when small +and used either a bitvec or a hashset when large. This did not perform +as well because (i) it was less memory-efficient (the chunking helps +with this) and (ii) insertions are more expensive when they always +require a full hashset/hashmap insert. # Appendix: Fuzzing From 0c795842fd4dd3e6497fa15b06ecc7ef00d6c354 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 11:26:45 -0700 Subject: [PATCH 136/155] Add some more detail about spill bundles to design doc. 
--- doc/DESIGN.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/DESIGN.md b/doc/DESIGN.md index 6553e734..1fee67f4 100644 --- a/doc/DESIGN.md +++ b/doc/DESIGN.md @@ -449,6 +449,11 @@ it is where liveranges go when we give up on processing them via the normal backtracking loop, and will only process them once more in the "second-chance" stage. +This fallback behavior implies that the spill bundle must always be +able to accept a spillslot allocation, i.e., it cannot require a +register. This invariant is what allows spill bundles to be processed +in a different way, after backtracking has completed. + The spill bundle acquires liveranges in two ways. First, as we split bundles, we will trim the split pieces in certain ways so that some liveranges are immediately placed in the spill bundle. Intuitively, From 38323e0c27ea8285c27461ba082bc7d37aec404c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 11:39:38 -0700 Subject: [PATCH 137/155] Some more design-doc and TODO updates from @julian-seward1's feedback. --- doc/DESIGN.md | 58 ++++++++++++++++++++++++++++++++++----------------- doc/TODO | 3 +++ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/doc/DESIGN.md b/doc/DESIGN.md index 1fee67f4..ccb28006 100644 --- a/doc/DESIGN.md +++ b/doc/DESIGN.md @@ -805,15 +805,32 @@ them all here. different requirements meets to Conflict. Requirements are derived from the operand constraints for all uses in all liveranges in a bundle, and then merged with the lattice meet-function. + +The lattice is as follows (diagram simplified to remove multiple +classes and multiple fixed registers which parameterize nodes; any two +differently-parameterized values are unordered with respect to each +other): + +```plain + + ___Unknown_____ + | | | + | | | + | ____Any(rc) | + |/ | | + Stack(rc) FixedReg(reg) + \ / + Conflict +``` Once we have the Requirement for a bundle, we can decide what to do. ### No-Register-Required Cases If the requirement indicates that no register is needed (`Unknown` or -`Any`), *and* if the spill bundle already exists for this bundle's -spillset, then we move all the liveranges over to the spill bundle, as -described above. +`Any`, i.e. a register or stack slot would be OK), *and* if the spill +bundle already exists for this bundle's spillset, then we move all the +liveranges over to the spill bundle, as described above. If the requirement indicates that the stack is needed explicitly (e.g., for a safepoint), we set our spillset as "required" (this will @@ -822,11 +839,11 @@ no other allocation set, it will look to the spillset's spillslot by default. If the requirement indicates a conflict, we immediately split and -requeue the split pieces. This split is a special one: rather than -split in a way informed by conflicts (see below), we unconditionally -split off the first use. This is a heuristic and we could in theory do -better by finding the source of the conflict; but in practice this -works well enough. Note that a bundle can reach this stage with a +requeue the split pieces. This split is performed at the point at +which the conflict is first introduced, i.e. just before the first use +whose requirement, when merged into the requirement for all prior uses +combined, goes to `Conflict`. In this way, we always guarantee forward +progress. 
Note also that a bundle can reach this stage with a conflicting requirement only if the original liverange had conflicting uses (e.g., a liverange from a def in a register to a use on stack, or a liverange between two different fixed-reg-constrained operands); our @@ -854,7 +871,7 @@ preferred registers; then all non-preferred registers. For each of the preferred and non-preferred register sequences, we probe in an *offset* manner: we start at some index partway through the sequence, determined by some heuristic number that is random and -well-dstributed. (In practice, we use the sum of the bundle index and +well-distributed. (In practice, we use the sum of the bundle index and the instruction index of the start of the first range in the bundle.) We then march through the sequence and wrap around, stopping before we hit our starting point again. @@ -1202,7 +1219,7 @@ priorities: Every move is statically given one of these priorities by the code that generates it. -We collect moves with (prog-point, prio) keys, and we short by those +We collect moves with (prog-point, prio) keys, and we sort by those keys. We then have, for each such key, a set of moves that semantically happen in parallel. @@ -1286,10 +1303,10 @@ allocated, move the scratch reg to that, do the above stack-to-scratch / scratch-to-stack sequence, then reload the scratch reg from the extra spillslot. -## Redundant-Move Elimination +## Redundant-Spill/Load Elimination As a final step before returning the vector of program edits to the -client, we perform one optimization: redundant-move elimination. +client, we perform one optimization: redundant-spill/load elimination. To understand the need for this, consider what will occur when a vreg is (i) defined once, (ii) used many times, and (iii) spilled multiple @@ -1318,13 +1335,16 @@ trimmed part of the liverange between uses and put it in the spill bundle, and the spill bundle did not get a reg. In order to resolve this inefficiency, we implement a general -redundant-move elimination pass. This pass tracks, for every -allocation (reg or spillslot), whether it is a copy of another -allocation. This state is invalidated whenever either that allocation -or the allocation of which it is a copy is overwritten. When we see a -move instruction, if the destination is already a copy of the source, -we elide the move. (There are some additional complexities to preserve -checker metadata which we do not describe here.) +redundant-spill/load elimination pass (an even more general solution +would be a full redundant-move elimination pass, but we focus on moves +that are spills/loads to contain the complexity for now). This pass +tracks, for every allocation (reg or spillslot), whether it is a copy +of another allocation. This state is invalidated whenever either that +allocation or the allocation of which it is a copy is +overwritten. When we see a move instruction, if the destination is +already a copy of the source, we elide the move. (There are some +additional complexities to preserve checker metadata which we do not +describe here.) Note that this could, in principle, be done as a fixpoint analysis over the CFG; it must be, if we try to preserve state across diff --git a/doc/TODO b/doc/TODO index a6eb0b56..c4bf58ad 100644 --- a/doc/TODO +++ b/doc/TODO @@ -24,6 +24,9 @@ - Investigate ways to improve bundle-merging; e.g., merge moves before other types of connections +- Add limited inter-block redundant-move elimination: propagate across + splits but not joins. 
+ # Cleanup - Remove support for non-SSA code once no longer necessary \ No newline at end of file From 3e1e0f39b6f612c35cc2a93bc1844154b1d8981d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 12:05:19 -0700 Subject: [PATCH 138/155] Convert all log::debug to log::trace. --- fuzz/fuzz_targets/ion.rs | 2 +- fuzz/fuzz_targets/ion_checker.rs | 2 +- src/checker.rs | 62 ++++++++------- src/fuzzing/func.rs | 17 +++- src/ion/dump.rs | 18 ++--- src/ion/liveranges.rs | 95 ++++++++++++----------- src/ion/merge.rs | 48 ++++++------ src/ion/moves.rs | 92 +++++++++++----------- src/ion/process.rs | 128 +++++++++++++++---------------- src/ion/redundant_moves.rs | 22 +++--- src/ion/requirement.rs | 14 ++-- src/ion/spill.rs | 14 ++-- src/ion/stackmap.rs | 12 +-- src/lib.rs | 7 +- 14 files changed, 282 insertions(+), 251 deletions(-) diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs index 82aee653..485c36bf 100644 --- a/fuzz/fuzz_targets/ion.rs +++ b/fuzz/fuzz_targets/ion.rs @@ -10,7 +10,7 @@ use regalloc2::fuzzing::func::Func; fuzz_target!(|func: Func| { let _ = env_logger::try_init(); - log::debug!("func:\n{:?}", func); + log::trace!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); let _out = regalloc2::fuzzing::ion::run(&func, &env, false).expect("regalloc did not succeed"); }); diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs index 950d4d7c..d467a03c 100644 --- a/fuzz/fuzz_targets/ion_checker.rs +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -38,7 +38,7 @@ impl Arbitrary for TestCase { fuzz_target!(|testcase: TestCase| { let func = testcase.func; let _ = env_logger::try_init(); - log::debug!("func:\n{:?}", func); + log::trace!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); let out = regalloc2::fuzzing::ion::run(&func, &env, true).expect("regalloc did not succeed"); diff --git a/src/checker.rs b/src/checker.rs index 858e35ad..11bf2ce2 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -66,8 +66,8 @@ #![allow(dead_code)] use crate::{ - Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, Operand, OperandKind, - OperandConstraint, OperandPos, Output, PReg, ProgPoint, SpillSlot, VReg, + Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, Operand, + OperandConstraint, OperandKind, OperandPos, Output, PReg, ProgPoint, SpillSlot, VReg, }; use std::collections::{HashMap, HashSet, VecDeque}; @@ -75,8 +75,6 @@ use std::default::Default; use std::hash::Hash; use std::result::Result; -use log::debug; - /// A set of errors detected by the regalloc checker. 
#[derive(Clone, Debug)] pub struct CheckerErrors { @@ -182,7 +180,7 @@ impl CheckerValue { CheckerValue::Reg(r1, ref1) } _ => { - log::debug!("{:?} and {:?} meet to Conflicted", self, other); + log::trace!("{:?} and {:?} meet to Conflicted", self, other); CheckerValue::Conflicted } } @@ -313,9 +311,12 @@ impl CheckerState { .get(alloc) .cloned() .unwrap_or(Default::default()); - debug!( + log::trace!( "checker: checkinst {:?}: op {:?}, alloc {:?}, checker value {:?}", - checkinst, op, alloc, val + checkinst, + op, + alloc, + val ); self.check_val(inst, *op, *alloc, val, allocs)?; } @@ -328,9 +329,11 @@ impl CheckerState { .get(&alloc) .cloned() .unwrap_or(Default::default()); - debug!( + log::trace!( "checker: checkinst {:?}: safepoint slot {}, checker value {:?}", - checkinst, slot, val + checkinst, + slot, + val ); match val { @@ -359,9 +362,12 @@ impl CheckerState { .get(&from) .cloned() .unwrap_or(Default::default()); - debug!( + log::trace!( "checker: checkinst {:?} updating: move {:?} -> {:?} val {:?}", - checkinst, from, into, val + checkinst, + from, + into, + val ); self.allocations.insert(into, val); } @@ -529,7 +535,7 @@ impl<'a, F: Function> Checker<'a, F> { /// Build the list of checker instructions based on the given func /// and allocation results. pub fn prepare(&mut self, out: &Output) { - debug!("checker: out = {:?}", out); + log::trace!("checker: out = {:?}", out); // Preprocess safepoint stack-maps into per-inst vecs. let mut safepoint_slots: HashMap> = HashMap::new(); for &(progpoint, slot) in &out.safepoint_slots { @@ -574,7 +580,7 @@ impl<'a, F: Function> Checker<'a, F> { allocs, clobbers, }; - debug!("checker: adding inst {:?}", checkinst); + log::trace!("checker: adding inst {:?}", checkinst); self.bb_insts.get_mut(&block).unwrap().push(checkinst); } @@ -591,7 +597,7 @@ impl<'a, F: Function> Checker<'a, F> { if edit_pos < pos { continue; } - debug!("checker: adding edit {:?} at pos {:?}", edit, pos); + log::trace!("checker: adding edit {:?} at pos {:?}", edit, pos); match edit { &Edit::Move { from, to, to_vreg } => { self.bb_insts @@ -640,10 +646,10 @@ impl<'a, F: Function> Checker<'a, F> { let block = queue.pop_front().unwrap(); queue_set.remove(&block); let mut state = self.bb_in.get(&block).cloned().unwrap(); - debug!("analyze: block {} has state {:?}", block.index(), state); + log::trace!("analyze: block {} has state {:?}", block.index(), state); for inst in self.bb_insts.get(&block).unwrap() { state.update(inst, self); - debug!("analyze: inst {:?} -> state {:?}", inst, state); + log::trace!("analyze: inst {:?} -> state {:?}", inst, state); } for &succ in self.f.block_succs(block) { @@ -652,7 +658,7 @@ impl<'a, F: Function> Checker<'a, F> { new_state.meet_with(cur_succ_in); let changed = &new_state != cur_succ_in; if changed { - debug!( + log::trace!( "analyze: block {} state changed from {:?} to {:?}; pushing onto queue", succ.index(), cur_succ_in, @@ -677,12 +683,12 @@ impl<'a, F: Function> Checker<'a, F> { let mut state = input.clone(); for inst in self.bb_insts.get(block).unwrap() { if let Err(e) = state.check(InstPosition::Before, inst) { - debug!("Checker error: {:?}", e); + log::trace!("Checker error: {:?}", e); errors.push(e); } state.update(inst, self); if let Err(e) = state.check(InstPosition::After, inst) { - debug!("Checker error: {:?}", e); + log::trace!("Checker error: {:?}", e); errors.push(e); } } @@ -701,20 +707,20 @@ impl<'a, F: Function> Checker<'a, F> { self.analyze(); let result = self.find_errors(); - debug!("=== CHECKER RESULT ==="); + 
log::trace!("=== CHECKER RESULT ==="); fn print_state(state: &CheckerState) { let mut s = vec![]; for (alloc, state) in &state.allocations { s.push(format!("{} := {}", alloc, state)); } - debug!(" {{ {} }}", s.join(", ")) + log::trace!(" {{ {} }}", s.join(", ")) } for vreg in self.f.reftype_vregs() { - debug!(" REF: {}", vreg); + log::trace!(" REF: {}", vreg); } for bb in 0..self.f.blocks() { let bb = Block::new(bb); - debug!("block{}:", bb.index()); + log::trace!("block{}:", bb.index()); let insts = self.bb_insts.get(&bb).unwrap(); let mut state = self.bb_in.get(&bb).unwrap().clone(); print_state(&state); @@ -726,7 +732,7 @@ impl<'a, F: Function> Checker<'a, F> { ref allocs, ref clobbers, } => { - debug!( + log::trace!( " inst{}: {:?} ({:?}) clobbers:{:?}", inst.index(), operands, @@ -735,7 +741,7 @@ impl<'a, F: Function> Checker<'a, F> { ); } &CheckerInst::Move { from, into } => { - debug!(" {} -> {}", from, into); + log::trace!(" {} -> {}", from, into); } &CheckerInst::BlockParams { ref vregs, @@ -746,17 +752,17 @@ impl<'a, F: Function> Checker<'a, F> { for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { args.push(format!("{}:{}", vreg, alloc)); } - debug!(" blockparams: {}", args.join(", ")); + log::trace!(" blockparams: {}", args.join(", ")); } &CheckerInst::DefAlloc { alloc, vreg } => { - debug!(" defalloc: {}:{}", vreg, alloc); + log::trace!(" defalloc: {}:{}", vreg, alloc); } &CheckerInst::Safepoint { ref slots, .. } => { let mut slotargs = vec![]; for &slot in slots { slotargs.push(format!("{}", slot)); } - debug!(" safepoint: {}", slotargs.join(", ")); + log::trace!(" safepoint: {}", slotargs.join(", ")); } } state.update(inst, &self); diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index facb276e..d50a1fd4 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -5,7 +5,7 @@ use crate::{ domtree, postorder, Allocation, Block, Function, Inst, InstRange, MachineEnv, Operand, - OperandKind, OperandConstraint, OperandPos, PReg, RegClass, VReg, + OperandConstraint, OperandKind, OperandPos, PReg, RegClass, VReg, }; use arbitrary::Result as ArbitraryResult; @@ -407,7 +407,12 @@ impl Func { } else { OperandPos::After }; - let mut operands = vec![Operand::new(vreg, def_constraint, OperandKind::Def, def_pos)]; + let mut operands = vec![Operand::new( + vreg, + def_constraint, + OperandKind::Def, + def_pos, + )]; let mut allocations = vec![Allocation::none()]; for _ in 0..u.int_in_range(0..=3)? { let vreg = if avail.len() > 0 @@ -456,8 +461,12 @@ impl Func { ); // Make sure reused input is a Reg. let op = operands[reused]; - operands[reused] = - Operand::new(op.vreg(), OperandConstraint::Reg, op.kind(), OperandPos::Before); + operands[reused] = Operand::new( + op.vreg(), + OperandConstraint::Reg, + op.kind(), + OperandPos::Before, + ); } else if opts.fixed_regs && bool::arbitrary(u)? { let mut fixed = vec![]; for _ in 0..u.int_in_range(0..=operands.len() - 1)? 
{ diff --git a/src/ion/dump.rs b/src/ion/dump.rs index bb1729bb..ce0a09f9 100644 --- a/src/ion/dump.rs +++ b/src/ion/dump.rs @@ -5,16 +5,16 @@ use crate::{Block, Function, ProgPoint}; impl<'a, F: Function> Env<'a, F> { pub fn dump_state(&self) { - log::debug!("Bundles:"); + log::trace!("Bundles:"); for (i, b) in self.bundles.iter().enumerate() { - log::debug!( + log::trace!( "bundle{}: spillset={:?} alloc={:?}", i, b.spillset, b.allocation ); for entry in &b.ranges { - log::debug!( + log::trace!( " * range {:?} -- {:?}: range{}", entry.range.from, entry.range.to, @@ -22,11 +22,11 @@ impl<'a, F: Function> Env<'a, F> { ); } } - log::debug!("VRegs:"); + log::trace!("VRegs:"); for (i, v) in self.vregs.iter().enumerate() { - log::debug!("vreg{}:", i); + log::trace!("vreg{}:", i); for entry in &v.ranges { - log::debug!( + log::trace!( " * range {:?} -- {:?}: range{}", entry.range.from, entry.range.to, @@ -34,9 +34,9 @@ impl<'a, F: Function> Env<'a, F> { ); } } - log::debug!("Ranges:"); + log::trace!("Ranges:"); for (i, r) in self.ranges.iter().enumerate() { - log::debug!( + log::trace!( "range{}: range={:?} vreg={:?} bundle={:?} weight={}", i, r.range, @@ -45,7 +45,7 @@ impl<'a, F: Function> Env<'a, F> { r.uses_spill_weight(), ); for u in &r.uses { - log::debug!(" * use at {:?} (slot {}): {:?}", u.pos, u.slot, u.operand); + log::trace!(" * use at {:?} (slot {}): {:?}", u.pos, u.slot, u.operand); } } } diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index a8837c44..639effa5 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -20,7 +20,7 @@ use super::{ }; use crate::bitvec::BitVec; use crate::{ - Allocation, Block, Function, Inst, InstPosition, Operand, OperandKind, OperandConstraint, + Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, ProgPoint, RegAllocError, VReg, }; use fxhash::FxHashSet; @@ -29,7 +29,11 @@ use std::collections::{HashSet, VecDeque}; use std::convert::TryFrom; #[inline(always)] -pub fn spill_weight_from_constraint(constraint: OperandConstraint, loop_depth: usize, is_def: bool) -> u32 { +pub fn spill_weight_from_constraint( + constraint: OperandConstraint, + loop_depth: usize, + is_def: bool, +) -> u32 { // A bonus of 1000 for one loop level, 4000 for two loop levels, // 16000 for three loop levels, etc. Avoids exponentiation. // Bound `loop_depth` at 2 so that `hot_bonus` is at most 16000. @@ -127,7 +131,7 @@ impl<'a, F: Function> Env<'a, F> { /// /// Returns the liverange that contains the given range. 
pub fn add_liverange_to_vreg(&mut self, vreg: VRegIndex, range: CodeRange) -> LiveRangeIndex { - log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); + log::trace!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); // Invariant: as we are building liveness information, we // *always* process instructions bottom-to-top, and as a @@ -187,11 +191,14 @@ impl<'a, F: Function> Env<'a, F> { let constraint = operand.constraint(); let block = self.cfginfo.insn_block[u.pos.inst().index()]; let loop_depth = self.cfginfo.approx_loop_depth[block.index()] as usize; - let weight = - spill_weight_from_constraint(constraint, loop_depth, operand.kind() != OperandKind::Use); + let weight = spill_weight_from_constraint( + constraint, + loop_depth, + operand.kind() != OperandKind::Use, + ); u.weight = u16::try_from(weight).expect("weight too large for u16 field"); - log::debug!( + log::trace!( "insert use {:?} into lr {:?} with weight {}", u, into, @@ -206,7 +213,7 @@ impl<'a, F: Function> Env<'a, F> { // Update stats. self.ranges[into.index()].uses_spill_weight_and_flags += weight; - log::debug!( + log::trace!( " -> now range has weight {}", self.ranges[into.index()].uses_spill_weight(), ); @@ -226,7 +233,7 @@ impl<'a, F: Function> Env<'a, F> { } pub fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { - log::debug!("adding liverange to preg: {:?} to {}", range, reg); + log::trace!("adding liverange to preg: {:?} to {}", range, reg); let preg_idx = PRegIndex::new(reg.index()); self.pregs[preg_idx.index()] .allocations @@ -259,12 +266,12 @@ impl<'a, F: Function> Env<'a, F> { let block = workqueue.pop_front().unwrap(); workqueue_set.remove(&block); - log::debug!("computing liveins for block{}", block.index()); + log::trace!("computing liveins for block{}", block.index()); self.stats.livein_iterations += 1; let mut live = self.liveouts[block.index()].clone(); - log::debug!(" -> initial liveout set: {:?}", live); + log::trace!(" -> initial liveout set: {:?}", live); for inst in self.func.block_insns(block).rev().iter() { if let Some((src, dst)) = self.func.is_move(inst) { @@ -276,7 +283,7 @@ impl<'a, F: Function> Env<'a, F> { for op in self.func.inst_operands(inst) { if op.pos() == *pos { let was_live = live.get(op.vreg().vreg()); - log::debug!("op {:?} was_live = {}", op, was_live); + log::trace!("op {:?} was_live = {}", op, was_live); match op.kind() { OperandKind::Use | OperandKind::Mod => { live.set(op.vreg().vreg(), true); @@ -302,7 +309,7 @@ impl<'a, F: Function> Env<'a, F> { } } - log::debug!("computed liveins at block{}: {:?}", block.index(), live); + log::trace!("computed liveins at block{}: {:?}", block.index(), live); self.liveins[block.index()] = live; } @@ -314,7 +321,7 @@ impl<'a, F: Function> Env<'a, F> { .next() .is_some() { - log::debug!( + log::trace!( "non-empty liveins to entry block: {:?}", self.liveins[self.func.entry_block().index()] ); @@ -354,7 +361,7 @@ impl<'a, F: Function> Env<'a, F> { from: self.cfginfo.block_entry[block.index()], to: self.cfginfo.block_exit[block.index()].next(), }; - log::debug!( + log::trace!( "vreg {:?} live at end of block --> create range {:?}", VRegIndex::new(vreg), range @@ -426,7 +433,7 @@ impl<'a, F: Function> Env<'a, F> { // We can completely skip the move if it is // trivial (vreg to same vreg). 
if src.vreg() != dst.vreg() { - log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); + log::trace!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst); assert_eq!(src.class(), dst.class()); assert_eq!(src.kind(), OperandKind::Use); @@ -488,7 +495,7 @@ impl<'a, F: Function> Env<'a, F> { else if self.vregs[src.vreg().vreg()].is_pinned || self.vregs[dst.vreg().vreg()].is_pinned { - log::debug!( + log::trace!( " -> exactly one of src/dst is pinned; converting to ghost use" ); let (preg, vreg, pinned_vreg, kind, pos, progpoint) = @@ -516,7 +523,7 @@ impl<'a, F: Function> Env<'a, F> { let constraint = OperandConstraint::FixedReg(preg); let operand = Operand::new(vreg, constraint, kind, pos); - log::debug!( + log::trace!( concat!( " -> preg {:?} vreg {:?} kind {:?} ", "pos {:?} progpoint {:?} constraint {:?} operand {:?}" @@ -543,9 +550,9 @@ impl<'a, F: Function> Env<'a, F> { VRegIndex::new(vreg.vreg()), CodeRange { from, to }, ); - log::debug!(" -> dead; created LR"); + log::trace!(" -> dead; created LR"); } - log::debug!(" -> LR {:?}", lr); + log::trace!(" -> LR {:?}", lr); self.insert_use_into_liverange( lr, @@ -579,7 +586,7 @@ impl<'a, F: Function> Env<'a, F> { // (this is the last use), start it // before. if kind == OperandKind::Def { - log::debug!(" -> src on pinned vreg {:?}", pinned_vreg); + log::trace!(" -> src on pinned vreg {:?}", pinned_vreg); // The *other* vreg is a def, so the pinned-vreg // mention is a use. If already live, // end the existing LR just *after* @@ -593,7 +600,7 @@ impl<'a, F: Function> Env<'a, F> { if live.get(pinned_vreg.vreg()) { let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; let orig_start = self.ranges[pinned_lr.index()].range.from; - log::debug!( + log::trace!( " -> live with LR {:?}; truncating to start at {:?}", pinned_lr, progpoint.next() @@ -607,7 +614,7 @@ impl<'a, F: Function> Env<'a, F> { }, ); vreg_ranges[pinned_vreg.vreg()] = new_lr; - log::debug!(" -> created LR {:?} with remaining range from {:?} to {:?}", new_lr, orig_start, progpoint); + log::trace!(" -> created LR {:?} with remaining range from {:?} to {:?}", new_lr, orig_start, progpoint); // Add an edit right now to indicate that at // this program point, the given @@ -641,7 +648,7 @@ impl<'a, F: Function> Env<'a, F> { ); vreg_ranges[pinned_vreg.vreg()] = new_lr; live.set(pinned_vreg.vreg(), true); - log::debug!( + log::trace!( " -> was not live; created new LR {:?}", new_lr ); @@ -661,7 +668,7 @@ impl<'a, F: Function> Env<'a, F> { ); } } else { - log::debug!(" -> dst on pinned vreg {:?}", pinned_vreg); + log::trace!(" -> dst on pinned vreg {:?}", pinned_vreg); // The *other* vreg is a use, so the pinned-vreg // mention is a def. Truncate its LR // just *after* the `progpoint` @@ -669,7 +676,7 @@ impl<'a, F: Function> Env<'a, F> { if live.get(pinned_vreg.vreg()) { let pinned_lr = vreg_ranges[pinned_vreg.vreg()]; self.ranges[pinned_lr.index()].range.from = progpoint.next(); - log::debug!( + log::trace!( " -> was live with LR {:?}; truncated start to {:?}", pinned_lr, progpoint.next() @@ -769,14 +776,14 @@ impl<'a, F: Function> Env<'a, F> { VRegIndex::new(dst.vreg().vreg()), CodeRange { from, to }, ); - log::debug!(" -> invalid LR for def; created {:?}", dst_lr); + log::trace!(" -> invalid LR for def; created {:?}", dst_lr); } - log::debug!(" -> has existing LR {:?}", dst_lr); + log::trace!(" -> has existing LR {:?}", dst_lr); // Trim the LR to start here. 
if self.ranges[dst_lr.index()].range.from == self.cfginfo.block_entry[block.index()] { - log::debug!(" -> started at block start; trimming to {:?}", pos); + log::trace!(" -> started at block start; trimming to {:?}", pos); self.ranges[dst_lr.index()].range.from = pos; } self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef); @@ -803,7 +810,7 @@ impl<'a, F: Function> Env<'a, F> { vreg_ranges[src.vreg().vreg()] }; - log::debug!(" -> src LR {:?}", src_lr); + log::trace!(" -> src LR {:?}", src_lr); // Add to live-set. let src_is_dead_after_move = !live.get(src.vreg().vreg()); @@ -863,7 +870,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } - log::debug!( + log::trace!( "processing inst{} operand at {:?}: {:?}", inst.index(), pos, @@ -872,14 +879,14 @@ impl<'a, F: Function> Env<'a, F> { match operand.kind() { OperandKind::Def | OperandKind::Mod => { - log::debug!("Def of {} at {:?}", operand.vreg(), pos); + log::trace!("Def of {} at {:?}", operand.vreg(), pos); // Fill in vreg's actual data. self.vreg_regs[operand.vreg().vreg()] = operand.vreg(); // Get or create the LiveRange. let mut lr = vreg_ranges[operand.vreg().vreg()]; - log::debug!(" -> has existing LR {:?}", lr); + log::trace!(" -> has existing LR {:?}", lr); // If there was no liverange (dead def), create a trivial one. if !live.get(operand.vreg().vreg()) { let from = match operand.kind() { @@ -896,7 +903,7 @@ impl<'a, F: Function> Env<'a, F> { VRegIndex::new(operand.vreg().vreg()), CodeRange { from, to }, ); - log::debug!(" -> invalid; created {:?}", lr); + log::trace!(" -> invalid; created {:?}", lr); vreg_ranges[operand.vreg().vreg()] = lr; live.set(operand.vreg().vreg(), true); } @@ -913,7 +920,7 @@ impl<'a, F: Function> Env<'a, F> { if self.ranges[lr.index()].range.from == self.cfginfo.block_entry[block.index()] { - log::debug!( + log::trace!( " -> started at block start; trimming to {:?}", pos ); @@ -945,7 +952,7 @@ impl<'a, F: Function> Env<'a, F> { } assert!(lr.is_valid()); - log::debug!("Use of {:?} at {:?} -> {:?}", operand, pos, lr,); + log::trace!("Use of {:?} at {:?} -> {:?}", operand, pos, lr,); self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); @@ -957,11 +964,11 @@ impl<'a, F: Function> Env<'a, F> { } if self.func.is_safepoint(inst) { - log::debug!("inst{} is safepoint", inst.index()); + log::trace!("inst{} is safepoint", inst.index()); self.safepoints.push(inst); for vreg in live.iter() { if let Some(safepoints) = self.safepoints_per_vreg.get_mut(&vreg) { - log::debug!("vreg v{} live at safepoint inst{}", vreg, inst.index()); + log::trace!("vreg v{} live at safepoint inst{}", vreg, inst.index()); safepoints.insert(inst); } } @@ -1054,7 +1061,7 @@ impl<'a, F: Function> Env<'a, F> { OperandPos::Before, ); - log::debug!( + log::trace!( "Safepoint-induced stack use of {:?} at {:?} -> {:?}", operand, pos, @@ -1095,7 +1102,7 @@ impl<'a, F: Function> Env<'a, F> { for range_idx in 0..self.vregs[vreg].ranges.len() { let entry = self.vregs[vreg].ranges[range_idx]; let range = entry.index; - log::debug!( + log::trace!( "multi-fixed-reg cleanup: vreg {:?} range {:?}", VRegIndex::new(vreg), range, @@ -1119,7 +1126,7 @@ impl<'a, F: Function> Env<'a, F> { if let OperandConstraint::FixedReg(preg) = op.constraint() { let vreg_idx = VRegIndex::new(op.vreg().vreg()); let preg_idx = PRegIndex::new(preg.index()); - log::debug!( + log::trace!( "at pos {:?}, vreg {:?} has fixed constraint to preg {:?}", pos, vreg_idx, @@ -1129,7 +1136,7 @@ impl<'a, F: Function> Env<'a, F> { { let orig_preg = first_preg[idx]; if 
orig_preg != preg_idx { - log::debug!(" -> duplicate; switching to constraint Reg"); + log::trace!(" -> duplicate; switching to constraint Reg"); fixups.push((pos, orig_preg, preg_idx, slot)); *op = Operand::new( op.vreg(), @@ -1137,7 +1144,7 @@ impl<'a, F: Function> Env<'a, F> { op.kind(), op.pos(), ); - log::debug!( + log::trace!( " -> extra clobber {} at inst{}", preg, pos.inst().index() @@ -1182,8 +1189,8 @@ impl<'a, F: Function> Env<'a, F> { self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos); self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos); - log::debug!("prog_move_srcs = {:?}", self.prog_move_srcs); - log::debug!("prog_move_dsts = {:?}", self.prog_move_dsts); + log::trace!("prog_move_srcs = {:?}", self.prog_move_srcs); + log::trace!("prog_move_dsts = {:?}", self.prog_move_dsts); self.stats.initial_liverange_count = self.ranges.len(); self.stats.blockparam_ins_count = self.blockparam_ins.len(); diff --git a/src/ion/merge.rs b/src/ion/merge.rs index d908f6e0..b7a969eb 100644 --- a/src/ion/merge.rs +++ b/src/ion/merge.rs @@ -26,7 +26,7 @@ impl<'a, F: Function> Env<'a, F> { // Merge bundle into self -- trivial merge. return true; } - log::debug!( + log::trace!( "merging from bundle{} to bundle{}", from.index(), to.index() @@ -36,7 +36,7 @@ impl<'a, F: Function> Env<'a, F> { let from_rc = self.spillsets[self.bundles[from.index()].spillset.index()].class; let to_rc = self.spillsets[self.bundles[to.index()].spillset.index()].class; if from_rc != to_rc { - log::debug!(" -> mismatching reg classes"); + log::trace!(" -> mismatching reg classes"); return false; } @@ -44,7 +44,7 @@ impl<'a, F: Function> Env<'a, F> { if !self.bundles[from.index()].allocation.is_none() || !self.bundles[to.index()].allocation.is_none() { - log::debug!("one of the bundles is already assigned (pinned)"); + log::trace!("one of the bundles is already assigned (pinned)"); return false; } @@ -71,7 +71,7 @@ impl<'a, F: Function> Env<'a, F> { while idx_from < ranges_from.len() && idx_to < ranges_to.len() { range_count += 1; if range_count > 200 { - log::debug!( + log::trace!( "reached merge complexity (range_count = {}); exiting", range_count ); @@ -85,7 +85,7 @@ impl<'a, F: Function> Env<'a, F> { idx_from += 1; } else { // Overlap -- cannot merge. - log::debug!( + log::trace!( " -> overlap between {:?} and {:?}, exiting", ranges_from[idx_from].index, ranges_to[idx_to].index @@ -104,12 +104,12 @@ impl<'a, F: Function> Env<'a, F> { .compute_requirement(from) .merge(self.compute_requirement(to)); if req == Requirement::Conflict { - log::debug!(" -> conflicting requirements; aborting merge"); + log::trace!(" -> conflicting requirements; aborting merge"); return false; } } - log::debug!(" -> committing to merge"); + log::trace!(" -> committing to merge"); // If we reach here, then the bundles do not overlap -- merge // them! We do this with a merge-sort-like scan over both @@ -117,13 +117,13 @@ impl<'a, F: Function> Env<'a, F> { // `to` when we're done. if ranges_from.is_empty() { // `from` bundle is empty -- trivial merge. - log::debug!(" -> from bundle{} is empty; trivial merge", from.index()); + log::trace!(" -> from bundle{} is empty; trivial merge", from.index()); return true; } if ranges_to.is_empty() { // `to` bundle is empty -- just move the list over from // `from` and set `bundle` up-link on all ranges. 
- log::debug!(" -> to bundle{} is empty; trivial merge", to.index()); + log::trace!(" -> to bundle{} is empty; trivial merge", to.index()); let list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]); for entry in &list { self.ranges[entry.index.index()].bundle = to; @@ -153,7 +153,7 @@ impl<'a, F: Function> Env<'a, F> { return true; } - log::debug!( + log::trace!( "merging: ranges_from = {:?} ranges_to = {:?}", ranges_from, ranges_to @@ -174,7 +174,7 @@ impl<'a, F: Function> Env<'a, F> { .sort_unstable_by_key(|entry| entry.range.from); if self.annotations_enabled { - log::debug!("merging: merged = {:?}", self.bundles[to.index()].ranges); + log::trace!("merging: merged = {:?}", self.bundles[to.index()].ranges); let mut last_range = None; for i in 0..self.bundles[to.index()].ranges.len() { let entry = self.bundles[to.index()].ranges[i]; @@ -196,7 +196,7 @@ impl<'a, F: Function> Env<'a, F> { ); } - log::debug!( + log::trace!( " -> merged result for bundle{}: range{}", to.index(), entry.index.index(), @@ -229,7 +229,7 @@ impl<'a, F: Function> Env<'a, F> { pub fn merge_vreg_bundles(&mut self) { // Create a bundle for every vreg, initially. - log::debug!("merge_vreg_bundles: creating vreg bundles"); + log::trace!("merge_vreg_bundles: creating vreg bundles"); for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); if self.vregs[vreg.index()].ranges.is_empty() { @@ -255,9 +255,9 @@ impl<'a, F: Function> Env<'a, F> { let bundle = self.create_bundle(); self.bundles[bundle.index()].ranges = self.vregs[vreg.index()].ranges.clone(); - log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); + log::trace!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); for entry in &self.bundles[bundle.index()].ranges { - log::debug!( + log::trace!( " -> with LR range{}: {:?}", entry.index.index(), entry.range @@ -318,7 +318,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } - log::debug!( + log::trace!( "trying to merge reused-input def: src {} to dst {}", src_vreg, dst_vreg @@ -337,7 +337,7 @@ impl<'a, F: Function> Env<'a, F> { // Attempt to merge blockparams with their inputs. for i in 0..self.blockparam_outs.len() { let (from_vreg, _, _, to_vreg) = self.blockparam_outs[i]; - log::debug!( + log::trace!( "trying to merge blockparam v{} with input v{}", to_vreg.index(), from_vreg.index() @@ -347,7 +347,7 @@ impl<'a, F: Function> Env<'a, F> { let from_bundle = self.ranges[self.vregs[from_vreg.index()].ranges[0].index.index()].bundle; assert!(from_bundle.is_valid()); - log::debug!( + log::trace!( " -> from bundle{} to bundle{}", from_bundle.index(), to_bundle.index() @@ -358,10 +358,10 @@ impl<'a, F: Function> Env<'a, F> { // Attempt to merge move srcs/dsts. 
for i in 0..self.prog_move_merges.len() { let (src, dst) = self.prog_move_merges[i]; - log::debug!("trying to merge move src LR {:?} to dst LR {:?}", src, dst); + log::trace!("trying to merge move src LR {:?} to dst LR {:?}", src, dst); let src = self.resolve_merged_lr(src); let dst = self.resolve_merged_lr(dst); - log::debug!( + log::trace!( "resolved LR-construction merging chains: move-merge is now src LR {:?} to dst LR {:?}", src, dst @@ -397,7 +397,7 @@ impl<'a, F: Function> Env<'a, F> { } } - log::debug!("done merging bundles"); + log::trace!("done merging bundles"); } pub fn resolve_merged_lr(&self, mut lr: LiveRangeIndex) -> LiveRangeIndex { @@ -421,14 +421,14 @@ impl<'a, F: Function> Env<'a, F> { pub fn queue_bundles(&mut self) { for bundle in 0..self.bundles.len() { - log::debug!("enqueueing bundle{}", bundle); + log::trace!("enqueueing bundle{}", bundle); if self.bundles[bundle].ranges.is_empty() { - log::debug!(" -> no ranges; skipping"); + log::trace!(" -> no ranges; skipping"); continue; } let bundle = LiveBundleIndex::new(bundle); let prio = self.compute_bundle_prio(bundle); - log::debug!(" -> prio {}", prio); + log::trace!(" -> prio {}", prio); self.bundles[bundle.index()].prio = prio; self.recompute_bundle_properties(bundle); self.allocation_queue diff --git a/src/ion/moves.rs b/src/ion/moves.rs index bf8bea82..351c0cad 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -20,10 +20,9 @@ use super::{ use crate::moves::ParallelMoves; use crate::{ - Allocation, Block, Edit, Function, Inst, InstPosition, OperandKind, OperandConstraint, OperandPos, - ProgPoint, RegClass, VReg, + Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind, + OperandPos, ProgPoint, RegClass, VReg, }; -use log::debug; use smallvec::{smallvec, SmallVec}; use std::fmt::Debug; @@ -45,9 +44,12 @@ impl<'a, F: Function> Env<'a, F> { to_alloc: Allocation, to_vreg: Option, ) { - debug!( + log::trace!( "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}", - pos, prio, from_alloc, to_alloc + pos, + prio, + from_alloc, + to_alloc ); match (from_alloc.as_reg(), to_alloc.as_reg()) { (Some(from), Some(to)) => { @@ -75,16 +77,16 @@ impl<'a, F: Function> Env<'a, F> { } pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { - log::debug!("get_alloc_for_range: {:?}", range); + log::trace!("get_alloc_for_range: {:?}", range); let bundle = self.ranges[range.index()].bundle; - log::debug!(" -> bundle: {:?}", bundle); + log::trace!(" -> bundle: {:?}", bundle); let bundledata = &self.bundles[bundle.index()]; - log::debug!(" -> allocation {:?}", bundledata.allocation); + log::trace!(" -> allocation {:?}", bundledata.allocation); if bundledata.allocation != Allocation::none() { bundledata.allocation } else { - log::debug!(" -> spillset {:?}", bundledata.spillset); - log::debug!( + log::trace!(" -> spillset {:?}", bundledata.spillset); + log::trace!( " -> spill slot {:?}", self.spillsets[bundledata.spillset.index()].slot ); @@ -93,9 +95,9 @@ impl<'a, F: Function> Env<'a, F> { } pub fn apply_allocations_and_insert_moves(&mut self) { - log::debug!("apply_allocations_and_insert_moves"); - log::debug!("blockparam_ins: {:?}", self.blockparam_ins); - log::debug!("blockparam_outs: {:?}", self.blockparam_outs); + log::trace!("apply_allocations_and_insert_moves"); + log::trace!("blockparam_ins: {:?}", self.blockparam_ins); + log::trace!("blockparam_outs: {:?}", self.blockparam_outs); // Now that all splits are done, we can pay the cost once to // sort VReg range lists and 
update with the final ranges. @@ -191,7 +193,7 @@ impl<'a, F: Function> Env<'a, F> { .map(|preg| Allocation::reg(preg)) .unwrap_or_else(|| self.get_alloc_for_range(entry.index)); let range = entry.range; - log::debug!( + log::trace!( "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?} (pinned {:?})", vreg, entry.index, @@ -258,7 +260,7 @@ impl<'a, F: Function> Env<'a, F> { && !self.is_start_of_block(range.from) && !first_is_def { - log::debug!( + log::trace!( "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", prev.index(), entry.index.index(), @@ -292,9 +294,9 @@ impl<'a, F: Function> Env<'a, F> { if range.to < self.cfginfo.block_exit[block.index()].next() { break; } - log::debug!("examining block with end in range: block{}", block.index()); + log::trace!("examining block with end in range: block{}", block.index()); for &succ in self.func.block_succs(block) { - log::debug!( + log::trace!( " -> has succ block {} with entry {:?}", succ.index(), self.cfginfo.block_entry[succ.index()] @@ -302,9 +304,9 @@ impl<'a, F: Function> Env<'a, F> { if range.contains_point(self.cfginfo.block_entry[succ.index()]) { continue; } - log::debug!(" -> out of this range, requires half-move if live"); + log::trace!(" -> out of this range, requires half-move if live"); if self.is_live_in(succ, vreg) { - log::debug!(" -> live at input to succ, adding halfmove"); + log::trace!(" -> live at input to succ, adding halfmove"); half_moves.push(HalfMove { key: half_move_key(block, succ, vreg, HalfMoveKind::Source), alloc, @@ -315,7 +317,7 @@ impl<'a, F: Function> Env<'a, F> { // Scan forward in `blockparam_outs`, adding all // half-moves for outgoing values to blockparams // in succs. - log::debug!( + log::trace!( "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", vreg.index(), block.index(), @@ -328,7 +330,7 @@ impl<'a, F: Function> Env<'a, F> { break; } if (from_vreg, from_block) == (vreg, block) { - log::debug!( + log::trace!( " -> found: from v{} block{} to v{} block{}", from_vreg.index(), from_block.index(), @@ -380,7 +382,7 @@ impl<'a, F: Function> Env<'a, F> { } // Add half-moves for blockparam inputs. - log::debug!( + log::trace!( "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", vreg.index(), block.index(), @@ -402,7 +404,7 @@ impl<'a, F: Function> Env<'a, F> { ), alloc, }); - log::debug!( + log::trace!( "match: blockparam_in: v{} in block{} from block{} into {}", to_vreg.index(), to_block.index(), @@ -433,7 +435,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } - log::debug!( + log::trace!( "scanning preds at vreg {} block {} for ends outside the range", vreg.index(), block.index() @@ -442,7 +444,7 @@ impl<'a, F: Function> Env<'a, F> { // Now find any preds whose ends are not in the // same range, and insert appropriate moves. for &pred in self.func.block_preds(block) { - log::debug!( + log::trace!( "pred block {} has exit {:?}", pred.index(), self.cfginfo.block_exit[pred.index()] @@ -450,7 +452,7 @@ impl<'a, F: Function> Env<'a, F> { if range.contains_point(self.cfginfo.block_exit[pred.index()]) { continue; } - log::debug!(" -> requires half-move"); + log::trace!(" -> requires half-move"); half_moves.push(HalfMove { key: half_move_key(pred, block, vreg, HalfMoveKind::Dest), alloc, @@ -479,7 +481,7 @@ impl<'a, F: Function> Env<'a, F> { // Scan over def/uses and apply allocations. 
for use_idx in 0..self.ranges[entry.index.index()].uses.len() { let usedata = self.ranges[entry.index.index()].uses[use_idx]; - log::debug!("applying to use: {:?}", usedata); + log::trace!("applying to use: {:?}", usedata); debug_assert!(range.contains_point(usedata.pos)); let inst = usedata.pos.inst(); let slot = usedata.slot; @@ -508,7 +510,7 @@ impl<'a, F: Function> Env<'a, F> { // covers only prev inst's After; so includes move // srcs to (exclusive) inst. let move_src_end = (vreg, range.to.inst()); - log::debug!( + log::trace!( "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}", vreg, range, @@ -518,13 +520,13 @@ impl<'a, F: Function> Env<'a, F> { while prog_move_src_idx < self.prog_move_srcs.len() && self.prog_move_srcs[prog_move_src_idx].0 < move_src_start { - log::debug!(" -> skipping idx {}", prog_move_src_idx); + log::trace!(" -> skipping idx {}", prog_move_src_idx); prog_move_src_idx += 1; } while prog_move_src_idx < self.prog_move_srcs.len() && self.prog_move_srcs[prog_move_src_idx].0 < move_src_end { - log::debug!( + log::trace!( " -> setting idx {} ({:?}) to alloc {:?}", prog_move_src_idx, self.prog_move_srcs[prog_move_src_idx].0, @@ -551,7 +553,7 @@ impl<'a, F: Function> Env<'a, F> { } else { (vreg, range.to.inst().next()) }; - log::debug!( + log::trace!( "vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}", vreg, range, @@ -561,13 +563,13 @@ impl<'a, F: Function> Env<'a, F> { while prog_move_dst_idx < self.prog_move_dsts.len() && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_start { - log::debug!(" -> skipping idx {}", prog_move_dst_idx); + log::trace!(" -> skipping idx {}", prog_move_dst_idx); prog_move_dst_idx += 1; } while prog_move_dst_idx < self.prog_move_dsts.len() && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_end { - log::debug!( + log::trace!( " -> setting idx {} ({:?}) to alloc {:?}", prog_move_dst_idx, self.prog_move_dsts[prog_move_dst_idx].0, @@ -585,7 +587,7 @@ impl<'a, F: Function> Env<'a, F> { // from-vreg) tuple, find the from-alloc and all the // to-allocs, and insert moves on the block edge. half_moves.sort_unstable_by_key(|h| h.key); - log::debug!("halfmoves: {:?}", half_moves); + log::trace!("halfmoves: {:?}", half_moves); self.stats.halfmoves_count = half_moves.len(); let mut i = 0; @@ -608,7 +610,7 @@ impl<'a, F: Function> Env<'a, F> { } let last_dest = i; - log::debug!( + log::trace!( "halfmove match: src {:?} dests {:?}", src, &half_moves[first_dest..last_dest] @@ -687,7 +689,7 @@ impl<'a, F: Function> Env<'a, F> { for (progpoint, from_preg, to_preg, slot) in std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) { - log::debug!( + log::trace!( "multi-fixed-move constraint at {:?} from p{} to p{}", progpoint, from_preg.index(), @@ -761,7 +763,7 @@ impl<'a, F: Function> Env<'a, F> { input_reused.push(input_idx); let input_alloc = self.get_alloc(inst, input_idx); let output_alloc = self.get_alloc(inst, output_idx); - log::debug!( + log::trace!( "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}", inst, output_idx, @@ -808,7 +810,7 @@ impl<'a, F: Function> Env<'a, F> { for (&((_, from_inst), from_alloc), &((to_vreg, to_inst), to_alloc)) in prog_move_srcs.iter().zip(prog_move_dsts.iter()) { - log::debug!( + log::trace!( "program move at inst {:?}: alloc {:?} -> {:?} (v{})", from_inst, from_alloc, @@ -947,10 +949,10 @@ impl<'a, F: Function> Env<'a, F> { // that can be done one at a time. 
let scratch = self.env.scratch_by_class[regclass as u8 as usize]; let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch)); - log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); + log::trace!("parallel moves at pos {:?} prio {:?}", pos, prio); for m in moves { if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() { - log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); + log::trace!(" {} -> {}", m.from_alloc, m.to_alloc,); parallel_moves.add(m.from_alloc, m.to_alloc, m.to_vreg); } } @@ -981,7 +983,7 @@ impl<'a, F: Function> Env<'a, F> { let mut scratch_used_yet = false; for (src, dst, to_vreg) in resolved { - log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); + log::trace!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); let action = redundant_moves.process_move(src, dst, to_vreg); if !action.elide { if dst == Allocation::reg(scratch) { @@ -1058,10 +1060,10 @@ impl<'a, F: Function> Env<'a, F> { ); } } else { - log::debug!(" -> redundant move elided"); + log::trace!(" -> redundant move elided"); } if let Some((alloc, vreg)) = action.def_alloc { - log::debug!( + log::trace!( " -> converted to DefAlloc: alloc {} vreg {}", alloc, vreg @@ -1072,7 +1074,7 @@ impl<'a, F: Function> Env<'a, F> { } for m in &self_moves { - log::debug!( + log::trace!( "self move at pos {:?} prio {:?}: {} -> {} to_vreg {:?}", pos, prio, @@ -1083,7 +1085,7 @@ impl<'a, F: Function> Env<'a, F> { let action = redundant_moves.process_move(m.from_alloc, m.to_alloc, m.to_vreg); assert!(action.elide); if let Some((alloc, vreg)) = action.def_alloc { - log::debug!(" -> DefAlloc: alloc {} vreg {}", alloc, vreg); + log::trace!(" -> DefAlloc: alloc {} vreg {}", alloc, vreg); self.add_edit(pos, prio, Edit::DefAlloc { alloc, vreg }); } } diff --git a/src/ion/process.rs b/src/ion/process.rs index e501b508..5f43a96d 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -19,7 +19,7 @@ use super::{ Requirement, UseList, }; use crate::{ - Allocation, Function, Inst, InstPosition, OperandKind, OperandConstraint, PReg, ProgPoint, + Allocation, Function, Inst, InstPosition, OperandConstraint, OperandKind, PReg, ProgPoint, RegAllocError, }; use fxhash::FxHashSet; @@ -56,7 +56,7 @@ impl<'a, F: Function> Env<'a, F> { // `AllocRegResult::ConflictHighCost`. max_allowable_cost: Option, ) -> AllocRegResult { - log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); + log::trace!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); let mut conflicts = smallvec![]; let mut conflict_set = FxHashSet::default(); let mut max_conflict_weight = 0; @@ -83,7 +83,7 @@ impl<'a, F: Function> Env<'a, F> { .btree .range(from_key..) .peekable(); - log::debug!( + log::trace!( "alloc map for {:?} in range {:?}..: {:?}", reg, from_key, @@ -92,19 +92,19 @@ impl<'a, F: Function> Env<'a, F> { let mut first_conflict: Option = None; 'ranges: for entry in bundle_ranges { - log::debug!(" -> range LR {:?}: {:?}", entry.index, entry.range); + log::trace!(" -> range LR {:?}: {:?}", entry.index, entry.range); let key = LiveRangeKey::from_range(&entry.range); let mut skips = 0; 'alloc: loop { - log::debug!(" -> PReg range {:?}", preg_range_iter.peek()); + log::trace!(" -> PReg range {:?}", preg_range_iter.peek()); // Advance our BTree traversal until it is >= this bundle // range (i.e., skip PReg allocations in the BTree that // are completely before this bundle range). 
if preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { - log::debug!( + log::trace!( "Skipping PReg range {:?}", preg_range_iter.peek().unwrap().0 ); @@ -129,13 +129,13 @@ impl<'a, F: Function> Env<'a, F> { // If there are no more PReg allocations, we're done! if preg_range_iter.peek().is_none() { - log::debug!(" -> no more PReg allocations; so no conflict possible!"); + log::trace!(" -> no more PReg allocations; so no conflict possible!"); break 'ranges; } // If the current PReg range is beyond this range, there is no conflict; continue. if *preg_range_iter.peek().unwrap().0 > key { - log::debug!( + log::trace!( " -> next PReg allocation is at {:?}; moving to next VReg range", preg_range_iter.peek().unwrap().0 ); @@ -147,13 +147,13 @@ impl<'a, F: Function> Env<'a, F> { assert_eq!(preg_key, key); // Assert that this range overlaps. let preg_range = preg_range_iter.next().unwrap().1; - log::debug!(" -> btree contains range {:?} that overlaps", preg_range); + log::trace!(" -> btree contains range {:?} that overlaps", preg_range); if preg_range.is_valid() { - log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); + log::trace!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); // range from an allocated bundle: find the bundle and add to // conflicts list. let conflict_bundle = self.ranges[preg_range.index()].bundle; - log::debug!(" -> conflict bundle {:?}", conflict_bundle); + log::trace!(" -> conflict bundle {:?}", conflict_bundle); if !conflict_set.contains(&conflict_bundle) { conflicts.push(conflict_bundle); conflict_set.insert(conflict_bundle); @@ -164,7 +164,7 @@ impl<'a, F: Function> Env<'a, F> { if max_allowable_cost.is_some() && max_conflict_weight > max_allowable_cost.unwrap() { - log::debug!(" -> reached high cost, retrying early"); + log::trace!(" -> reached high cost, retrying early"); return AllocRegResult::ConflictHighCost; } } @@ -176,7 +176,7 @@ impl<'a, F: Function> Env<'a, F> { ))); } } else { - log::debug!(" -> conflict with fixed reservation"); + log::trace!(" -> conflict with fixed reservation"); // range from a direct use of the PReg (due to clobber). return AllocRegResult::ConflictWithFixed( max_conflict_weight, @@ -192,7 +192,7 @@ impl<'a, F: Function> Env<'a, F> { // We can allocate! Add our ranges to the preg's BTree. let preg = self.pregs[reg.index()].reg; - log::debug!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); + log::trace!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); self.bundles[bundle.index()].allocation = Allocation::reg(preg); for entry in &self.bundles[bundle.index()].ranges { self.pregs[reg.index()] @@ -205,7 +205,7 @@ impl<'a, F: Function> Env<'a, F> { } pub fn evict_bundle(&mut self, bundle: LiveBundleIndex) { - log::debug!( + log::trace!( "evicting bundle {:?}: alloc {:?}", bundle, self.bundles[bundle.index()].allocation @@ -213,7 +213,7 @@ impl<'a, F: Function> Env<'a, F> { let preg = match self.bundles[bundle.index()].allocation.as_reg() { Some(preg) => preg, None => { - log::debug!( + log::trace!( " -> has no allocation! 
{:?}", self.bundles[bundle.index()].allocation ); @@ -223,14 +223,14 @@ impl<'a, F: Function> Env<'a, F> { let preg_idx = PRegIndex::new(preg.index()); self.bundles[bundle.index()].allocation = Allocation::none(); for entry in &self.bundles[bundle.index()].ranges { - log::debug!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); + log::trace!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); self.pregs[preg_idx.index()] .allocations .btree .remove(&LiveRangeKey::from_range(&entry.range)); } let prio = self.bundles[bundle.index()].prio; - log::debug!(" -> prio {}; back into queue", prio); + log::trace!(" -> prio {}; back into queue", prio); self.allocation_queue .insert(bundle, prio as usize, PReg::invalid()); } @@ -240,22 +240,22 @@ impl<'a, F: Function> Env<'a, F> { } pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { - log::debug!("maximum_spill_weight_in_bundle_set: {:?}", bundles); + log::trace!("maximum_spill_weight_in_bundle_set: {:?}", bundles); let m = bundles .iter() .map(|&b| { let w = self.bundles[b.index()].cached_spill_weight(); - log::debug!("bundle{}: {}", b.index(), w); + log::trace!("bundle{}: {}", b.index(), w); w }) .max() .unwrap_or(0); - log::debug!(" -> max: {}", m); + log::trace!(" -> max: {}", m); m } pub fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { - log::debug!("recompute bundle properties: bundle {:?}", bundle); + log::trace!("recompute bundle properties: bundle {:?}", bundle); let minimal; let mut fixed = false; @@ -267,18 +267,18 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].prio = self.compute_bundle_prio(bundle); if first_range_data.vreg.is_invalid() { - log::debug!(" -> no vreg; minimal and fixed"); + log::trace!(" -> no vreg; minimal and fixed"); minimal = true; fixed = true; } else { for u in &first_range_data.uses { - log::debug!(" -> use: {:?}", u); + log::trace!(" -> use: {:?}", u); if let OperandConstraint::FixedReg(_) = u.operand.constraint() { - log::debug!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); + log::trace!(" -> fixed use at {:?}: {:?}", u.pos, u.operand); fixed = true; } if let OperandConstraint::Stack = u.operand.constraint() { - log::debug!(" -> stack use at {:?}: {:?}", u.pos, u.operand); + log::trace!(" -> stack use at {:?}: {:?}", u.pos, u.operand); stack = true; } if stack && fixed { @@ -289,7 +289,7 @@ impl<'a, F: Function> Env<'a, F> { // that it could cover just one ProgPoint, // i.e. X.Before..X.After, or two ProgPoints, // i.e. X.Before..X+1.Before. 
- log::debug!(" -> first range has range {:?}", first_range_data.range); + log::trace!(" -> first range has range {:?}", first_range_data.range); let bundle_start = self.bundles[bundle.index()] .ranges .first() @@ -298,22 +298,22 @@ impl<'a, F: Function> Env<'a, F> { .from; let bundle_end = self.bundles[bundle.index()].ranges.last().unwrap().range.to; minimal = bundle_start.inst() == bundle_end.prev().inst(); - log::debug!(" -> minimal: {}", minimal); + log::trace!(" -> minimal: {}", minimal); } let spill_weight = if minimal { if fixed { - log::debug!(" -> fixed and minimal: spill weight 2000000"); + log::trace!(" -> fixed and minimal: spill weight 2000000"); 2_000_000 } else { - log::debug!(" -> non-fixed and minimal: spill weight 1000000"); + log::trace!(" -> non-fixed and minimal: spill weight 1000000"); 1_000_000 } } else { let mut total = 0; for entry in &self.bundles[bundle.index()].ranges { let range_data = &self.ranges[entry.index.index()]; - log::debug!( + log::trace!( " -> uses spill weight: +{}", range_data.uses_spill_weight() ); @@ -321,7 +321,7 @@ impl<'a, F: Function> Env<'a, F> { } if self.bundles[bundle.index()].prio > 0 { - log::debug!( + log::trace!( " -> dividing by prio {}; final weight {}", self.bundles[bundle.index()].prio, total / self.bundles[bundle.index()].prio @@ -349,7 +349,7 @@ impl<'a, F: Function> Env<'a, F> { let mut w = 0; for u in &rangedata.uses { w += u.weight as u32; - log::debug!("range{}: use {:?}", range.index(), u); + log::trace!("range{}: use {:?}", range.index(), u); } rangedata.set_uses_spill_weight(w); if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { @@ -413,7 +413,7 @@ impl<'a, F: Function> Env<'a, F> { reg_hint: PReg, ) { self.stats.splits += 1; - log::debug!( + log::trace!( "split bundle {:?} at {:?} and requeue with reg hint (for first part) {:?}", bundle, split_at, @@ -451,7 +451,7 @@ impl<'a, F: Function> Env<'a, F> { break 'outer; } } - log::debug!(" -> first use loc is {:?}", first_use); + log::trace!(" -> first use loc is {:?}", first_use); split_at = match first_use { Some(pos) => { if pos.inst() == bundle_start.inst() { @@ -471,7 +471,7 @@ impl<'a, F: Function> Env<'a, F> { .next(), ), }; - log::debug!( + log::trace!( "split point is at bundle start; advancing to {:?}", split_at ); @@ -493,7 +493,7 @@ impl<'a, F: Function> Env<'a, F> { // which LR we need to split down the middle, then update the // current bundle, create a new one, and (re)-queue both. 
- log::debug!(" -> LRs: {:?}", self.bundles[bundle.index()].ranges); + log::trace!(" -> LRs: {:?}", self.bundles[bundle.index()].ranges); let mut last_lr_in_old_bundle_idx = 0; // last LR-list index in old bundle let mut first_lr_in_new_bundle_idx = 0; // first LR-list index in new bundle @@ -508,11 +508,11 @@ impl<'a, F: Function> Env<'a, F> { } } - log::debug!( + log::trace!( " -> last LR in old bundle: LR {:?}", self.bundles[bundle.index()].ranges[last_lr_in_old_bundle_idx] ); - log::debug!( + log::trace!( " -> first LR in new bundle: LR {:?}", self.bundles[bundle.index()].ranges[first_lr_in_new_bundle_idx] ); @@ -539,7 +539,7 @@ impl<'a, F: Function> Env<'a, F> { to: new_lr_list[0].range.to, }); self.ranges[new_lr.index()].vreg = self.ranges[orig_lr.index()].vreg; - log::debug!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); + log::trace!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); let first_use = self.ranges[orig_lr.index()] .uses .iter() @@ -576,7 +576,7 @@ impl<'a, F: Function> Env<'a, F> { } let new_bundle = self.create_bundle(); - log::debug!(" -> creating new bundle {:?}", new_bundle); + log::trace!(" -> creating new bundle {:?}", new_bundle); self.bundles[new_bundle.index()].spillset = spillset; for entry in &new_lr_list { self.ranges[entry.index.index()].bundle = new_bundle; @@ -598,7 +598,7 @@ impl<'a, F: Function> Env<'a, F> { let spill = self .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) .unwrap(); - log::debug!( + log::trace!( " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", bundle, entry.index, @@ -643,13 +643,13 @@ impl<'a, F: Function> Env<'a, F> { range, index: empty_lr, }); - log::debug!( + log::trace!( " -> bundle {:?} range {:?}: last use implies split point {:?}", bundle, entry.index, split ); - log::debug!( + log::trace!( " -> moving trailing empty region to new spill bundle {:?} with new LR {:?}", spill, empty_lr @@ -668,7 +668,7 @@ impl<'a, F: Function> Env<'a, F> { let spill = self .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) .unwrap(); - log::debug!( + log::trace!( " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", new_bundle, entry.index, @@ -713,13 +713,13 @@ impl<'a, F: Function> Env<'a, F> { range, index: empty_lr, }); - log::debug!( + log::trace!( " -> bundle {:?} range {:?}: first use implies split point {:?}", bundle, entry.index, first_use, ); - log::debug!( + log::trace!( " -> moving leading empty region to new spill bundle {:?} with new LR {:?}", spill, empty_lr @@ -754,7 +754,7 @@ impl<'a, F: Function> Env<'a, F> { } else { self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint }; - log::debug!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); + log::trace!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); if let Requirement::Conflict = req { // We have to split right away. 
We'll find a point to @@ -797,7 +797,7 @@ impl<'a, F: Function> Env<'a, F> { let mut attempts = 0; loop { attempts += 1; - log::debug!("attempt {}, req {:?}", attempts, req); + log::trace!("attempt {}, req {:?}", attempts, req); debug_assert!(attempts < 100 * self.func.insts()); let (class, fixed_preg) = match req { @@ -851,7 +851,7 @@ impl<'a, F: Function> Env<'a, F> { ) { self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); - log::debug!("trying preg {:?}", preg_idx); + log::trace!("trying preg {:?}", preg_idx); let scan_limit_cost = match ( lowest_cost_evict_conflict_cost, @@ -863,13 +863,13 @@ impl<'a, F: Function> Env<'a, F> { match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, scan_limit_cost) { AllocRegResult::Allocated(alloc) => { self.stats.process_bundle_reg_success_any += 1; - log::debug!(" -> allocated to any {:?}", preg_idx); + log::trace!(" -> allocated to any {:?}", preg_idx); self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint = alloc.as_reg().unwrap(); return Ok(()); } AllocRegResult::Conflict(bundles, first_conflict_point) => { - log::debug!( + log::trace!( " -> conflict with bundles {:?}, first conflict at {:?}", bundles, first_conflict_point @@ -901,7 +901,7 @@ impl<'a, F: Function> Env<'a, F> { } } AllocRegResult::ConflictWithFixed(max_cost, point) => { - log::debug!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); + log::trace!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); let loop_depth = self.cfginfo.approx_loop_depth [self.cfginfo.insn_block[point.inst().index()].index()]; @@ -932,12 +932,12 @@ impl<'a, F: Function> Env<'a, F> { // any with current bundle assignments. Hence, we will need // to either split or attempt to evict some bundles. - log::debug!( + log::trace!( " -> lowest cost evict: set {:?}, cost {:?}", lowest_cost_evict_conflict_set, lowest_cost_evict_conflict_cost, ); - log::debug!( + log::trace!( " -> lowest cost split: cost {:?}, point {:?}, reg {:?}", lowest_cost_split_conflict_cost, lowest_cost_split_conflict_point, @@ -951,7 +951,7 @@ impl<'a, F: Function> Env<'a, F> { ); let our_spill_weight = self.bundle_spill_weight(bundle); - log::debug!(" -> our spill weight: {}", our_spill_weight); + log::trace!(" -> our spill weight: {}", our_spill_weight); // We detect the "too-many-live-registers" case here and // return an error cleanly, rather than panicking, because @@ -966,7 +966,7 @@ impl<'a, F: Function> Env<'a, F> { if let Requirement::Register(class) = req { // Check if this is a too-many-live-registers situation. 
let range = self.bundles[bundle.index()].ranges[0].range; - log::debug!("checking for too many live regs"); + log::trace!("checking for too many live regs"); let mut min_bundles_assigned = 0; let mut fixed_assigned = 0; let mut total_regs = 0; @@ -974,7 +974,7 @@ impl<'a, F: Function> Env<'a, F> { .iter() .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) { - log::debug!(" -> PR {:?}", preg); + log::trace!(" -> PR {:?}", preg); let start = LiveRangeKey::from_range(&CodeRange { from: range.from.prev(), to: range.from.prev(), @@ -989,19 +989,19 @@ impl<'a, F: Function> Env<'a, F> { } if lr.is_valid() { if self.minimal_bundle(self.ranges[lr.index()].bundle) { - log::debug!(" -> min bundle {:?}", lr); + log::trace!(" -> min bundle {:?}", lr); min_bundles_assigned += 1; } else { - log::debug!(" -> non-min bundle {:?}", lr); + log::trace!(" -> non-min bundle {:?}", lr); } } else { - log::debug!(" -> fixed bundle"); + log::trace!(" -> fixed bundle"); fixed_assigned += 1; } } total_regs += 1; } - log::debug!( + log::trace!( " -> total {}, fixed {}, min {}", total_regs, fixed_assigned, @@ -1033,7 +1033,7 @@ impl<'a, F: Function> Env<'a, F> { || lowest_cost_evict_conflict_cost.is_none() || our_spill_weight <= lowest_cost_evict_conflict_cost.unwrap()) { - log::debug!( + log::trace!( " -> deciding to split: our spill weight is {}", self.bundle_spill_weight(bundle) ); @@ -1066,7 +1066,7 @@ impl<'a, F: Function> Env<'a, F> { // Evict all bundles in `conflicting bundles` and try again. self.stats.evict_bundle_event += 1; for &bundle in &lowest_cost_evict_conflict_set.unwrap() { - log::debug!(" -> evicting {:?}", bundle); + log::trace!(" -> evicting {:?}", bundle); self.evict_bundle(bundle); self.stats.evict_bundle_count += 1; } diff --git a/src/ion/redundant_moves.rs b/src/ion/redundant_moves.rs index 4367a47c..44f15d75 100644 --- a/src/ion/redundant_moves.rs +++ b/src/ion/redundant_moves.rs @@ -40,13 +40,13 @@ impl RedundantMoveEliminator { .map(|&p| p) .unwrap_or(RedundantMoveState::None); - log::debug!( + log::trace!( " -> redundant move tracker: from {} to {} to_vreg {:?}", from, to, to_vreg ); - log::debug!( + log::trace!( " -> from_state {:?} to_state {:?}", from_state, to_state @@ -67,29 +67,29 @@ impl RedundantMoveEliminator { RedundantMoveState::Orig(r) => Some(r), _ => None, }; - log::debug!(" -> src_vreg {:?}", src_vreg); + log::trace!(" -> src_vreg {:?}", src_vreg); let dst_vreg = to_vreg.or(src_vreg); - log::debug!(" -> dst_vreg {:?}", dst_vreg); + log::trace!(" -> dst_vreg {:?}", dst_vreg); let existing_dst_vreg = match to_state { RedundantMoveState::Copy(_, opt_r) => opt_r, RedundantMoveState::Orig(r) => Some(r), _ => None, }; - log::debug!(" -> existing_dst_vreg {:?}", existing_dst_vreg); + log::trace!(" -> existing_dst_vreg {:?}", existing_dst_vreg); let elide = match (from_state, to_state) { (_, RedundantMoveState::Copy(orig_alloc, _)) if orig_alloc == from => true, (RedundantMoveState::Copy(new_alloc, _), _) if new_alloc == to => true, _ => false, }; - log::debug!(" -> elide {}", elide); + log::trace!(" -> elide {}", elide); let def_alloc = if dst_vreg != existing_dst_vreg && dst_vreg.is_some() { Some((to, dst_vreg.unwrap())) } else { None }; - log::debug!(" -> def_alloc {:?}", def_alloc); + log::trace!(" -> def_alloc {:?}", def_alloc); // Invalidate all existing copies of `to` if `to` actually changed value. 
if !elide { @@ -100,7 +100,7 @@ impl RedundantMoveEliminator { if from.is_reg() || to.is_reg() { self.allocs .insert(to, RedundantMoveState::Copy(from, dst_vreg)); - log::debug!( + log::trace!( " -> create mapping {} -> {:?}", to, RedundantMoveState::Copy(from, dst_vreg) @@ -115,16 +115,16 @@ impl RedundantMoveEliminator { } pub fn clear(&mut self) { - log::debug!(" redundant move eliminator cleared"); + log::trace!(" redundant move eliminator cleared"); self.allocs.clear(); self.reverse_allocs.clear(); } pub fn clear_alloc(&mut self, alloc: Allocation) { - log::debug!(" redundant move eliminator: clear {:?}", alloc); + log::trace!(" redundant move eliminator: clear {:?}", alloc); if let Some(ref mut existing_copies) = self.reverse_allocs.get_mut(&alloc) { for to_inval in existing_copies.iter() { - log::debug!(" -> clear existing copy: {:?}", to_inval); + log::trace!(" -> clear existing copy: {:?}", to_inval); if let Some(val) = self.allocs.get_mut(to_inval) { match val { RedundantMoveState::Copy(_, Some(vreg)) => { diff --git a/src/ion/requirement.rs b/src/ion/requirement.rs index f6be76c2..1540fe4e 100644 --- a/src/ion/requirement.rs +++ b/src/ion/requirement.rs @@ -66,7 +66,9 @@ impl Requirement { pub fn from_operand(op: Operand) -> Requirement { match op.constraint() { OperandConstraint::FixedReg(preg) => Requirement::Fixed(preg), - OperandConstraint::Reg | OperandConstraint::Reuse(_) => Requirement::Register(op.class()), + OperandConstraint::Reg | OperandConstraint::Reuse(_) => { + Requirement::Register(op.class()) + } OperandConstraint::Stack => Requirement::Stack(op.class()), _ => Requirement::Any(op.class()), } @@ -76,17 +78,17 @@ impl Requirement { impl<'a, F: Function> Env<'a, F> { pub fn compute_requirement(&self, bundle: LiveBundleIndex) -> Requirement { let mut req = Requirement::Unknown; - log::debug!("compute_requirement: {:?}", bundle); + log::trace!("compute_requirement: {:?}", bundle); for entry in &self.bundles[bundle.index()].ranges { - log::debug!(" -> LR {:?}", entry.index); + log::trace!(" -> LR {:?}", entry.index); for u in &self.ranges[entry.index.index()].uses { - log::debug!(" -> use {:?}", u); + log::trace!(" -> use {:?}", u); let r = Requirement::from_operand(u.operand); req = req.merge(r); - log::debug!(" -> req {:?}", req); + log::trace!(" -> req {:?}", req); } } - log::debug!(" -> final: {:?}", req); + log::trace!(" -> final: {:?}", req); req } } diff --git a/src/ion/spill.rs b/src/ion/spill.rs index 2cc9b545..e625f89a 100644 --- a/src/ion/spill.rs +++ b/src/ion/spill.rs @@ -21,7 +21,7 @@ use crate::{Allocation, Function, SpillSlot}; impl<'a, F: Function> Env<'a, F> { pub fn try_allocating_regs_for_spilled_bundles(&mut self) { - log::debug!("allocating regs for spilled bundles"); + log::trace!("allocating regs for spilled bundles"); for i in 0..self.spilled_bundles.len() { let bundle = self.spilled_bundles[i]; // don't borrow self @@ -39,7 +39,7 @@ impl<'a, F: Function> Env<'a, F> { for preg in RegTraversalIter::new(self.env, class, hint, PReg::invalid(), bundle.index(), None) { - log::debug!("trying bundle {:?} to preg {:?}", bundle, preg); + log::trace!("trying bundle {:?} to preg {:?}", bundle, preg); let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) @@ -50,7 +50,7 @@ impl<'a, F: Function> Env<'a, F> { } } if !success { - log::debug!( + log::trace!( "spilling bundle {:?}: marking spillset {:?} as required", bundle, self.bundles[bundle.index()].spillset @@ -88,14 
+88,14 @@ impl<'a, F: Function> Env<'a, F> { for i in 0..self.spillsets[spillset.index()].vregs.len() { // don't borrow self let vreg = self.spillsets[spillset.index()].vregs[i]; - log::debug!( + log::trace!( "spillslot {:?} alloc'ed to spillset {:?}: vreg {:?}", spillslot, spillset, vreg, ); for entry in &self.vregs[vreg.index()].ranges { - log::debug!( + log::trace!( "spillslot {:?} getting range {:?} from LR {:?} from vreg {:?}", spillslot, entry.range, @@ -112,7 +112,7 @@ impl<'a, F: Function> Env<'a, F> { pub fn allocate_spillslots(&mut self) { for spillset in 0..self.spillsets.len() { - log::debug!("allocate spillslot: {}", spillset); + log::trace!("allocate spillslot: {}", spillset); let spillset = SpillSetIndex::new(spillset); if !self.spillsets[spillset.index()].required { continue; @@ -197,7 +197,7 @@ impl<'a, F: Function> Env<'a, F> { self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].class); } - log::debug!("spillslot allocator done"); + log::trace!("spillslot allocator done"); } pub fn allocate_spillslot(&mut self, class: RegClass) -> Allocation { diff --git a/src/ion/stackmap.rs b/src/ion/stackmap.rs index 56b2b5d9..c48475cc 100644 --- a/src/ion/stackmap.rs +++ b/src/ion/stackmap.rs @@ -31,10 +31,10 @@ impl<'a, F: Function> Env<'a, F> { // safepoints; and for each safepoint in the current range, // emit the allocation into the `safepoint_slots` list. - log::debug!("safepoints_per_vreg = {:?}", self.safepoints_per_vreg); + log::trace!("safepoints_per_vreg = {:?}", self.safepoints_per_vreg); for vreg in self.func.reftype_vregs() { - log::debug!("generating safepoint info for vreg {}", vreg); + log::trace!("generating safepoint info for vreg {}", vreg); let vreg = VRegIndex::new(vreg.vreg()); let mut safepoints: Vec = self .safepoints_per_vreg @@ -44,19 +44,19 @@ impl<'a, F: Function> Env<'a, F> { .map(|&inst| ProgPoint::before(inst)) .collect(); safepoints.sort_unstable(); - log::debug!(" -> live over safepoints: {:?}", safepoints); + log::trace!(" -> live over safepoints: {:?}", safepoints); let mut safepoint_idx = 0; for entry in &self.vregs[vreg.index()].ranges { let range = entry.range; let alloc = self.get_alloc_for_range(entry.index); - log::debug!(" -> range {:?}: alloc {}", range, alloc); + log::trace!(" -> range {:?}: alloc {}", range, alloc); while safepoint_idx < safepoints.len() && safepoints[safepoint_idx] < range.to { if safepoints[safepoint_idx] < range.from { safepoint_idx += 1; continue; } - log::debug!(" -> covers safepoint {:?}", safepoints[safepoint_idx]); + log::trace!(" -> covers safepoint {:?}", safepoints[safepoint_idx]); let slot = alloc .as_stack() @@ -68,6 +68,6 @@ impl<'a, F: Function> Env<'a, F> { } self.safepoint_slots.sort_unstable(); - log::debug!("final safepoint slots info: {:?}", self.safepoint_slots); + log::trace!("final safepoint slots info: {:?}", self.safepoint_slots); } } diff --git a/src/lib.rs b/src/lib.rs index bdcef7cd..30030816 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -228,7 +228,12 @@ pub struct Operand { impl Operand { #[inline(always)] - pub fn new(vreg: VReg, constraint: OperandConstraint, kind: OperandKind, pos: OperandPos) -> Self { + pub fn new( + vreg: VReg, + constraint: OperandConstraint, + kind: OperandKind, + pos: OperandPos, + ) -> Self { let (preg_field, constraint_field): (u32, u32) = match constraint { OperandConstraint::Any => (0, 0), OperandConstraint::Reg => (0, 1), From 1f30958b5a72242d788cba61b676cc51a1af0b3a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 12:13:56 -0700 
Subject: [PATCH 139/155] Improve domtree as per @Amanieu's feedback. --- fuzz/fuzz_targets/domtree.rs | 7 ++----- src/domtree.rs | 8 ++++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fuzz/fuzz_targets/domtree.rs b/fuzz/fuzz_targets/domtree.rs index 4cd8d79c..c89b443c 100644 --- a/fuzz/fuzz_targets/domtree.rs +++ b/fuzz/fuzz_targets/domtree.rs @@ -85,16 +85,13 @@ fn check_idom_violations(idom: &[Block], path: &Path) { let mut parent = idom[block.index()]; let mut domset = HashSet::new(); domset.insert(*block); - loop { - assert!(parent.is_valid()); + while parent.is_valid() { assert!(visited.contains(&parent)); domset.insert(parent); let next = idom[parent.index()]; - if next == parent { - break; - } parent = next; } + // Check that `dominates()` returns true for every block in domset, // and false for every other block. for domblock in 0..idom.len() { diff --git a/src/domtree.rs b/src/domtree.rs index 7677583f..4300e04f 100644 --- a/src/domtree.rs +++ b/src/domtree.rs @@ -98,6 +98,10 @@ pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>( } } + // Now set the start node's dominator-tree parent to "invalid"; + // this allows the loop in `dominates` to terminate. + idom[start.index()] = Block::invalid(); + idom } @@ -109,10 +113,6 @@ pub fn dominates(idom: &[Block], a: Block, mut b: Block) -> bool { if b.is_invalid() { return false; } - let parent = idom[b.index()]; - if b == parent { - return false; - } b = idom[b.index()]; } } From b76b7747d0c309e688fb67f60468ec8bcff0ade6 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:00:20 -0700 Subject: [PATCH 140/155] Fix comment in postorder.rs. --- src/postorder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/postorder.rs b/src/postorder.rs index 9e6eea86..96e9787f 100644 --- a/src/postorder.rs +++ b/src/postorder.rs @@ -3,7 +3,7 @@ * exception. See `LICENSE` for details. */ -//! Fast postorder computation with no allocations (aside from result). +//! Fast postorder computation. use crate::Block; use smallvec::{smallvec, SmallVec}; From 2f856435f413819ad1af05f62bddd08615c18d5e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:08:10 -0700 Subject: [PATCH 141/155] Review feedback. --- src/bitvec.rs | 15 ++++++++------- src/ion/mod.rs | 2 +- src/ion/moves.rs | 8 ++++---- src/lib.rs | 14 ++++++++------ 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index 5c2cc2f4..bb6665b0 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -265,13 +265,14 @@ pub struct SetBitsIter(u64); impl Iterator for SetBitsIter { type Item = usize; fn next(&mut self) -> Option { - if self.0 == 0 { - None - } else { - let bitidx = self.0.trailing_zeros(); - self.0 &= !(1 << bitidx); - Some(bitidx as usize) - } + // Build an `Option` so that on the nonzero path, + // the compiler can optimize the trailing-zeroes operator + // using that knowledge. 
+ std::num::NonZeroU64::new(self.0).map(|nz| { + let bitidx = nz.trailing_zeros(); + self.0 &= self.0 - 1; // clear highest set bit + bitidx as usize + }) } } diff --git a/src/ion/mod.rs b/src/ion/mod.rs index abbed9ef..b224f34d 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -96,7 +96,7 @@ impl<'a, F: Function> Env<'a, F> { self.compute_liveness()?; self.merge_vreg_bundles(); self.queue_bundles(); - if log::log_enabled!(log::Level::Debug) { + if log::log_enabled!(log::Level::Trace) { self.dump_state(); } Ok(()) diff --git a/src/ion/moves.rs b/src/ion/moves.rs index 351c0cad..45648903 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -411,9 +411,9 @@ impl<'a, F: Function> Env<'a, F> { from_block.index(), alloc, ); - #[cfg(debug)] + #[cfg(debug_assertions)] { - if log::log_enabled!(log::Level::Debug) { + if log::log_enabled!(log::Level::Trace) { self.annotate( self.cfginfo.block_entry[block.index()], format!( @@ -772,9 +772,9 @@ impl<'a, F: Function> Env<'a, F> { input_alloc ); if input_alloc != output_alloc { - #[cfg(debug)] + #[cfg(debug_assertions)] { - if log::log_enabled!(log::Level::Debug) { + if log::log_enabled!(log::Level::Trace) { self.annotate( ProgPoint::before(inst), format!( diff --git a/src/lib.rs b/src/lib.rs index 30030816..9e634b3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,15 +47,16 @@ impl PReg { /// Create a new PReg. The `hw_enc` range is 6 bits. #[inline(always)] - pub fn new(hw_enc: usize, class: RegClass) -> Self { - assert!(hw_enc <= Self::MAX); + pub const fn new(hw_enc: usize, class: RegClass) -> Self { PReg(hw_enc as u8, class) } /// The physical register number, as encoded by the ISA for the particular register class. #[inline(always)] pub fn hw_enc(self) -> usize { - self.0 as usize + let hw_enc = self.0 as usize; + debug_assert!(hw_enc <= Self::MAX); + hw_enc } /// The register class. @@ -121,14 +122,15 @@ impl VReg { pub const MAX: usize = (1 << Self::MAX_BITS) - 1; #[inline(always)] - pub fn new(virt_reg: usize, class: RegClass) -> Self { - assert!(virt_reg <= Self::MAX); + pub const fn new(virt_reg: usize, class: RegClass) -> Self { VReg(((virt_reg as u32) << 1) | (class as u8 as u32)) } #[inline(always)] pub fn vreg(self) -> usize { - (self.0 >> 1) as usize + let vreg = (self.0 >> 1) as usize; + debug_assert!(vreg <= Self::MAX); + vreg } #[inline(always)] From 7652b4b1096bd7897b76f8e5998e5e0bcb187b2c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:27:20 -0700 Subject: [PATCH 142/155] Review feedback. --- src/fuzzing/func.rs | 13 ++---- src/lib.rs | 110 ++++++++------------------------------------ 2 files changed, 23 insertions(+), 100 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index d50a1fd4..4c7f25ce 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -248,11 +248,10 @@ fn choose_dominating_block( if (allow_self || block != orig_block) && bool::arbitrary(u)? { break; } - if idom[block.index()] == block { + if idom[block.index()].is_invalid() { break; } block = idom[block.index()]; - assert!(block.is_valid()); } let block = if block != orig_block || allow_self { block @@ -591,12 +590,10 @@ impl std::fmt::Debug for Func { pub fn machine_env() -> MachineEnv { // Reg 31 is the scratch reg. 
let regs: Vec = (0..31).map(|i| PReg::new(i, RegClass::Int)).collect(); - let preferred_regs_by_class: Vec> = - vec![regs.iter().cloned().take(24).collect(), vec![]]; - let non_preferred_regs_by_class: Vec> = - vec![regs.iter().cloned().skip(24).collect(), vec![]]; - let scratch_by_class: Vec = - vec![PReg::new(31, RegClass::Int), PReg::new(0, RegClass::Float)]; + let preferred_regs_by_class: [Vec; 2] = [regs.iter().cloned().take(24).collect(), vec![]]; + let non_preferred_regs_by_class: [Vec; 2] = + [regs.iter().cloned().skip(24).collect(), vec![]]; + let scratch_by_class: [PReg; 2] = [PReg::new(31, RegClass::Int), PReg::new(0, RegClass::Float)]; MachineEnv { regs, preferred_regs_by_class, diff --git a/src/lib.rs b/src/lib.rs index 9e634b3c..70194474 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -219,10 +219,7 @@ impl std::fmt::Display for SpillSlot { /// `LAllocation` in Ion). #[derive(Clone, Copy, PartialEq, Eq)] pub struct Operand { - /// Bit-pack into 32 bits. Note that `constraint` overlaps with `kind` - /// in `Allocation` and we use mutually disjoint tag-value ranges - /// so that clients, if they wish, can track just one `u32` per - /// register slot and edit it in-place after allocation. + /// Bit-pack into 32 bits. /// /// constraint:3 kind:2 pos:1 class:1 preg:5 vreg:20 bits: u32, @@ -464,13 +461,7 @@ pub enum OperandPos { /// Operand. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Allocation { - /// Bit-pack in 32 bits. Note that `kind` overlaps with the - /// `constraint` field in `Operand`, and we are careful to use - /// disjoint ranges of values in this field for each type. We also - /// leave the def-or-use bit (`kind` for `Operand`) unused here so - /// that we can use it below in `OperandOrAllocation` to record - /// whether `Allocation`s are defs or uses (which is often useful - /// to know). + /// Bit-pack in 32 bits. /// /// kind:3 unused:1 index:28 bits: u32, @@ -519,9 +510,9 @@ impl Allocation { #[inline(always)] pub fn kind(self) -> AllocationKind { match (self.bits >> 29) & 7 { - 5 => AllocationKind::None, - 6 => AllocationKind::Reg, - 7 => AllocationKind::Stack, + 0 => AllocationKind::None, + 1 => AllocationKind::Reg, + 2 => AllocationKind::Stack, _ => unreachable!(), } } @@ -576,14 +567,12 @@ impl Allocation { } } -// N.B.: These values must be *disjoint* with the values used to -// encode `OperandConstraint`, because they share a 3-bit field. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u8)] pub enum AllocationKind { - None = 5, - Reg = 6, - Stack = 7, + None = 0, + Reg = 1, + Stack = 2, } impl Allocation { @@ -597,76 +586,6 @@ impl Allocation { } } -/// A helper that wraps either an `Operand` or an `Allocation` and is -/// able to tell which it is based on the tag bits. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct OperandOrAllocation { - bits: u32, -} - -impl OperandOrAllocation { - pub fn from_operand(operand: Operand) -> Self { - debug_assert!(operand.bits() >> 29 <= 4); - Self { - bits: operand.bits(), - } - } - pub fn from_alloc(alloc: Allocation) -> Self { - debug_assert!(alloc.bits() >> 29 >= 5); - Self { bits: alloc.bits() } - } - pub fn from_alloc_and_kind(alloc: Allocation, kind: OperandKind) -> Self { - debug_assert!(alloc.bits() >> 29 >= 5); - let bits = alloc.bits() - | match kind { - OperandKind::Def => 0, - OperandKind::Mod => 1 << 27, - OperandKind::Use => 2 << 27, - }; - Self { bits } - } - pub fn is_operand(&self) -> bool { - (self.bits >> 29) <= 4 - } - pub fn is_allocation(&self) -> bool { - (self.bits >> 29) >= 5 - } - pub fn as_operand(&self) -> Option { - if self.is_operand() { - Some(Operand::from_bits(self.bits)) - } else { - None - } - } - pub fn as_allocation(&self) -> Option { - if self.is_allocation() { - // Remove the kind (def/use/mod) bits -- the canonical - // `Allocation` does not have this, and we want allocs to - // continue to be comparable whether they are used for - // reads or writes. - Some(Allocation::from_bits(self.bits & !(3 << 27))) - } else { - None - } - } - - pub fn kind(&self) -> OperandKind { - let kind_field = (self.bits >> 28) & 1; - match kind_field { - 0 => OperandKind::Def, - 1 => OperandKind::Mod, - 2 => OperandKind::Use, - _ => unreachable!(), - } - } - - /// Replaces the Operand with an Allocation, keeping the def/use bit. - pub fn replace_with_alloc(&mut self, alloc: Allocation) { - self.bits &= 1 << 28; - self.bits |= alloc.bits; - } -} - /// A trait defined by the regalloc client to provide access to its /// machine-instruction / CFG representation. /// @@ -937,17 +856,24 @@ pub struct MachineEnv { pub regs: Vec, /// Preferred physical registers for each class. These are the /// registers that will be allocated first, if free. - pub preferred_regs_by_class: Vec>, + pub preferred_regs_by_class: [Vec; 2], /// Non-preferred physical registers for each class. These are the /// registers that will be allocated if a preferred register is /// not available; using one of these is considered suboptimal, /// but still better than spilling. - pub non_preferred_regs_by_class: Vec>, + pub non_preferred_regs_by_class: [Vec; 2], /// One scratch register per class. This is needed to perform /// moves between registers when cyclic move patterns occur. The /// register should not be placed in either the preferred or /// non-preferred list (i.e., it is not otherwise allocatable). - pub scratch_by_class: Vec, + /// + /// Note that the register allocator will freely use this register + /// between instructions, but *within* the machine code generated + /// by a single (regalloc-level) instruction, the client is free + /// to use the scratch register. E.g., if one "instruction" causes + /// the emission of two machine-code instructions, this lowering + /// can use the scratch register between them. + pub scratch_by_class: [PReg; 2], } /// The output of the register allocator. From 82b7e6ba7b3cdd8dd583653d056014b8ced29ce2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:33:35 -0700 Subject: [PATCH 143/155] Review feedback: bitvec: struct-like enum variants, and factor out one-item cache. 
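As a rough sketch of how a client might fill in the fixed-size per-class `MachineEnv` fields introduced in the preceding commit, consider the following; the register counts, the 8/7/1 preferred/non-preferred/scratch split, and the `example_env` name are invented for illustration and do not describe any real ISA:

    use regalloc2::{MachineEnv, PReg, RegClass};

    fn example_env() -> MachineEnv {
        // Hypothetical register file: 16 int and 16 float registers.
        let ints: Vec<PReg> = (0..16).map(|i| PReg::new(i, RegClass::Int)).collect();
        let floats: Vec<PReg> = (0..16).map(|i| PReg::new(i, RegClass::Float)).collect();
        MachineEnv {
            // Every register mentioned anywhere, allocatable or not.
            regs: ints.iter().chain(floats.iter()).cloned().collect(),
            // The per-class arrays are indexed by class: [Int, Float].
            preferred_regs_by_class: [ints[..8].to_vec(), floats[..8].to_vec()],
            non_preferred_regs_by_class: [ints[8..15].to_vec(), floats[8..15].to_vec()],
            // One scratch register per class, kept out of both lists above.
            scratch_by_class: [ints[15], floats[15]],
        }
    }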
--- src/bitvec.rs | 109 ++++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index bb6665b0..a226c0ba 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -21,35 +21,37 @@ const SMALL_ELEMS: usize = 12; /// cache to allow fast access when streaming through. #[derive(Clone, Debug)] enum AdaptiveMap { - Small( - u32, - [u32; SMALL_ELEMS], - [u64; SMALL_ELEMS], - Cell<(u32, u64)>, - ), - Large(FxHashMap, Cell<(u32, u64)>), + Small { + len: u32, + keys: [u32; SMALL_ELEMS], + values: [u64; SMALL_ELEMS], + }, + Large(FxHashMap), } const INVALID: u32 = 0xffff_ffff; impl AdaptiveMap { fn new() -> Self { - Self::Small( - 0, - [INVALID; SMALL_ELEMS], - [0; SMALL_ELEMS], - Cell::new((INVALID, 0)), - ) + Self::Small { + len: 0, + keys: [INVALID; SMALL_ELEMS], + values: [0; SMALL_ELEMS], + } } #[inline(never)] fn expand(&mut self) { match self { - &mut Self::Small(len, ref keys, ref values, ref cache) => { + &mut Self::Small { + len, + ref keys, + ref values, + } => { let mut map = FxHashMap::default(); for i in 0..len { map.insert(keys[i as usize], values[i as usize]); } - *self = Self::Large(map, cache.clone()); + *self = Self::Large(map); } _ => {} } @@ -57,7 +59,7 @@ impl AdaptiveMap { #[inline(always)] fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 { let needs_expand = match self { - &mut Self::Small(len, ref keys, ..) => { + &mut Self::Small { len, ref keys, .. } => { len == SMALL_ELEMS as u32 && !keys.iter().any(|k| *k == key) } _ => false, @@ -67,10 +69,11 @@ impl AdaptiveMap { } match self { - &mut Self::Small(ref mut len, ref mut keys, ref mut values, ref cached) => { - if cached.get().0 == key { - cached.set((INVALID, 0)); - } + &mut Self::Small { + ref mut len, + ref mut keys, + ref mut values, + } => { for i in 0..*len { if keys[i as usize] == key { return &mut values[i as usize]; @@ -83,21 +86,17 @@ impl AdaptiveMap { values[idx as usize] = 0; &mut values[idx as usize] } - &mut Self::Large(ref mut map, ref cached) => { - if cached.get().0 == key { - cached.set((INVALID, 0)); - } - map.entry(key).or_insert(0) - } + &mut Self::Large(ref mut map) => map.entry(key).or_insert(0), } } #[inline(always)] fn get_mut(&mut self, key: u32) -> Option<&mut u64> { match self { - &mut Self::Small(len, ref keys, ref mut values, ref cached) => { - if cached.get().0 == key { - cached.set((INVALID, 0)); - } + &mut Self::Small { + len, + ref keys, + ref mut values, + } => { for i in 0..len { if keys[i as usize] == key { return Some(&mut values[i as usize]); @@ -105,48 +104,39 @@ impl AdaptiveMap { } None } - &mut Self::Large(ref mut map, ref cached) => { - if cached.get().0 == key { - cached.set((INVALID, 0)); - } - map.get_mut(&key) - } + &mut Self::Large(ref mut map) => map.get_mut(&key), } } #[inline(always)] fn get(&self, key: u32) -> Option { match self { - &Self::Small(len, ref keys, ref values, ref cached) => { - if cached.get().0 == key { - return Some(cached.get().1); - } + &Self::Small { + len, + ref keys, + ref values, + } => { for i in 0..len { if keys[i as usize] == key { let value = values[i as usize]; - cached.set((key, value)); return Some(value); } } None } - &Self::Large(ref map, ref cached) => { - if cached.get().0 == key { - return Some(cached.get().1); - } + &Self::Large(ref map) => { let value = map.get(&key).cloned(); - if let Some(value) = value { - cached.set((key, value)); - } value } } } fn iter<'a>(&'a self) -> AdaptiveMapIter<'a> { match self { - &Self::Small(len, ref keys, ref 
values, ..) => { - AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize]) - } - &Self::Large(ref map, ..) => AdaptiveMapIter::Large(map.iter()), + &Self::Small { + len, + ref keys, + ref values, + } => AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize]), + &Self::Large(ref map) => AdaptiveMapIter::Large(map.iter()), } } } @@ -180,6 +170,7 @@ impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> { #[derive(Clone)] pub struct BitVec { elems: AdaptiveMap, + cache: Cell<(u32, u64)>, } const BITS_PER_WORD: usize = 64; @@ -188,25 +179,36 @@ impl BitVec { pub fn new() -> Self { Self { elems: AdaptiveMap::new(), + cache: Cell::new((INVALID, 0)), } } #[inline(always)] fn elem(&mut self, bit_index: usize) -> &mut u64 { let word_index = (bit_index / BITS_PER_WORD) as u32; + if self.cache.get().0 == word_index { + self.cache.set((INVALID, 0)); + } self.elems.get_or_insert(word_index) } #[inline(always)] fn maybe_elem_mut(&mut self, bit_index: usize) -> Option<&mut u64> { let word_index = (bit_index / BITS_PER_WORD) as u32; + if self.cache.get().0 == word_index { + self.cache.set((INVALID, 0)); + } self.elems.get_mut(word_index) } #[inline(always)] fn maybe_elem(&self, bit_index: usize) -> Option { let word_index = (bit_index / BITS_PER_WORD) as u32; - self.elems.get(word_index) + if self.cache.get().0 == word_index { + Some(self.cache.get().1) + } else { + self.elems.get(word_index) + } } #[inline(always)] @@ -221,6 +223,7 @@ impl BitVec { pub fn assign(&mut self, other: &Self) { self.elems = other.elems.clone(); + self.cache = other.cache.clone(); } #[inline(always)] From eaf8647fdf09ec33588b968e4a66401d2927253a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:40:18 -0700 Subject: [PATCH 144/155] BitVec: remove zero words to avoid expanding when unnecessary. --- src/bitvec.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index a226c0ba..ab453899 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -43,15 +43,33 @@ impl AdaptiveMap { fn expand(&mut self) { match self { &mut Self::Small { - len, - ref keys, - ref values, + ref mut len, + ref mut keys, + ref mut values, } => { - let mut map = FxHashMap::default(); - for i in 0..len { - map.insert(keys[i as usize], values[i as usize]); + // Note: we *may* remain as `Small` if there are any + // zero elements. Try removing them first, before we + // commit to a memory allocation. + if values.iter().any(|v| *v == 0) { + let mut out = 0; + for i in 0..(*len as usize) { + if values[i] == 0 { + continue; + } + if out < i { + keys[out] = keys[i]; + values[out] = values[i]; + } + out += 1; + } + *len = out as u32; + } else { + let mut map = FxHashMap::default(); + for i in 0..(*len as usize) { + map.insert(keys[i], values[i]); + } + *self = Self::Large(map); } - *self = Self::Large(map); } _ => {} } @@ -256,6 +274,15 @@ impl BitVec { set_bits(bits).map(move |i| BITS_PER_WORD * word_idx + i) }) } + + /// Is the adaptive data structure in "small" mode? This is meant + /// for testing assertions only. + pub(crate) fn is_small(&self) -> bool { + match &self.elems { + &AdaptiveMap::Small { .. } => true, + _ => false, + } + } } fn set_bits(bits: u64) -> impl Iterator { @@ -309,4 +336,18 @@ mod test { assert_eq!(sum, checksum); } + + #[test] + fn test_expand_remove_zero_elems() { + let mut vec = BitVec::new(); + // Set 12 different words (this is the max small-mode size). 
+ for i in 0..12 { + vec.set(64 * i, true); + } + // Now clear a bit, and set a bit in a different word. We + // should still be in small mode. + vec.set(64 * 5, false); + vec.set(64 * 100, true); + assert!(vec.is_small()); + } } From c071e44fc09596a1b82732cd6f70c84af351f27f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:43:13 -0700 Subject: [PATCH 145/155] Derive PartialOrd/Ord/Hash for Operand. --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 70194474..6511df7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -217,7 +217,7 @@ impl std::fmt::Display for SpillSlot { /// /// An Operand may be a use or def (this corresponds to `LUse` and /// `LAllocation` in Ion). -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Operand { /// Bit-pack into 32 bits. /// From ffc06b2099943771b582117393c03a350b4018da Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:49:42 -0700 Subject: [PATCH 146/155] Debug output for Operands: omit default/most common positions. --- src/lib.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6511df7f..c77540a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -403,11 +403,18 @@ impl std::fmt::Debug for Operand { impl std::fmt::Display for Operand { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match (self.kind(), self.pos()) { + (OperandKind::Def, OperandPos::After) + | (OperandKind::Mod | OperandKind::Use, OperandPos::Before) => { + write!(f, "{:?}", self.kind())?; + } + _ => { + write!(f, "{:?}@{:?}", self.kind(), self.pos())?; + } + } write!( f, - "{:?}@{:?}: {}{} {}", - self.kind(), - self.pos(), + ": {}{} {}", self.vreg(), match self.class() { RegClass::Int => "i", From f1a989f1b79b59699c5b88305d348d67e18e58ac Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 14:53:53 -0700 Subject: [PATCH 147/155] Add malloc/free optimization to TODO --- doc/TODO | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/TODO b/doc/TODO index c4bf58ad..3e430305 100644 --- a/doc/TODO +++ b/doc/TODO @@ -27,6 +27,8 @@ - Add limited inter-block redundant-move elimination: propagate across splits but not joins. +- Optimize allocations (some reports of 5-7% of time spent in allocator) + # Cleanup - Remove support for non-SSA code once no longer necessary \ No newline at end of file From 8ed83e3a573e5b6c72d4c68289a682fc3e863ac5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 15:40:34 -0700 Subject: [PATCH 148/155] Fix `BitVec::get_or_insert` to scan only once. --- src/bitvec.rs | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index ab453899..907756f1 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -39,6 +39,9 @@ impl AdaptiveMap { values: [0; SMALL_ELEMS], } } + + /// Expand into `Large` mode if we are at capacity and have no + /// zero-value pairs that can be trimmed. #[inline(never)] fn expand(&mut self) { match self { @@ -76,13 +79,25 @@ impl AdaptiveMap { } #[inline(always)] fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 { - let needs_expand = match self { + // Check whether the key is present and we are in small mode; + // if no to both, we need to expand first. + let (needs_expand, small_mode_idx) = match self { &mut Self::Small { len, ref keys, .. 
} => { - len == SMALL_ELEMS as u32 && !keys.iter().any(|k| *k == key) + // Perform this scan but do not return right away; + // doing so runs into overlapping-borrow issues + // because the current non-lexical lifetimes + // implementation is not able to see that the `self` + // mutable borrow on return is only on the + // early-return path. + let small_mode_idx = keys.iter().position(|k| *k == key); + let needs_expand = small_mode_idx.is_none() && len == SMALL_ELEMS as u32; + (needs_expand, small_mode_idx) } - _ => false, + _ => (false, None), }; + if needs_expand { + assert!(small_mode_idx.is_none()); self.expand(); } @@ -92,11 +107,14 @@ impl AdaptiveMap { ref mut keys, ref mut values, } => { - for i in 0..*len { - if keys[i as usize] == key { - return &mut values[i as usize]; - } + // If we found the key already while checking whether + // we need to expand above, use that index to return + // early. + if let Some(i) = small_mode_idx { + return &mut values[i]; } + // Otherwise, the key must not be present; add a new + // entry. assert!(*len < SMALL_ELEMS as u32); let idx = *len; *len += 1; From 69ad31f013a8d35e6f14c168d6ce3159a9363898 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 12 Aug 2021 17:35:55 -0700 Subject: [PATCH 149/155] Replace remaining instances of use of `debug` feature with `debug_assertions`. Also fix some code that did not build in debug mode anymore (d'oh!) in `src/ion/merges.rs`, as exposed by this change. --- src/ion/merge.rs | 6 +++--- src/moves.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ion/merge.rs b/src/ion/merge.rs index b7a969eb..1b04bd56 100644 --- a/src/ion/merge.rs +++ b/src/ion/merge.rs @@ -48,16 +48,16 @@ impl<'a, F: Function> Env<'a, F> { return false; } - #[cfg(debug)] + #[cfg(debug_assertions)] { // Sanity check: both bundles should contain only ranges with appropriate VReg classes. for entry in &self.bundles[from.index()].ranges { let vreg = self.ranges[entry.index.index()].vreg; - assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + assert_eq!(from_rc, self.vreg_regs[vreg.index()].class()); } for entry in &self.bundles[to.index()].ranges { let vreg = self.ranges[entry.index.index()].vreg; - assert_eq!(rc, self.vregs[vreg.index()].reg.class()); + assert_eq!(to_rc, self.vreg_regs[vreg.index()].class()); } } diff --git a/src/moves.rs b/src/moves.rs index 25b1e819..2cb10e64 100644 --- a/src/moves.rs +++ b/src/moves.rs @@ -78,11 +78,11 @@ impl ParallelMoves { // Sort moves by destination and check that each destination // has only one writer. self.parallel_moves.sort_by_key(|&(_, dst, _)| dst); - if cfg!(debug) { + if cfg!(debug_assertions) { let mut last_dst = None; for &(_, dst, _) in &self.parallel_moves { if last_dst.is_some() { - assert!(last_dst.unwrap() != dst); + debug_assert!(last_dst.unwrap() != dst); } last_dst = Some(dst); } From e10bffbca84218e9a3b78ca58ad177b20e3ad466 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sat, 14 Aug 2021 13:40:43 -0700 Subject: [PATCH 150/155] Fix bug in refactored BitVec (found by @Amanieu). --- src/bitvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitvec.rs b/src/bitvec.rs index 907756f1..b9592a17 100644 --- a/src/bitvec.rs +++ b/src/bitvec.rs @@ -89,7 +89,7 @@ impl AdaptiveMap { // implementation is not able to see that the `self` // mutable borrow on return is only on the // early-return path. 
- let small_mode_idx = keys.iter().position(|k| *k == key); + let small_mode_idx = keys.iter().take(len as usize).position(|k| *k == key); let needs_expand = small_mode_idx.is_none() && len == SMALL_ELEMS as u32; (needs_expand, small_mode_idx) } From 6d313f2b566299bc88b835bc526d99fe1411e52d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 30 Aug 2021 17:15:37 -0700 Subject: [PATCH 151/155] Address review comments: more doc comments and some minor refactorings. --- src/cfg.rs | 21 ++-- src/checker.rs | 49 +------- src/fuzzing/func.rs | 4 +- src/ion/dump.rs | 2 +- src/ion/liveranges.rs | 6 +- src/ion/merge.rs | 2 +- src/ion/mod.rs | 6 +- src/ion/moves.rs | 27 ++-- src/ion/process.rs | 2 +- src/lib.rs | 286 ++++++++++++++++++++++++++++++++---------- src/ssa.rs | 4 +- 11 files changed, 256 insertions(+), 153 deletions(-) diff --git a/src/cfg.rs b/src/cfg.rs index dc046ffc..f2abc47d 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -38,23 +38,24 @@ pub struct CFGInfo { impl CFGInfo { pub fn new(f: &F) -> Result { - let postorder = - postorder::calculate(f.blocks(), f.entry_block(), |block| f.block_succs(block)); + let postorder = postorder::calculate(f.num_blocks(), f.entry_block(), |block| { + f.block_succs(block) + }); let domtree = domtree::calculate( - f.blocks(), + f.num_blocks(), |block| f.block_preds(block), &postorder[..], f.entry_block(), ); - let mut insn_block = vec![Block::invalid(); f.insts()]; + let mut insn_block = vec![Block::invalid(); f.num_insts()]; let mut vreg_def_inst = vec![Inst::invalid(); f.num_vregs()]; let mut vreg_def_blockparam = vec![(Block::invalid(), 0); f.num_vregs()]; - let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; - let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; - let mut backedge_in = vec![0; f.blocks()]; - let mut backedge_out = vec![0; f.blocks()]; + let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()]; + let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()]; + let mut backedge_in = vec![0; f.num_blocks()]; + let mut backedge_out = vec![0; f.num_blocks()]; - for block in 0..f.blocks() { + for block in 0..f.num_blocks() { let block = Block::new(block); for (i, param) in f.block_params(block).iter().enumerate() { vreg_def_blockparam[param.vreg()] = (block, i as u32); @@ -116,7 +117,7 @@ impl CFGInfo { let mut approx_loop_depth = vec![]; let mut backedge_stack: SmallVec<[usize; 4]> = smallvec![]; let mut cur_depth = 0; - for block in 0..f.blocks() { + for block in 0..f.num_blocks() { if backedge_in[block] > 0 { cur_depth += 1; backedge_stack.push(backedge_in[block]); diff --git a/src/checker.rs b/src/checker.rs index 11bf2ce2..ac4ee7f8 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -389,17 +389,6 @@ impl CheckerState { self.allocations.remove(&Allocation::reg(*clobber)); } } - &CheckerInst::BlockParams { - ref vregs, - ref allocs, - .. - } => { - for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { - let reftyped = checker.reftyped_vregs.contains(vreg); - self.allocations - .insert(*alloc, CheckerValue::Reg(*vreg, reftyped)); - } - } &CheckerInst::DefAlloc { alloc, vreg } => { let reftyped = checker.reftyped_vregs.contains(&vreg); self.allocations @@ -478,14 +467,6 @@ pub(crate) enum CheckerInst { clobbers: Vec, }, - /// The top of a block with blockparams. We define the given vregs - /// into the given allocations. - BlockParams { - block: Block, - vregs: Vec, - allocs: Vec, - }, - /// Define an allocation's contents. 
Like BlockParams but for one /// allocation. Used sometimes when moves are elided but ownership /// of a value is logically transferred to a new vreg. @@ -514,7 +495,7 @@ impl<'a, F: Function> Checker<'a, F> { let mut bb_insts = HashMap::new(); let mut reftyped_vregs = HashSet::new(); - for block in 0..f.blocks() { + for block in 0..f.num_blocks() { let block = Block::new(block); bb_in.insert(block, Default::default()); bb_insts.insert(block, vec![]); @@ -548,7 +529,7 @@ impl<'a, F: Function> Checker<'a, F> { // For each original instruction, create an `Op`. let mut last_inst = None; let mut insert_idx = 0; - for block in 0..self.f.blocks() { + for block in 0..self.f.num_blocks() { let block = Block::new(block); for inst in self.f.block_insns(block).iter() { assert!(last_inst.is_none() || inst > last_inst.unwrap()); @@ -617,17 +598,6 @@ impl<'a, F: Function> Checker<'a, F> { .unwrap() .push(CheckerInst::DefAlloc { alloc, vreg }); } - &Edit::BlockParams { - ref vregs, - ref allocs, - } => { - let inst = CheckerInst::BlockParams { - block, - vregs: vregs.clone(), - allocs: allocs.clone(), - }; - self.bb_insts.get_mut(&block).unwrap().push(inst); - } } } } @@ -636,7 +606,7 @@ impl<'a, F: Function> Checker<'a, F> { fn analyze(&mut self) { let mut queue = VecDeque::new(); let mut queue_set = HashSet::new(); - for block in 0..self.f.blocks() { + for block in 0..self.f.num_blocks() { let block = Block::new(block); queue.push_back(block); queue_set.insert(block); @@ -718,7 +688,7 @@ impl<'a, F: Function> Checker<'a, F> { for vreg in self.f.reftype_vregs() { log::trace!(" REF: {}", vreg); } - for bb in 0..self.f.blocks() { + for bb in 0..self.f.num_blocks() { let bb = Block::new(bb); log::trace!("block{}:", bb.index()); let insts = self.bb_insts.get(&bb).unwrap(); @@ -743,17 +713,6 @@ impl<'a, F: Function> Checker<'a, F> { &CheckerInst::Move { from, into } => { log::trace!(" {} -> {}", from, into); } - &CheckerInst::BlockParams { - ref vregs, - ref allocs, - .. - } => { - let mut args = vec![]; - for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { - args.push(format!("{}:{}", vreg, alloc)); - } - log::trace!(" blockparams: {}", args.join(", ")); - } &CheckerInst::DefAlloc { alloc, vreg } => { log::trace!(" defalloc: {}:{}", vreg, alloc); } diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 4c7f25ce..0d0eeda5 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -75,11 +75,11 @@ pub struct Func { } impl Function for Func { - fn insts(&self) -> usize { + fn num_insts(&self) -> usize { self.insts.len() } - fn blocks(&self) -> usize { + fn num_blocks(&self) -> usize { self.blocks.len() } diff --git a/src/ion/dump.rs b/src/ion/dump.rs index ce0a09f9..b45d90c9 100644 --- a/src/ion/dump.rs +++ b/src/ion/dump.rs @@ -61,7 +61,7 @@ impl<'a, F: Function> Env<'a, F> { pub fn dump_results(&self) { log::info!("=== REGALLOC RESULTS ==="); - for block in 0..self.func.blocks() { + for block in 0..self.func.num_blocks() { let block = Block::new(block); log::info!( "block{}: [succs {:?} preds {:?}]", diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 639effa5..3160c6c4 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -82,7 +82,7 @@ impl<'a, F: Function> Env<'a, F> { self.vregs[v.vreg()].is_pinned = true; } // Create allocations too. 
- for inst in 0..self.func.insts() { + for inst in 0..self.func.num_insts() { let start = self.allocs.len() as u32; self.inst_alloc_offsets.push(start); for _ in 0..self.func.inst_operands(Inst::new(inst)).len() { @@ -247,7 +247,7 @@ impl<'a, F: Function> Env<'a, F> { pub fn compute_liveness(&mut self) -> Result<(), RegAllocError> { // Create initial LiveIn and LiveOut bitsets. - for _ in 0..self.func.blocks() { + for _ in 0..self.func.num_blocks() { self.liveins.push(BitVec::new()); self.liveouts.push(BitVec::new()); } @@ -347,7 +347,7 @@ impl<'a, F: Function> Env<'a, F> { let mut vreg_ranges: Vec = vec![LiveRangeIndex::invalid(); self.func.num_vregs()]; - for i in (0..self.func.blocks()).rev() { + for i in (0..self.func.num_blocks()).rev() { let block = Block::new(i); self.stats.livein_blocks += 1; diff --git a/src/ion/merge.rs b/src/ion/merge.rs index 1b04bd56..a5c4fe20 100644 --- a/src/ion/merge.rs +++ b/src/ion/merge.rs @@ -303,7 +303,7 @@ impl<'a, F: Function> Env<'a, F> { self.bundles[bundle.index()].spillset = ssidx; } - for inst in 0..self.func.insts() { + for inst in 0..self.func.num_insts() { let inst = Inst::new(inst); // Attempt to merge Reuse-constraint operand outputs with the diff --git a/src/ion/mod.rs b/src/ion/mod.rs index b224f34d..e2d73b58 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -44,14 +44,14 @@ impl<'a, F: Function> Env<'a, F> { cfginfo: CFGInfo, annotations_enabled: bool, ) -> Self { - let n = func.insts(); + let n = func.num_insts(); Self { func, env, cfginfo, - liveins: Vec::with_capacity(func.blocks()), - liveouts: Vec::with_capacity(func.blocks()), + liveins: Vec::with_capacity(func.num_blocks()), + liveouts: Vec::with_capacity(func.num_blocks()), blockparam_outs: vec![], blockparam_ins: vec![], blockparam_allocs: vec![], diff --git a/src/ion/moves.rs b/src/ion/moves.rs index 45648903..d8479e70 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -166,8 +166,8 @@ impl<'a, F: Function> Env<'a, F> { } } - let mut half_moves: Vec = Vec::with_capacity(6 * self.func.insts()); - let mut reuse_input_insts = Vec::with_capacity(self.func.insts() / 2); + let mut half_moves: Vec = Vec::with_capacity(6 * self.func.num_insts()); + let mut reuse_input_insts = Vec::with_capacity(self.func.num_insts() / 2); let mut blockparam_in_idx = 0; let mut blockparam_out_idx = 0; @@ -290,7 +290,7 @@ impl<'a, F: Function> Env<'a, F> { // same allocation) and if the vreg is live, add a // Source half-move. 
let mut block = self.cfginfo.insn_block[range.from.inst().index()]; - while block.is_valid() && block.index() < self.func.blocks() { + while block.is_valid() && block.index() < self.func.num_blocks() { if range.to < self.cfginfo.block_exit[block.index()].next() { break; } @@ -376,7 +376,7 @@ impl<'a, F: Function> Env<'a, F> { if self.cfginfo.block_entry[block.index()] < range.from { block = block.next(); } - while block.is_valid() && block.index() < self.func.blocks() { + while block.is_valid() && block.index() < self.func.num_blocks() { if self.cfginfo.block_entry[block.index()] >= range.to { break; } @@ -1114,11 +1114,13 @@ impl<'a, F: Function> Env<'a, F> { .collect::>(); assert_eq!(vregs.len(), self.func.block_params(block).len()); assert_eq!(allocs.len(), self.func.block_params(block).len()); - self.add_edit( - self.cfginfo.block_entry[block.index()], - InsertMovePrio::BlockParam, - Edit::BlockParams { vregs, allocs }, - ); + for (vreg, alloc) in vregs.into_iter().zip(allocs.into_iter()) { + self.add_edit( + self.cfginfo.block_entry[block.index()], + InsertMovePrio::BlockParam, + Edit::DefAlloc { alloc, vreg }, + ); + } } // Ensure edits are in sorted ProgPoint order. N.B.: this must @@ -1139,13 +1141,6 @@ impl<'a, F: Function> Env<'a, F> { format!("move {} -> {} ({:?})", from, to, to_vreg), ); } - &Edit::BlockParams { - ref vregs, - ref allocs, - } => { - let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); - self.annotate(ProgPoint::from_index(pos), s); - } &Edit::DefAlloc { alloc, vreg } => { let s = format!("defalloc {:?} := {:?}", alloc, vreg); self.annotate(ProgPoint::from_index(pos), s); diff --git a/src/ion/process.rs b/src/ion/process.rs index 5f43a96d..8f37b133 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -798,7 +798,7 @@ impl<'a, F: Function> Env<'a, F> { loop { attempts += 1; log::trace!("attempt {}, req {:?}", attempts, req); - debug_assert!(attempts < 100 * self.func.insts()); + debug_assert!(attempts < 100 * self.func.num_insts()); let (class, fixed_preg) = match req { Requirement::Fixed(preg) => (preg.class(), Some(preg)), diff --git a/src/lib.rs b/src/lib.rs index c77540a7..087e97e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,8 +37,24 @@ pub enum RegClass { } /// A physical register. Contains a physical register number and a class. +/// +/// The `hw_enc` field contains the physical register number and is in +/// a logically separate index space per class; in other words, Int +/// register 0 is different than Float register 0. +/// +/// Because of bit-packed encodings throughout the implementation, +/// `hw_enc` must fit in 5 bits, i.e., at most 32 registers per class. +/// +/// The value returned by `index()`, in contrast, is in a single index +/// space shared by all classes, in order to enable uniform reasoning +/// about physical registers. This is done by putting the class bit at +/// the MSB, or equivalently, declaring that indices 0..31 are the 32 +/// integer registers and indices 32..63 are the 32 float registers. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct PReg(u8, RegClass); +pub struct PReg { + hw_enc: u8, + class: RegClass, +} impl PReg { pub const MAX_BITS: usize = 5; @@ -48,21 +64,31 @@ impl PReg { /// Create a new PReg. The `hw_enc` range is 6 bits. #[inline(always)] pub const fn new(hw_enc: usize, class: RegClass) -> Self { - PReg(hw_enc as u8, class) + // We don't have const panics yet (rust-lang/rust#85194) so we + // need to use a little indexing trick here. 
We unfortunately + // can't use the `static-assertions` crate because we need + // this to work both for const `hw_enc` and for runtime + // values. + const HW_ENC_MUST_BE_IN_BOUNDS: &[bool; PReg::MAX + 1] = &[true; PReg::MAX + 1]; + let _ = HW_ENC_MUST_BE_IN_BOUNDS[hw_enc]; + + PReg { + hw_enc: hw_enc as u8, + class, + } } /// The physical register number, as encoded by the ISA for the particular register class. #[inline(always)] pub fn hw_enc(self) -> usize { - let hw_enc = self.0 as usize; - debug_assert!(hw_enc <= Self::MAX); + let hw_enc = self.hw_enc as usize; hw_enc } /// The register class. #[inline(always)] pub fn class(self) -> RegClass { - self.1 + self.class } /// Get an index into the (not necessarily contiguous) index space of @@ -70,7 +96,7 @@ impl PReg { /// all PRegs and index it efficiently. #[inline(always)] pub fn index(self) -> usize { - ((self.1 as u8 as usize) << 5) | (self.0 as usize) + ((self.class as u8 as usize) << 5) | (self.hw_enc as usize) } #[inline(always)] @@ -115,7 +141,9 @@ impl std::fmt::Display for PReg { /// A virtual register. Contains a virtual register number and a class. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct VReg(u32); +pub struct VReg { + bits: u32, +} impl VReg { pub const MAX_BITS: usize = 20; @@ -123,19 +151,25 @@ impl VReg { #[inline(always)] pub const fn new(virt_reg: usize, class: RegClass) -> Self { - VReg(((virt_reg as u32) << 1) | (class as u8 as u32)) + // See comment in `PReg::new()`: we are emulating a const + // assert here until const panics are stable. + const VIRT_REG_MUST_BE_IN_BOUNDS: &[bool; VReg::MAX + 1] = &[true; VReg::MAX + 1]; + let _ = VIRT_REG_MUST_BE_IN_BOUNDS[virt_reg]; + + VReg { + bits: ((virt_reg as u32) << 1) | (class as u8 as u32), + } } #[inline(always)] pub fn vreg(self) -> usize { - let vreg = (self.0 >> 1) as usize; - debug_assert!(vreg <= Self::MAX); + let vreg = (self.bits >> 1) as usize; vreg } #[inline(always)] pub fn class(self) -> RegClass { - match self.0 & 1 { + match self.bits & 1 { 0 => RegClass::Int, 1 => RegClass::Float, _ => unreachable!(), @@ -166,21 +200,25 @@ impl std::fmt::Display for VReg { } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct SpillSlot(u32); +pub struct SpillSlot { + bits: u32, +} impl SpillSlot { #[inline(always)] pub fn new(slot: usize, class: RegClass) -> Self { assert!(slot < (1 << 24)); - SpillSlot((slot as u32) | (class as u8 as u32) << 24) + SpillSlot { + bits: (slot as u32) | (class as u8 as u32) << 24, + } } #[inline(always)] pub fn index(self) -> usize { - (self.0 & 0x00ffffff) as usize + (self.bits & 0x00ffffff) as usize } #[inline(always)] pub fn class(self) -> RegClass { - match (self.0 >> 24) as u8 { + match (self.bits >> 24) as u8 { 0 => RegClass::Int, 1 => RegClass::Float, _ => unreachable!(), @@ -193,7 +231,7 @@ impl SpillSlot { #[inline(always)] pub fn invalid() -> Self { - SpillSlot(0xffff_ffff) + SpillSlot { bits: 0xffff_ffff } } #[inline(always)] pub fn is_invalid(self) -> bool { @@ -211,17 +249,78 @@ impl std::fmt::Display for SpillSlot { } } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandConstraint { + /// Any location is fine (register or stack slot). + Any, + /// Operand must be in a register. Register is read-only for Uses. + Reg, + /// Operand must be on the stack. + Stack, + /// Operand must be in a fixed register. + FixedReg(PReg), + /// On defs only: reuse a use's register. 
+ Reuse(usize), +} + +impl std::fmt::Display for OperandConstraint { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Any => write!(f, "any"), + Self::Reg => write!(f, "reg"), + Self::Stack => write!(f, "stack"), + Self::FixedReg(preg) => write!(f, "fixed({})", preg), + Self::Reuse(idx) => write!(f, "reuse({})", idx), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandKind { + Def = 0, + Mod = 1, + Use = 2, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandPos { + Before = 0, + After = 1, +} + /// An `Operand` encodes everything about a mention of a register in /// an instruction: virtual register number, and any constraint that /// applies to the register at this program point. /// /// An Operand may be a use or def (this corresponds to `LUse` and /// `LAllocation` in Ion). +/// +/// Generally, regalloc2 considers operands to have their effects at +/// one of two program points that surround an instruction: "Before" +/// or "After". All operands at a given program-point are assigned +/// non-conflicting locations based on their constraints. Each operand +/// has a "kind", one of use/def/mod, corresponding to +/// read/write/read-write, respectively. +/// +/// Usually, an instruction's inputs will be uses-at-Before and +/// outputs will be defs-at-After, though there are valid use-cases +/// for other combinations too. For example, a single "instruction" +/// seen by the regalloc that lowers into multiple machine +/// instructions and reads some of its inputs after it starts to write +/// outputs must either make those input(s) uses-at-After or those +/// output(s) defs-at-Before so that the conflict (overlap) is +/// properly accounted for. See comments on the constructors below for +/// more. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Operand { /// Bit-pack into 32 bits. /// /// constraint:3 kind:2 pos:1 class:1 preg:5 vreg:20 + /// + /// where `constraint` is an `OperandConstraint`, `kind` is an + /// `OperandKind`, `pos` is an `OperandPos`, `class` is a + /// `RegClass`, `preg` is a `PReg` or an index for a reused-input + /// constraint, and `vreg` is a vreg index. bits: u32, } @@ -259,6 +358,9 @@ impl Operand { } } + /// Create an `Operand` that designates a use of a VReg that must + /// be in a register, and that is used at the "before" point, + /// i.e., can be overwritten by a result. #[inline(always)] pub fn reg_use(vreg: VReg) -> Self { Operand::new( @@ -268,6 +370,10 @@ impl Operand { OperandPos::Before, ) } + + /// Create an `Operand` that designates a use of a VReg that must + /// be in a register, and that is used up until the "after" point, + /// i.e., must not conflict with any results. #[inline(always)] pub fn reg_use_at_end(vreg: VReg) -> Self { Operand::new( @@ -277,6 +383,11 @@ impl Operand { OperandPos::After, ) } + + /// Create an `Operand` that designates a definition of a VReg + /// that must be in a register, and that occurs at the "after" + /// point, i.e. may reuse a register that carried a use into this + /// instruction. #[inline(always)] pub fn reg_def(vreg: VReg) -> Self { Operand::new( @@ -286,6 +397,11 @@ impl Operand { OperandPos::After, ) } + + /// Create an `Operand` that designates a definition of a VReg + /// that must be in a register, and that occurs early at the + /// "before" point, i.e., must not conflict with any input to the + /// instruction. 
#[inline(always)] pub fn reg_def_at_start(vreg: VReg) -> Self { Operand::new( @@ -295,8 +411,17 @@ impl Operand { OperandPos::Before, ) } + + /// Create an `Operand` that designates a def (and use) of a + /// temporary *within* the instruction. This register is assumed + /// to be written by the instruction, and will not conflict with + /// any input or output, but should not be used after the + /// instruction completes. #[inline(always)] pub fn reg_temp(vreg: VReg) -> Self { + // For now a temp is equivalent to a def-at-start operand, + // which gives the desired semantics but does not enforce the + // "not reused later" constraint. Operand::new( vreg, OperandConstraint::Reg, @@ -304,6 +429,12 @@ impl Operand { OperandPos::Before, ) } + + /// Create an `Operand` that designates a def of a vreg that must + /// reuse the register assigned to an input to the + /// instruction. The input is identified by `idx` (is the `idx`th + /// `Operand` for the instruction) and must be constraint to a + /// register, i.e., be the result of `Operand::reg_use(vreg)`. #[inline(always)] pub fn reg_reuse_def(vreg: VReg, idx: usize) -> Self { Operand::new( @@ -313,6 +444,11 @@ impl Operand { OperandPos::After, ) } + + /// Create an `Operand` that designates a use of a vreg and + /// ensures that it is placed in the given, fixed PReg at the + /// use. It is guaranteed that the `Allocation` resulting for this + /// operand will be `preg`. #[inline(always)] pub fn reg_fixed_use(vreg: VReg, preg: PReg) -> Self { Operand::new( @@ -322,6 +458,11 @@ impl Operand { OperandPos::Before, ) } + + /// Create an `Operand` that designates a def of a vreg and + /// ensures that it is placed in the given, fixed PReg at the + /// def. It is guaranteed that the `Allocation` resulting for this + /// operand will be `preg`. #[inline(always)] pub fn reg_fixed_def(vreg: VReg, preg: PReg) -> Self { Operand::new( @@ -332,12 +473,17 @@ impl Operand { ) } + /// Get the virtual register designated by an operand. Every + /// operand must name some virtual register, even if it constrains + /// the operand to a fixed physical register as well; the vregs + /// are used to track dataflow. #[inline(always)] pub fn vreg(self) -> VReg { let vreg_idx = ((self.bits as usize) & VReg::MAX) as usize; VReg::new(vreg_idx, self.class()) } + /// Get the register class used by this operand. #[inline(always)] pub fn class(self) -> RegClass { let class_field = (self.bits >> 25) & 1; @@ -348,6 +494,8 @@ impl Operand { } } + /// Get the "kind" of this operand: a definition (write), a use + /// (read), or a "mod" / modify (a read followed by a write). #[inline(always)] pub fn kind(self) -> OperandKind { let kind_field = (self.bits >> 27) & 3; @@ -359,6 +507,10 @@ impl Operand { } } + /// Get the "position" of this operand, i.e., where its read + /// and/or write occurs: either before the instruction executes, + /// or after it does. Ordinarily, uses occur at "before" and defs + /// at "after", though there are cases where this is not true. #[inline(always)] pub fn pos(self) -> OperandPos { let pos_field = (self.bits >> 26) & 1; @@ -369,6 +521,8 @@ impl Operand { } } + /// Get the "constraint" of this operand, i.e., what requirements + /// its allocation must fulfill. #[inline(always)] pub fn constraint(self) -> OperandConstraint { let constraint_field = (self.bits >> 29) & 7; @@ -383,11 +537,14 @@ impl Operand { } } + /// Get the raw 32-bit encoding of this operand's fields. 
#[inline(always)] pub fn bits(self) -> u32 { self.bits } + /// Construct an `Operand` from the raw 32-bit encoding returned + /// from `bits()`. #[inline(always)] pub fn from_bits(bits: u32) -> Self { debug_assert!(bits >> 29 <= 4); @@ -425,45 +582,6 @@ impl std::fmt::Display for Operand { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum OperandConstraint { - /// Any location is fine (register or stack slot). - Any, - /// Operand must be in a register. Register is read-only for Uses. - Reg, - /// Operand must be on the stack. - Stack, - /// Operand must be in a fixed register. - FixedReg(PReg), - /// On defs only: reuse a use's register. Which use is given by `preg` field. - Reuse(usize), -} - -impl std::fmt::Display for OperandConstraint { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - Self::Any => write!(f, "any"), - Self::Reg => write!(f, "reg"), - Self::Stack => write!(f, "stack"), - Self::FixedReg(preg) => write!(f, "fixed({})", preg), - Self::Reuse(idx) => write!(f, "reuse({})", idx), - } - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum OperandKind { - Def = 0, - Mod = 1, - Use = 2, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum OperandPos { - Before = 0, - After = 1, -} - /// An Allocation represents the end result of regalloc for an /// Operand. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -511,7 +629,7 @@ impl Allocation { #[inline(always)] pub fn stack(slot: SpillSlot) -> Allocation { - Allocation::new(AllocationKind::Stack, slot.0 as usize) + Allocation::new(AllocationKind::Stack, slot.bits as usize) } #[inline(always)] @@ -556,7 +674,9 @@ impl Allocation { #[inline(always)] pub fn as_stack(self) -> Option { if self.kind() == AllocationKind::Stack { - Some(SpillSlot(self.index() as u32)) + Some(SpillSlot { + bits: self.index() as u32, + }) } else { None } @@ -604,10 +724,10 @@ pub trait Function { // ------------- /// How many instructions are there? - fn insts(&self) -> usize; + fn num_insts(&self) -> usize; /// How many blocks are there? - fn blocks(&self) -> usize; + fn num_blocks(&self) -> usize; /// Get the index of the entry block. fn entry_block(&self) -> Block; @@ -649,6 +769,16 @@ pub trait Function { fn branch_blockparam_arg_offset(&self, block: Block, insn: Inst) -> usize; /// Determine whether an instruction is a safepoint and requires a stackmap. + /// + /// Strictly speaking, these two parts (is a safepoint, requires a + /// stackmap) are orthogonal. An instruction could want to see a + /// stackmap of refs on the stack (without forcing them), or it + /// could want all refs to be on the stack (without knowing where + /// they are). Only the latter strictly follows from "is a + /// safepoint". But in practice, both are true at the same time, + /// so we combine the two notions: for regalloc2, a "safepoint + /// instruction" is one that both forces refs onto the stack, and + /// provides a stackmap indicating where they are. fn is_safepoint(&self, _: Inst) -> bool { false } @@ -664,7 +794,16 @@ pub trait Function { /// Get the Operands for an instruction. fn inst_operands(&self, insn: Inst) -> &[Operand]; - /// Get the clobbers for an instruction. + /// Get the clobbers for an instruction; these are the registers + /// that the instruction is known to overwrite, separate from its + /// outputs described by its `Operand`s. 
This can be used to, for + /// example, describe ABI-specified registers that are not + /// preserved by a call instruction, or fixed physical registers + /// written by an instruction but not used as a vreg output, or + /// fixed physical registers used as temps within an instruction + /// out of necessity. Every register written to by an instruction + /// must either be described by an Operand of kind `Def` or `Mod`, + /// or else must be a "clobber". fn inst_clobbers(&self, insn: Inst) -> &[PReg]; /// Get the number of `VReg` in use in this function. @@ -743,7 +882,15 @@ pub trait Function { } } -/// A position before or after an instruction. +/// A position before or after an instruction at which we can make an +/// edit. +/// +/// Note that this differs from `OperandPos` in that the former +/// describes specifically a constraint on an operand, while this +/// describes a program point. `OperandPos` could grow more options in +/// the future, for example if we decide that an "early write" or +/// "late read" phase makes sense, while `InstPosition` will always +/// describe these two insertion points. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u8)] pub enum InstPosition { @@ -839,13 +986,7 @@ pub enum Edit { to: Allocation, to_vreg: Option, }, - /// Define blockparams' locations. Note that this is not typically - /// turned into machine code, but can be useful metadata (e.g. for - /// the checker). - BlockParams { - vregs: Vec, - allocs: Vec, - }, + /// Define a particular Allocation to contain a particular VReg. Useful /// for the checker. DefAlloc { alloc: Allocation, vreg: VReg }, @@ -859,16 +1000,20 @@ pub enum Edit { pub struct MachineEnv { /// Physical registers. Every register that might be mentioned in /// any constraint must be listed here, even if it is not - /// allocatable under normal conditions. + /// allocatable (present in one of + /// `{preferred,non_preferred}_regs_by_class`). pub regs: Vec, + /// Preferred physical registers for each class. These are the /// registers that will be allocated first, if free. pub preferred_regs_by_class: [Vec; 2], + /// Non-preferred physical registers for each class. These are the /// registers that will be allocated if a preferred register is /// not available; using one of these is considered suboptimal, /// but still better than spilling. pub non_preferred_regs_by_class: [Vec; 2], + /// One scratch register per class. This is needed to perform /// moves between registers when cyclic move patterns occur. The /// register should not be placed in either the preferred or @@ -888,12 +1033,15 @@ pub struct MachineEnv { pub struct Output { /// How many spillslots are needed in the frame? pub num_spillslots: usize, + /// Edits (insertions or removals). Guaranteed to be sorted by /// program point. pub edits: Vec<(ProgPoint, Edit)>, + /// Allocations for each operand. Mapping from instruction to /// allocations provided by `inst_alloc_offsets` below. pub allocs: Vec, + /// Allocation offset in `allocs` for each instruction. pub inst_alloc_offsets: Vec, diff --git a/src/ssa.rs b/src/ssa.rs index de69841b..d8df647d 100644 --- a/src/ssa.rs +++ b/src/ssa.rs @@ -16,7 +16,7 @@ pub fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllo // dominates this one. Also check that for every block param and // inst def, that this is the only def. 
let mut defined = vec![false; f.num_vregs()]; - for block in 0..f.blocks() { + for block in 0..f.num_blocks() { let block = Block::new(block); for blockparam in f.block_params(block) { if defined[blockparam.vreg()] { @@ -62,7 +62,7 @@ pub fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllo // number of blockparams in their succs, and that the end of every // block ends in this branch or in a ret, and that there are no // other branches or rets in the middle of the block. - for block in 0..f.blocks() { + for block in 0..f.num_blocks() { let block = Block::new(block); let insns = f.block_insns(block); for insn in insns.iter() { From 3a18564e987a177c6bbef939969ea59f1c8062c1 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 30 Aug 2021 17:51:55 -0700 Subject: [PATCH 152/155] Addressed more review comments. --- src/checker.rs | 15 ++++- src/{bitvec.rs => indexset.rs} | 22 +++---- src/ion/data_structures.rs | 6 +- src/ion/liveranges.rs | 8 +-- src/lib.rs | 115 ++++++++++++++++++++++++++++++++- src/moves.rs | 14 ++-- 6 files changed, 150 insertions(+), 30 deletions(-) rename src/{bitvec.rs => indexset.rs} (95%) diff --git a/src/checker.rs b/src/checker.rs index ac4ee7f8..94660d74 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -17,6 +17,16 @@ //! conceptually generates a symbolic value "Vn" when storing to (or //! modifying) a virtual register. //! +//! These symbolic values are precise but partial: in other words, if +//! a physical register is described as containing a virtual register +//! at a program point, it must actually contain the value of this +//! register (modulo any analysis bugs); but it may resolve to +//! `Conflicts` even in cases where one *could* statically prove that +//! it contains a certain register, because the analysis is not +//! perfectly path-sensitive or value-sensitive. However, all +//! assignments *produced by our register allocator* should be +//! analyzed fully precisely. +//! //! Operand constraints (fixed register, register, any) are also checked //! at each operand. //! @@ -24,7 +34,8 @@ //! //! - map of: Allocation -> lattice value (top > Vn symbols (unordered) > bottom) //! -//! And the transfer functions for instructions are: +//! And the transfer functions for instructions are (where `A` is the +//! above map from allocated physical registers to symbolic values): //! //! - `Edit::Move` inserted by RA: [ alloc_d := alloc_s ] //! @@ -36,7 +47,7 @@ //! machine code, but we include their allocations so that this //! checker can work) //! -//! A[A_i] := meet(A_j, A_k, ...) +//! A[A_i] := meet(A[A_j], A[A_k], ...) //! //! - statement in pre-regalloc function [ V_i := op V_j, V_k, ... ] //! with allocated form [ A_i := op A_j, A_k, ... ] diff --git a/src/bitvec.rs b/src/indexset.rs similarity index 95% rename from src/bitvec.rs rename to src/indexset.rs index b9592a17..35d90ddc 100644 --- a/src/bitvec.rs +++ b/src/indexset.rs @@ -4,10 +4,6 @@ */ //! Index sets: sets of integers that represent indices into a space. -//! -//! For historical reasons this is called a `BitVec` but it is no -//! longer a dense bitvector; the chunked adaptive-sparse data -//! structure here has better performance. use fxhash::FxHashMap; use std::cell::Cell; @@ -201,17 +197,17 @@ impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> { } } -/// A conceptually infinite-length bitvector that allows bitwise operations and -/// iteration over set bits efficiently. 
+/// A conceptually infinite-length set of indices that allows union +/// and efficient iteration over elements. #[derive(Clone)] -pub struct BitVec { +pub struct IndexSet { elems: AdaptiveMap, cache: Cell<(u32, u64)>, } const BITS_PER_WORD: usize = 64; -impl BitVec { +impl IndexSet { pub fn new() -> Self { Self { elems: AdaptiveMap::new(), @@ -272,7 +268,7 @@ impl BitVec { } } - pub fn or(&mut self, other: &Self) -> bool { + pub fn union_with(&mut self, other: &Self) -> bool { let mut changed = 0; for (word_idx, bits) in other.elems.iter() { if bits == 0 { @@ -324,7 +320,7 @@ impl Iterator for SetBitsIter { } } -impl std::fmt::Debug for BitVec { +impl std::fmt::Debug for IndexSet { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let vals = self.iter().collect::>(); write!(f, "{:?}", vals) @@ -333,11 +329,11 @@ impl std::fmt::Debug for BitVec { #[cfg(test)] mod test { - use super::BitVec; + use super::IndexSet; #[test] fn test_set_bits_iter() { - let mut vec = BitVec::new(); + let mut vec = IndexSet::new(); let mut sum = 0; for i in 0..1024 { if i % 17 == 0 { @@ -357,7 +353,7 @@ mod test { #[test] fn test_expand_remove_zero_elems() { - let mut vec = BitVec::new(); + let mut vec = IndexSet::new(); // Set 12 different words (this is the max small-mode size). for i in 0..12 { vec.set(64 * i, true); diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs index 5a1ae838..33f56849 100644 --- a/src/ion/data_structures.rs +++ b/src/ion/data_structures.rs @@ -13,9 +13,9 @@ //! Data structures for backtracking allocator. -use crate::bitvec::BitVec; use crate::cfg::CFGInfo; use crate::index::ContainerComparator; +use crate::indexset::IndexSet; use crate::{ define_index, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, PReg, ProgPoint, RegClass, SpillSlot, VReg, @@ -267,8 +267,8 @@ pub struct Env<'a, F: Function> { pub func: &'a F, pub env: &'a MachineEnv, pub cfginfo: CFGInfo, - pub liveins: Vec, - pub liveouts: Vec, + pub liveins: Vec, + pub liveouts: Vec, /// Blockparam outputs: from-vreg, (end of) from-block, (start of) /// to-block, to-vreg. The field order is significant: these are sorted so /// that a scan over vregs, then blocks in each range, can scan in diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 3160c6c4..9e74ed39 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -18,7 +18,7 @@ use super::{ LiveRangeIndex, LiveRangeKey, LiveRangeListEntry, LiveRangeSet, PRegData, PRegIndex, RegClass, SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE, }; -use crate::bitvec::BitVec; +use crate::indexset::IndexSet; use crate::{ Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, ProgPoint, RegAllocError, VReg, @@ -248,8 +248,8 @@ impl<'a, F: Function> Env<'a, F> { pub fn compute_liveness(&mut self) -> Result<(), RegAllocError> { // Create initial LiveIn and LiveOut bitsets. 
for _ in 0..self.func.num_blocks() { - self.liveins.push(BitVec::new()); - self.liveouts.push(BitVec::new()); + self.liveins.push(IndexSet::new()); + self.liveouts.push(IndexSet::new()); } // Run a worklist algorithm to precisely compute liveins and @@ -301,7 +301,7 @@ impl<'a, F: Function> Env<'a, F> { } for &pred in self.func.block_preds(block) { - if self.liveouts[pred.index()].or(&live) { + if self.liveouts[pred.index()].union_with(&live) { if !workqueue_set.contains(&pred) { workqueue_set.insert(pred); workqueue.push_back(pred); diff --git a/src/lib.rs b/src/lib.rs index 087e97e9..379c05fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,9 +12,9 @@ #![allow(dead_code)] -pub mod bitvec; pub(crate) mod cfg; pub(crate) mod domtree; +pub mod indexset; pub(crate) mod ion; pub(crate) mod moves; pub(crate) mod postorder; @@ -30,6 +30,18 @@ pub mod checker; pub mod fuzzing; /// Register classes. +/// +/// Every value has a "register class", which is like a type at the +/// register-allocator level. Every register must belong to only one +/// class; i.e., they are disjoint. +/// +/// For tight bit-packing throughout our data structures, we support +/// only two classes, "int" and "float". This will usually be enough +/// on modern machines, as they have one class of general-purpose +/// integer registers of machine width (e.g. 64 bits), and another +/// class of float/vector registers used both for FP and for vector +/// operations. If needed, we could adjust bitpacking to allow for +/// more classes in the future. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum RegClass { Int = 0, @@ -99,6 +111,7 @@ impl PReg { ((self.class as u8 as usize) << 5) | (self.hw_enc as usize) } + /// Construct a PReg from the value returned from `.index()`. #[inline(always)] pub fn from_index(index: usize) -> Self { let class = (index >> 5) & 1; @@ -111,6 +124,8 @@ impl PReg { PReg::new(index, class) } + /// Return the "invalid PReg", which can be used to initialize + /// data structures. #[inline(always)] pub fn invalid() -> Self { PReg::new(Self::MAX, RegClass::Int) @@ -139,7 +154,16 @@ impl std::fmt::Display for PReg { } } -/// A virtual register. Contains a virtual register number and a class. +/// A virtual register. Contains a virtual register number and a +/// class. +/// +/// A virtual register ("vreg") corresponds to an SSA value for SSA +/// input, or just a register when we allow for non-SSA input. All +/// dataflow in the input program is specified via flow through a +/// virtual register; even uses of specially-constrained locations, +/// such as fixed physical registers, are done by using vregs, because +/// we need the vreg's live range in order to track the use of that +/// location. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct VReg { bits: u32, @@ -199,12 +223,19 @@ impl std::fmt::Display for VReg { } } +/// A spillslot is a space in the stackframe used by the allocator to +/// temporarily store a value. +/// +/// The allocator is responsible for allocating indices in this space, +/// and will specify how many spillslots have been used when the +/// allocation is completed. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SpillSlot { bits: u32, } impl SpillSlot { + /// Create a new SpillSlot of a given class. 
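The renamed `IndexSet` keeps the old bitvector's surface (`set`, `get`, `iter`) and only renames `or` to `union_with`, which returns whether the set grew; that return value is what drives the liveness worklist in `compute_liveness` above. A minimal sketch of that usage follows; it assumes the public `regalloc2::indexset` module path introduced by this patch and is illustrative only, not part of the diff.

    use regalloc2::indexset::IndexSet;

    // One propagation step of the worklist: flow a successor's live-in set
    // into a predecessor's live-out set.
    fn propagate(pred_liveout: &mut IndexSet, succ_livein: &IndexSet) -> bool {
        // `union_with` returns true iff any new index was added, i.e. the
        // predecessor must be pushed back onto the worklist.
        pred_liveout.union_with(succ_livein)
    }

    fn demo() {
        let mut livein = IndexSet::new();
        livein.set(3, true);   // vreg 3 is live into the successor
        livein.set(17, true);  // vreg 17 as well
        let mut liveout = IndexSet::new();
        assert!(propagate(&mut liveout, &livein));  // grew: re-enqueue the pred
        assert!(!propagate(&mut liveout, &livein)); // fixpoint reached
        assert!(liveout.get(3) && liveout.get(17));
    }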
#[inline(always)] pub fn new(slot: usize, class: RegClass) -> Self { assert!(slot < (1 << 24)); @@ -212,10 +243,14 @@ impl SpillSlot { bits: (slot as u32) | (class as u8 as u32) << 24, } } + + /// Get the spillslot index for this spillslot. #[inline(always)] pub fn index(self) -> usize { (self.bits & 0x00ffffff) as usize } + + /// Get the class for this spillslot. #[inline(always)] pub fn class(self) -> RegClass { match (self.bits >> 24) as u8 { @@ -224,19 +259,26 @@ impl SpillSlot { _ => unreachable!(), } } + + /// Get the spillslot `offset` slots away. #[inline(always)] pub fn plus(self, offset: usize) -> Self { SpillSlot::new(self.index() + offset, self.class()) } + /// Get the invalid spillslot, used for initializing data structures. #[inline(always)] pub fn invalid() -> Self { SpillSlot { bits: 0xffff_ffff } } + + /// Is this the invalid spillslot? #[inline(always)] pub fn is_invalid(self) -> bool { self == Self::invalid() } + + /// Is this a valid spillslot (not `SpillSlot::invalid()`)? #[inline(always)] pub fn is_valid(self) -> bool { self != Self::invalid() @@ -249,6 +291,14 @@ impl std::fmt::Display for SpillSlot { } } +/// An `OperandConstraint` specifies where a vreg's value must be +/// placed at a particular reference to that vreg via an +/// `Operand`. The constraint may be loose -- "any register of a given +/// class", for example -- or very specific, such as "this particular +/// physical register". The allocator's result will always satisfy all +/// given constraints; however, if the input has a combination of +/// constraints that are impossible to satisfy, then allocation may +/// fail. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum OperandConstraint { /// Any location is fine (register or stack slot). @@ -275,6 +325,8 @@ impl std::fmt::Display for OperandConstraint { } } +/// The "kind" of the operand: whether it reads a vreg (Use), writes a +/// vreg (Def), or reads and then writes (Mod, for "modify"). #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum OperandKind { Def = 0, @@ -282,6 +334,23 @@ pub enum OperandKind { Use = 2, } +/// The "position" of the operand: where it has its read/write +/// effects. These are positions "in" the instruction, and "before" +/// and "after" are relative to the instruction's actual semantics. In +/// other words, the allocator assumes that the instruction (i) +/// performs all reads of "before" operands, (ii) does its work, and +/// (iii) performs all writes of its "after" operands. +/// +/// A "write" (def) at "before" or a "read" (use) at "after" may be +/// slightly nonsensical, given the above; but, it is consistent with +/// the notion that the value (even if a result of execution) *could* +/// have been written to the register at "Before", or the value (even +/// if depended upon by the execution) *could* have been read from the +/// regster at "After". In other words, these write-before or +/// use-after operands ensure that the particular allocations are +/// valid for longer than usual and that a register is not reused +/// between the use (normally complete at "Before") and the def +/// (normally starting at "After"). See `Operand` for more. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum OperandPos { Before = 0, @@ -325,6 +394,7 @@ pub struct Operand { } impl Operand { + /// Construct a new operand. #[inline(always)] pub fn new( vreg: VReg, @@ -609,6 +679,7 @@ impl std::fmt::Display for Allocation { } impl Allocation { + /// Construct a new Allocation. 
#[inline(always)] pub(crate) fn new(kind: AllocationKind, index: usize) -> Self { assert!(index < (1 << 28)); @@ -617,21 +688,26 @@ impl Allocation { } } + /// Get the "none" allocation, which is distinct from the other + /// possibilities and is used to initialize data structures. #[inline(always)] pub fn none() -> Allocation { Allocation::new(AllocationKind::None, 0) } + /// Create an allocation into a register. #[inline(always)] pub fn reg(preg: PReg) -> Allocation { Allocation::new(AllocationKind::Reg, preg.index()) } + /// Create an allocation into a spillslot. #[inline(always)] pub fn stack(slot: SpillSlot) -> Allocation { Allocation::new(AllocationKind::Stack, slot.bits as usize) } + /// Get the allocation's "kind": none, register, or stack (spillslot). #[inline(always)] pub fn kind(self) -> AllocationKind { match (self.bits >> 29) & 7 { @@ -642,26 +718,32 @@ impl Allocation { } } + /// Is the allocation "none"? #[inline(always)] pub fn is_none(self) -> bool { self.kind() == AllocationKind::None } + /// Is the allocation a register? #[inline(always)] pub fn is_reg(self) -> bool { self.kind() == AllocationKind::Reg } + /// Is the allocation on the stack (a spillslot)? #[inline(always)] pub fn is_stack(self) -> bool { self.kind() == AllocationKind::Stack } + /// Get the index of the spillslot or register. If register, this + /// is an index that can be used by `PReg::from_index()`. #[inline(always)] pub fn index(self) -> usize { (self.bits & ((1 << 28) - 1)) as usize } + /// Get the allocation as a physical register, if any. #[inline(always)] pub fn as_reg(self) -> Option { if self.kind() == AllocationKind::Reg { @@ -671,6 +753,7 @@ impl Allocation { } } + /// Get the allocation as a spillslot, if any. #[inline(always)] pub fn as_stack(self) -> Option { if self.kind() == AllocationKind::Stack { @@ -682,11 +765,13 @@ impl Allocation { } } + /// Get the raw bits for the packed encoding of this allocation. #[inline(always)] pub fn bits(self) -> u32 { self.bits } + /// Construct an allocation from its packed encoding. #[inline(always)] pub fn from_bits(bits: u32) -> Self { debug_assert!(bits >> 29 >= 5); @@ -694,6 +779,8 @@ impl Allocation { } } +/// An allocation is one of two "kinds" (or "none"): register or +/// spillslot/stack. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u8)] pub enum AllocationKind { @@ -703,6 +790,7 @@ pub enum AllocationKind { } impl Allocation { + /// Get the register class of an allocation's value. #[inline(always)] pub fn class(self) -> RegClass { match self.kind() { @@ -919,25 +1007,35 @@ impl std::fmt::Debug for ProgPoint { } impl ProgPoint { + /// Create a new ProgPoint before or after the given instruction. #[inline(always)] pub fn new(inst: Inst, pos: InstPosition) -> Self { let bits = ((inst.0 as u32) << 1) | (pos as u8 as u32); Self { bits } } + + /// Create a new ProgPoint before the given instruction. #[inline(always)] pub fn before(inst: Inst) -> Self { Self::new(inst, InstPosition::Before) } + + /// Create a new ProgPoint after the given instruction. #[inline(always)] pub fn after(inst: Inst) -> Self { Self::new(inst, InstPosition::After) } + + /// Get the instruction that this ProgPoint is before or after. #[inline(always)] pub fn inst(self) -> Inst { // Cast to i32 to do an arithmetic right-shift, which will // preserve an `Inst::invalid()` (which is -1, or all-ones). Inst::new(((self.bits as i32) >> 1) as usize) } + + /// Get the "position" (Before or After) relative to the + /// instruction. 
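The accessors documented in this hunk make the packed `Allocation` encoding easy to exercise end to end. The following sketch is illustrative only (register and slot numbers are arbitrary) and relies just on the constructors shown above:

    use regalloc2::{Allocation, PReg, RegClass, SpillSlot};

    fn demo() {
        let preg = PReg::new(7, RegClass::Int);
        let in_reg = Allocation::reg(preg);
        assert!(in_reg.is_reg());
        assert_eq!(in_reg.as_reg(), Some(preg)); // round-trips through the packed bits
        assert_eq!(in_reg.class(), RegClass::Int);

        let slot = SpillSlot::new(4, RegClass::Float);
        let on_stack = Allocation::stack(slot);
        assert!(on_stack.is_stack());
        assert_eq!(on_stack.as_stack(), Some(slot));

        // The "none" allocation is the sentinel used to initialize tables.
        assert!(Allocation::none().is_none());
    }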
#[inline(always)] pub fn pos(self) -> InstPosition { match self.bits & 1 { @@ -946,22 +1044,33 @@ impl ProgPoint { _ => unreachable!(), } } + + /// Get the "next" program point: for After, this is the Before of + /// the next instruction, while for Before, this is After of the + /// same instruction. #[inline(always)] pub fn next(self) -> ProgPoint { Self { bits: self.bits + 1, } } + + /// Get the "previous" program point, the inverse of `.next()` + /// above. #[inline(always)] pub fn prev(self) -> ProgPoint { Self { bits: self.bits - 1, } } + + /// Convert to a raw encoding in 32 bits. #[inline(always)] pub fn to_index(self) -> u32 { self.bits } + + /// Construct from the raw 32-bit encoding. #[inline(always)] pub fn from_index(index: u32) -> Self { Self { bits: index } @@ -1061,6 +1170,7 @@ pub struct Output { } impl Output { + /// Get the allocations assigned to a given instruction. pub fn inst_allocs(&self, inst: Inst) -> &[Allocation] { let start = self.inst_alloc_offsets[inst.index()] as usize; let end = if inst.index() + 1 == self.inst_alloc_offsets.len() { @@ -1108,6 +1218,7 @@ impl std::fmt::Display for RegAllocError { impl std::error::Error for RegAllocError {} +/// Run the allocator. pub fn run( func: &F, env: &MachineEnv, diff --git a/src/moves.rs b/src/moves.rs index 2cb10e64..0bb388ea 100644 --- a/src/moves.rs +++ b/src/moves.rs @@ -68,12 +68,14 @@ impl ParallelMoves { // has only one writer (otherwise the effect of the parallel // move is undefined), each move can only block one other move // (with its one source corresponding to the one writer of - // that source). Thus, we *can only have simple cycles*: there - // are no SCCs that are more complex than that. We leverage - // this fact below to avoid having to do a full Tarjan SCC DFS - // (with lowest-index computation, etc.): instead, as soon as - // we find a cycle, we know we have the full cycle and we can - // do a cyclic move sequence and continue. + // that source). Thus, we *can only have simple cycles* (those + // that are a ring of nodes, i.e., with only one path from a + // node back to itself); there are no SCCs that are more + // complex than that. We leverage this fact below to avoid + // having to do a full Tarjan SCC DFS (with lowest-index + // computation, etc.): instead, as soon as we find a cycle, we + // know we have the full cycle and we can do a cyclic move + // sequence and continue. // Sort moves by destination and check that each destination // has only one writer. From b19fa4857f1a2612e5ae37bcaa62f035456358c8 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 31 Aug 2021 17:31:23 -0700 Subject: [PATCH 153/155] Rename operand positions to Early and Late, and make weights f16/f32 values. --- src/checker.rs | 4 +- src/fuzzing/func.rs | 10 ++-- src/ion/data_structures.rs | 12 +++-- src/ion/dump.rs | 2 +- src/ion/liveranges.rs | 104 +++++++++++++++++++++++++++---------- src/ion/moves.rs | 2 +- src/ion/process.rs | 23 ++++---- src/lib.rs | 80 ++++++++++++++-------------- 8 files changed, 145 insertions(+), 92 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index 94660d74..d074101a 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -306,8 +306,8 @@ impl CheckerState { // the requirements of the OperandConstraint. 
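The `ProgPoint` helpers documented just above pack (instruction, Before/After) into one `u32`, so stepping between program points is a single increment or decrement. A short sketch of the intended arithmetic, using a made-up instruction index:

    use regalloc2::{Inst, InstPosition, ProgPoint};

    fn demo() {
        let p = ProgPoint::before(Inst::new(5));
        assert_eq!(p.pos(), InstPosition::Before);

        let q = p.next(); // After of the same instruction
        assert_eq!((q.inst(), q.pos()), (Inst::new(5), InstPosition::After));

        let r = q.next(); // Before of the next instruction
        assert_eq!((r.inst(), r.pos()), (Inst::new(6), InstPosition::Before));

        assert_eq!(r.prev(), q);                            // prev() inverts next()
        assert_eq!(ProgPoint::from_index(r.to_index()), r); // raw 32-bit round trip
    }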
for (op, alloc) in operands.iter().zip(allocs.iter()) { let is_here = match (op.pos(), pos) { - (OperandPos::Before, InstPosition::Before) => true, - (OperandPos::After, InstPosition::After) => true, + (OperandPos::Early, InstPosition::Before) => true, + (OperandPos::Late, InstPosition::After) => true, _ => false, }; if !is_here { diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 0d0eeda5..657ed616 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -402,9 +402,9 @@ impl Func { while let Some(vreg) = vregs_by_block_to_be_defined[block].pop() { let def_constraint = OperandConstraint::arbitrary(u)?; let def_pos = if bool::arbitrary(u)? { - OperandPos::Before + OperandPos::Early } else { - OperandPos::After + OperandPos::Late }; let mut operands = vec![Operand::new( vreg, @@ -442,7 +442,7 @@ impl Func { vreg, use_constraint, OperandKind::Use, - OperandPos::Before, + OperandPos::Early, )); allocations.push(Allocation::none()); } @@ -456,7 +456,7 @@ impl Func { op.vreg(), OperandConstraint::Reuse(reused), op.kind(), - OperandPos::After, + OperandPos::Late, ); // Make sure reused input is a Reg. let op = operands[reused]; @@ -464,7 +464,7 @@ impl Func { op.vreg(), OperandConstraint::Reg, op.kind(), - OperandPos::Before, + OperandPos::Early, ); } else if opts.fixed_regs && bool::arbitrary(u)? { let mut fixed = vec![]; diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs index 33f56849..dbf1d777 100644 --- a/src/ion/data_structures.rs +++ b/src/ion/data_structures.rs @@ -13,6 +13,7 @@ //! Data structures for backtracking allocator. +use super::liveranges::SpillWeight; use crate::cfg::CFGInfo; use crate::index::ContainerComparator; use crate::indexset::IndexSet; @@ -141,14 +142,15 @@ impl LiveRange { self.uses_spill_weight_and_flags |= flag_word; } #[inline(always)] - pub fn uses_spill_weight(&self) -> u32 { - self.uses_spill_weight_and_flags & 0x1fff_ffff + pub fn uses_spill_weight(&self) -> SpillWeight { + let bits = (self.uses_spill_weight_and_flags & 0x1fff_ffff) << 2; + SpillWeight::from_f32(f32::from_bits(bits)) } #[inline(always)] - pub fn set_uses_spill_weight(&mut self, weight: u32) { - assert!(weight < (1 << 29)); + pub fn set_uses_spill_weight(&mut self, weight: SpillWeight) { + let weight_bits = (weight.to_f32().to_bits() >> 2) & 0x1fff_ffff; self.uses_spill_weight_and_flags = - (self.uses_spill_weight_and_flags & 0xe000_0000) | weight; + (self.uses_spill_weight_and_flags & 0xe000_0000) | weight_bits; } } diff --git a/src/ion/dump.rs b/src/ion/dump.rs index b45d90c9..0048f801 100644 --- a/src/ion/dump.rs +++ b/src/ion/dump.rs @@ -37,7 +37,7 @@ impl<'a, F: Function> Env<'a, F> { log::trace!("Ranges:"); for (i, r) in self.ranges.iter().enumerate() { log::trace!( - "range{}: range={:?} vreg={:?} bundle={:?} weight={}", + "range{}: range={:?} vreg={:?} bundle={:?} weight={:?}", i, r.range, r.vreg, diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 9e74ed39..e73c8214 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -26,26 +26,73 @@ use crate::{ use fxhash::FxHashSet; use smallvec::{smallvec, SmallVec}; use std::collections::{HashSet, VecDeque}; -use std::convert::TryFrom; + +/// A spill weight computed for a certain Use. 
+#[derive(Clone, Copy, Debug)] +pub struct SpillWeight(f32); #[inline(always)] pub fn spill_weight_from_constraint( constraint: OperandConstraint, loop_depth: usize, is_def: bool, -) -> u32 { +) -> SpillWeight { // A bonus of 1000 for one loop level, 4000 for two loop levels, // 16000 for three loop levels, etc. Avoids exponentiation. - // Bound `loop_depth` at 2 so that `hot_bonus` is at most 16000. - let loop_depth = std::cmp::min(2, loop_depth); - let hot_bonus = 1000 * (1 << (2 * loop_depth)); - let def_bonus = if is_def { 2000 } else { 0 }; - let constraint_bonus = match constraint { - OperandConstraint::Any => 1000, - OperandConstraint::Reg | OperandConstraint::FixedReg(_) => 2000, - _ => 0, + let loop_depth = std::cmp::min(10, loop_depth); + let hot_bonus: f32 = (0..loop_depth).fold(1000.0, |a, _| a * 4.0); + let def_bonus: f32 = if is_def { 2000.0 } else { 0.0 }; + let constraint_bonus: f32 = match constraint { + OperandConstraint::Any => 1000.0, + OperandConstraint::Reg | OperandConstraint::FixedReg(_) => 2000.0, + _ => 0.0, }; - hot_bonus + def_bonus + constraint_bonus + SpillWeight(hot_bonus + def_bonus + constraint_bonus) +} + +impl SpillWeight { + /// Convert a floating-point weight to a u16 that can be compactly + /// stored in a `Use`. We simply take the top 16 bits of the f32; this + /// is equivalent to the bfloat16 format + /// (https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). + pub fn to_bits(self) -> u16 { + (self.0.to_bits() >> 15) as u16 + } + + /// Convert a value that was returned from + /// `SpillWeight::to_bits()` back into a `SpillWeight`. Note that + /// some precision may be lost when round-tripping from a spill + /// weight to packed bits and back. + pub fn from_bits(bits: u16) -> SpillWeight { + let x = f32::from_bits((bits as u32) << 15); + SpillWeight(x) + } + + /// Get a zero spill weight. + pub fn zero() -> SpillWeight { + SpillWeight(0.0) + } + + /// Convert to a raw floating-point value. + pub fn to_f32(self) -> f32 { + self.0 + } + + /// Create a `SpillWeight` from a raw floating-point value. + pub fn from_f32(x: f32) -> SpillWeight { + SpillWeight(x) + } + + pub fn to_int(self) -> u32 { + self.0 as u32 + } +} + +impl std::ops::Add for SpillWeight { + type Output = SpillWeight; + fn add(self, other: SpillWeight) -> Self { + SpillWeight(self.0 + other.0) + } } impl<'a, F: Function> Env<'a, F> { @@ -196,10 +243,10 @@ impl<'a, F: Function> Env<'a, F> { loop_depth, operand.kind() != OperandKind::Use, ); - u.weight = u16::try_from(weight).expect("weight too large for u16 field"); + u.weight = weight.to_bits(); log::trace!( - "insert use {:?} into lr {:?} with weight {}", + "insert use {:?} into lr {:?} with weight {:?}", u, into, weight, @@ -212,9 +259,10 @@ impl<'a, F: Function> Env<'a, F> { self.ranges[into.index()].uses.push(u); // Update stats. 
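`SpillWeight` and its packing are crate-internal, so the following standalone sketch simply mirrors the `to_bits`/`from_bits` scheme above (shift by 15, keeping the exponent and top mantissa bits) to show that the 16-bit `Use::weight` field stores weights approximately rather than exactly; it is not the crate's own API.

    // Mirrors SpillWeight::to_bits()/from_bits() shown above.
    fn pack(weight: f32) -> u16 {
        (weight.to_bits() >> 15) as u16
    }

    fn unpack(bits: u16) -> f32 {
        f32::from_bits((bits as u32) << 15)
    }

    fn demo() {
        // A register-constrained def at loop depth 2, per
        // spill_weight_from_constraint: 1000.0 * 4.0 * 4.0 + 2000.0 + 2000.0.
        let w = 20000.0f32;
        let approx = unpack(pack(w));
        assert!(approx <= w);             // truncating mantissa bits only rounds down
        assert!((w - approx) / w < 0.01); // and stays well within 1% here
    }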
- self.ranges[into.index()].uses_spill_weight_and_flags += weight; + let range_weight = self.ranges[into.index()].uses_spill_weight() + weight; + self.ranges[into.index()].set_uses_spill_weight(range_weight); log::trace!( - " -> now range has weight {}", + " -> now range has weight {:?}", self.ranges[into.index()].uses_spill_weight(), ); } @@ -279,7 +327,7 @@ impl<'a, F: Function> Env<'a, F> { live.set(src.vreg().vreg(), true); } - for pos in &[OperandPos::After, OperandPos::Before] { + for pos in &[OperandPos::Late, OperandPos::Early] { for op in self.func.inst_operands(inst) { if op.pos() == *pos { let was_live = live.get(op.vreg().vreg()); @@ -437,9 +485,9 @@ impl<'a, F: Function> Env<'a, F> { assert_eq!(src.class(), dst.class()); assert_eq!(src.kind(), OperandKind::Use); - assert_eq!(src.pos(), OperandPos::Before); + assert_eq!(src.pos(), OperandPos::Early); assert_eq!(dst.kind(), OperandKind::Def); - assert_eq!(dst.pos(), OperandPos::After); + assert_eq!(dst.pos(), OperandPos::Late); // If both src and dest are pinned, emit the // move right here, right now. @@ -506,7 +554,7 @@ impl<'a, F: Function> Env<'a, F> { dst.vreg(), src.vreg(), OperandKind::Def, - OperandPos::After, + OperandPos::Late, ProgPoint::after(inst), ) } else { @@ -516,7 +564,7 @@ impl<'a, F: Function> Env<'a, F> { src.vreg(), dst.vreg(), OperandKind::Use, - OperandPos::Before, + OperandPos::Early, ProgPoint::after(inst), ) }; @@ -720,13 +768,13 @@ impl<'a, F: Function> Env<'a, F> { src.vreg(), src_constraint, OperandKind::Use, - OperandPos::After, + OperandPos::Late, ); let dst = Operand::new( dst.vreg(), dst_constraint, OperandKind::Def, - OperandPos::Before, + OperandPos::Early, ); if self.annotations_enabled { @@ -843,9 +891,9 @@ impl<'a, F: Function> Env<'a, F> { let operand = self.func.inst_operands(inst)[i]; let pos = match (operand.kind(), operand.pos()) { (OperandKind::Mod, _) => ProgPoint::before(inst), - (OperandKind::Def, OperandPos::Before) => ProgPoint::before(inst), - (OperandKind::Def, OperandPos::After) => ProgPoint::after(inst), - (OperandKind::Use, OperandPos::After) => ProgPoint::after(inst), + (OperandKind::Def, OperandPos::Early) => ProgPoint::before(inst), + (OperandKind::Def, OperandPos::Late) => ProgPoint::after(inst), + (OperandKind::Use, OperandPos::Late) => ProgPoint::after(inst), // If this is a branch, extend `pos` to // the end of the block. (Branch uses are // blockparams and need to be live at the @@ -858,12 +906,12 @@ impl<'a, F: Function> Env<'a, F> { // reused input, force `pos` to // `After`. (See note below for why; it's // very subtle!) 
- (OperandKind::Use, OperandPos::Before) + (OperandKind::Use, OperandPos::Early) if reused_input.is_some() && reused_input.unwrap() != i => { ProgPoint::after(inst) } - (OperandKind::Use, OperandPos::Before) => ProgPoint::before(inst), + (OperandKind::Use, OperandPos::Early) => ProgPoint::before(inst), }; if pos.pos() != cur_pos { @@ -1058,7 +1106,7 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_regs[vreg.index()], OperandConstraint::Stack, OperandKind::Use, - OperandPos::Before, + OperandPos::Early, ); log::trace!( diff --git a/src/ion/moves.rs b/src/ion/moves.rs index d8479e70..6ba259cc 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -759,7 +759,7 @@ impl<'a, F: Function> Env<'a, F> { let operand = self.func.inst_operands(inst)[output_idx]; if let OperandConstraint::Reuse(input_idx) = operand.constraint() { debug_assert!(!input_reused.contains(&input_idx)); - debug_assert_eq!(operand.pos(), OperandPos::After); + debug_assert_eq!(operand.pos(), OperandPos::Late); input_reused.push(input_idx); let input_alloc = self.get_alloc(inst, input_idx); let output_alloc = self.get_alloc(inst, output_idx); diff --git a/src/ion/process.rs b/src/ion/process.rs index 8f37b133..8c10a9bb 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -16,7 +16,7 @@ use super::{ spill_weight_from_constraint, CodeRange, Env, LiveBundleIndex, LiveBundleVec, LiveRangeFlag, LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry, PRegIndex, RegTraversalIter, - Requirement, UseList, + Requirement, SpillWeight, UseList, }; use crate::{ Allocation, Function, Inst, InstPosition, OperandConstraint, OperandKind, PReg, ProgPoint, @@ -310,23 +310,24 @@ impl<'a, F: Function> Env<'a, F> { 1_000_000 } } else { - let mut total = 0; + let mut total = SpillWeight::zero(); for entry in &self.bundles[bundle.index()].ranges { let range_data = &self.ranges[entry.index.index()]; log::trace!( - " -> uses spill weight: +{}", + " -> uses spill weight: +{:?}", range_data.uses_spill_weight() ); - total += range_data.uses_spill_weight(); + total = total + range_data.uses_spill_weight(); } if self.bundles[bundle.index()].prio > 0 { + let final_weight = (total.to_f32() as u32) / self.bundles[bundle.index()].prio; log::trace!( " -> dividing by prio {}; final weight {}", self.bundles[bundle.index()].prio, - total / self.bundles[bundle.index()].prio + final_weight ); - total / self.bundles[bundle.index()].prio + final_weight } else { 0 } @@ -346,9 +347,9 @@ impl<'a, F: Function> Env<'a, F> { pub fn recompute_range_properties(&mut self, range: LiveRangeIndex) { let rangedata = &mut self.ranges[range.index()]; - let mut w = 0; + let mut w = SpillWeight::zero(); for u in &rangedata.uses { - w += u.weight as u32; + w = w + SpillWeight::from_bits(u.weight); log::trace!("range{}: use {:?}", range.index(), u); } rangedata.set_uses_spill_weight(w); @@ -890,7 +891,8 @@ impl<'a, F: Function> Env<'a, F> { OperandConstraint::Reg, loop_depth as usize, /* is_def = */ true, - ); + ) + .to_int(); if lowest_cost_split_conflict_cost.is_none() || (conflict_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() @@ -909,7 +911,8 @@ impl<'a, F: Function> Env<'a, F> { OperandConstraint::Reg, loop_depth as usize, /* is_def = */ true, - ); + ) + .to_int(); if lowest_cost_split_conflict_cost.is_none() || (max_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() diff --git a/src/lib.rs b/src/lib.rs index 379c05fc..e5cda3ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -335,26 +335,27 @@ pub enum OperandKind { } /// The "position" of the 
operand: where it has its read/write -/// effects. These are positions "in" the instruction, and "before" -/// and "after" are relative to the instruction's actual semantics. In -/// other words, the allocator assumes that the instruction (i) -/// performs all reads of "before" operands, (ii) does its work, and -/// (iii) performs all writes of its "after" operands. +/// effects. These are positions "in" the instruction, and "early" and +/// "late" are relative to the instruction's main effect or +/// computation. In other words, the allocator assumes that the +/// instruction (i) performs all reads and writes of "early" operands, +/// (ii) does its work, and (iii) performs all reads and writes of its +/// "late" operands. /// -/// A "write" (def) at "before" or a "read" (use) at "after" may be -/// slightly nonsensical, given the above; but, it is consistent with -/// the notion that the value (even if a result of execution) *could* -/// have been written to the register at "Before", or the value (even -/// if depended upon by the execution) *could* have been read from the -/// regster at "After". In other words, these write-before or -/// use-after operands ensure that the particular allocations are -/// valid for longer than usual and that a register is not reused -/// between the use (normally complete at "Before") and the def -/// (normally starting at "After"). See `Operand` for more. +/// A "write" (def) at "early" or a "read" (use) at "late" may be +/// slightly nonsensical, given the above, if the read is necessary +/// for the computation or the write is a result of it. A way to think +/// of it is that the value (even if a result of execution) *could* +/// have been read or written at the given location without causing +/// any register-usage conflicts. In other words, these write-early or +/// use-late operands ensure that the particular allocations are valid +/// for longer than usual and that a register is not reused between +/// the use (normally complete at "Early") and the def (normally +/// starting at "Late"). See `Operand` for more. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum OperandPos { - Before = 0, - After = 1, + Early = 0, + Late = 1, } /// An `Operand` encodes everything about a mention of a register in @@ -365,21 +366,20 @@ pub enum OperandPos { /// `LAllocation` in Ion). /// /// Generally, regalloc2 considers operands to have their effects at -/// one of two program points that surround an instruction: "Before" -/// or "After". All operands at a given program-point are assigned +/// one of two points that exist in an instruction: "Early" or +/// "Late". All operands at a given program-point are assigned /// non-conflicting locations based on their constraints. Each operand /// has a "kind", one of use/def/mod, corresponding to /// read/write/read-write, respectively. /// -/// Usually, an instruction's inputs will be uses-at-Before and -/// outputs will be defs-at-After, though there are valid use-cases -/// for other combinations too. For example, a single "instruction" -/// seen by the regalloc that lowers into multiple machine -/// instructions and reads some of its inputs after it starts to write -/// outputs must either make those input(s) uses-at-After or those -/// output(s) defs-at-Before so that the conflict (overlap) is -/// properly accounted for. See comments on the constructors below for -/// more. 
+/// Usually, an instruction's inputs will be "early uses" and outputs +/// will be "late defs", though there are valid use-cases for other +/// combinations too. For example, a single "instruction" seen by the +/// regalloc that lowers into multiple machine instructions and reads +/// some of its inputs after it starts to write outputs must either +/// make those input(s) "late uses" or those output(s) "early defs" so +/// that the conflict (overlap) is properly accounted for. See +/// comments on the constructors below for more. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Operand { /// Bit-pack into 32 bits. @@ -437,7 +437,7 @@ impl Operand { vreg, OperandConstraint::Reg, OperandKind::Use, - OperandPos::Before, + OperandPos::Early, ) } @@ -450,7 +450,7 @@ impl Operand { vreg, OperandConstraint::Reg, OperandKind::Use, - OperandPos::After, + OperandPos::Late, ) } @@ -464,7 +464,7 @@ impl Operand { vreg, OperandConstraint::Reg, OperandKind::Def, - OperandPos::After, + OperandPos::Late, ) } @@ -478,7 +478,7 @@ impl Operand { vreg, OperandConstraint::Reg, OperandKind::Def, - OperandPos::Before, + OperandPos::Early, ) } @@ -496,7 +496,7 @@ impl Operand { vreg, OperandConstraint::Reg, OperandKind::Def, - OperandPos::Before, + OperandPos::Early, ) } @@ -511,7 +511,7 @@ impl Operand { vreg, OperandConstraint::Reuse(idx), OperandKind::Def, - OperandPos::After, + OperandPos::Late, ) } @@ -525,7 +525,7 @@ impl Operand { vreg, OperandConstraint::FixedReg(preg), OperandKind::Use, - OperandPos::Before, + OperandPos::Early, ) } @@ -539,7 +539,7 @@ impl Operand { vreg, OperandConstraint::FixedReg(preg), OperandKind::Def, - OperandPos::After, + OperandPos::Late, ) } @@ -585,8 +585,8 @@ impl Operand { pub fn pos(self) -> OperandPos { let pos_field = (self.bits >> 26) & 1; match pos_field { - 0 => OperandPos::Before, - 1 => OperandPos::After, + 0 => OperandPos::Early, + 1 => OperandPos::Late, _ => unreachable!(), } } @@ -631,8 +631,8 @@ impl std::fmt::Debug for Operand { impl std::fmt::Display for Operand { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match (self.kind(), self.pos()) { - (OperandKind::Def, OperandPos::After) - | (OperandKind::Mod | OperandKind::Use, OperandPos::Before) => { + (OperandKind::Def, OperandPos::Late) + | (OperandKind::Mod | OperandKind::Use, OperandPos::Early) => { write!(f, "{:?}", self.kind())?; } _ => { From 6389071e090896c6e1c87f9a65f9c7229840eb07 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 31 Aug 2021 17:42:50 -0700 Subject: [PATCH 154/155] Address review comments. --- src/checker.rs | 2 +- src/fuzzing/func.rs | 4 ++-- src/ion/liveranges.rs | 2 +- src/ion/moves.rs | 2 +- src/lib.rs | 52 ++++++++++++++++++++++++++----------------- 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/checker.rs b/src/checker.rs index d074101a..146dbeeb 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -550,7 +550,7 @@ impl<'a, F: Function> Checker<'a, F> { self.handle_edits(block, out, &mut insert_idx, ProgPoint::before(inst)); // If this is a safepoint, then check the spillslots at this point. 
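With the rename in place, the convenience constructors shown earlier in this patch still express the common patterns: plain inputs are uses at `Early`, plain outputs are defs at `Late`, and a reused def ties an output to one input's allocation. A minimal client-side sketch, illustrative only:

    use regalloc2::{Operand, VReg};

    // `dst = a + b` with independent registers: reads at Early, write at Late.
    fn three_address(dst: VReg, a: VReg, b: VReg) -> Vec<Operand> {
        vec![
            Operand::reg_use(a),
            Operand::reg_use(b),
            Operand::reg_def(dst),
        ]
    }

    // x86-style two-address op: the Late def of `dst` must share the register
    // assigned to operand 0 (`a`).
    fn two_address(dst: VReg, a: VReg, b: VReg) -> Vec<Operand> {
        vec![
            Operand::reg_use(a),
            Operand::reg_use(b),
            Operand::reg_reuse_def(dst, 0),
        ]
    }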
- if self.f.is_safepoint(inst) { + if self.f.requires_refs_on_stack(inst) { let slots = safepoint_slots.remove(&inst).unwrap_or_else(|| vec![]); let checkinst = CheckerInst::Safepoint { inst, slots }; diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 657ed616..6151a7c7 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -122,7 +122,7 @@ impl Function for Func { 0 } - fn is_safepoint(&self, insn: Inst) -> bool { + fn requires_refs_on_stack(&self, insn: Inst) -> bool { self.insts[insn.index()].is_safepoint } @@ -569,7 +569,7 @@ impl std::fmt::Debug for Func { i, params, succs, preds )?; for inst in blockrange.iter() { - if self.is_safepoint(inst) { + if self.requires_refs_on_stack(inst) { write!(f, " -- SAFEPOINT --\n")?; } write!( diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index e73c8214..501d9f5c 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -1011,7 +1011,7 @@ impl<'a, F: Function> Env<'a, F> { } } - if self.func.is_safepoint(inst) { + if self.func.requires_refs_on_stack(inst) { log::trace!("inst{} is safepoint", inst.index()); self.safepoints.push(inst); for vreg in live.iter() { diff --git a/src/ion/moves.rs b/src/ion/moves.rs index 6ba259cc..190292c8 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -860,7 +860,7 @@ impl<'a, F: Function> Env<'a, F> { return; } for inst in from.inst().index()..=to.inst().index() { - if this.func.is_safepoint(Inst::new(inst)) { + if this.func.requires_refs_on_stack(Inst::new(inst)) { redundant_moves.clear(); return; } diff --git a/src/lib.rs b/src/lib.rs index e5cda3ba..dda85755 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -856,18 +856,17 @@ pub trait Function { /// to the sum of blockparam counts for all successor blocks. fn branch_blockparam_arg_offset(&self, block: Block, insn: Inst) -> usize; - /// Determine whether an instruction is a safepoint and requires a stackmap. + /// Determine whether an instruction requires all reference-typed + /// values to be placed onto the stack. For these instructions, + /// stackmaps will be provided. /// - /// Strictly speaking, these two parts (is a safepoint, requires a - /// stackmap) are orthogonal. An instruction could want to see a - /// stackmap of refs on the stack (without forcing them), or it - /// could want all refs to be on the stack (without knowing where - /// they are). Only the latter strictly follows from "is a - /// safepoint". But in practice, both are true at the same time, - /// so we combine the two notions: for regalloc2, a "safepoint - /// instruction" is one that both forces refs onto the stack, and - /// provides a stackmap indicating where they are. - fn is_safepoint(&self, _: Inst) -> bool { + /// This is usually associated with the concept of a "safepoint", + /// though strictly speaking, a safepoint could also support + /// reference-typed values in registers if there were a way to + /// denote their locations and if this were acceptable to the + /// client. Usually garbage-collector implementations want to see + /// roots on the stack, so we do that for now. + fn requires_refs_on_stack(&self, _: Inst) -> bool { false } @@ -883,15 +882,28 @@ pub trait Function { fn inst_operands(&self, insn: Inst) -> &[Operand]; /// Get the clobbers for an instruction; these are the registers - /// that the instruction is known to overwrite, separate from its - /// outputs described by its `Operand`s. 
This can be used to, for - /// example, describe ABI-specified registers that are not - /// preserved by a call instruction, or fixed physical registers - /// written by an instruction but not used as a vreg output, or - /// fixed physical registers used as temps within an instruction - /// out of necessity. Every register written to by an instruction - /// must either be described by an Operand of kind `Def` or `Mod`, - /// or else must be a "clobber". + /// that, after the instruction has executed, hold values that are + /// arbitrary, separately from the usual outputs to the + /// instruction. It is invalid to read a register that has been + /// clobbered; the register allocator is free to assume that + /// clobbered registers are filled with garbage and available for + /// reuse. It will avoid storing any value in a clobbered register + /// that must be live across the instruction. + /// + /// Another way of seeing this is that a clobber is equivalent to + /// an "early def" of a fresh vreg that is not used anywhere else + /// in the program, with a fixed-register constraint that places + /// it in a given PReg chosen by the client prior to regalloc. + /// + /// Every register written by an instruction must either + /// correspond to (be assigned to) an Operand of kind `Def` or + /// `Mod`, or else must be a "clobber". + /// + /// This can be used to, for example, describe ABI-specified + /// registers that are not preserved by a call instruction, or + /// fixed physical registers written by an instruction but not + /// used as a vreg output, or fixed physical registers used as + /// temps within an instruction out of necessity. fn inst_clobbers(&self, insn: Inst) -> &[PReg]; /// Get the number of `VReg` in use in this function. From 6f0893d69df3641b434872e3a81cd8cc2caa06fc Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 31 Aug 2021 17:56:06 -0700 Subject: [PATCH 155/155] Address review comments. --- src/ion/merge.rs | 4 ++-- src/ion/moves.rs | 4 ++-- src/lib.rs | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/ion/merge.rs b/src/ion/merge.rs index a5c4fe20..f3eb808b 100644 --- a/src/ion/merge.rs +++ b/src/ion/merge.rs @@ -41,8 +41,8 @@ impl<'a, F: Function> Env<'a, F> { } // If either bundle is already assigned (due to a pinned vreg), don't merge. - if !self.bundles[from.index()].allocation.is_none() - || !self.bundles[to.index()].allocation.is_none() + if self.bundles[from.index()].allocation.is_some() + || self.bundles[to.index()].allocation.is_some() { log::trace!("one of the bundles is already assigned (pinned)"); return false; diff --git a/src/ion/moves.rs b/src/ion/moves.rs index 190292c8..56336c4e 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -817,8 +817,8 @@ impl<'a, F: Function> Env<'a, F> { to_alloc, to_vreg.index(), ); - assert!(!from_alloc.is_none()); - assert!(!to_alloc.is_none()); + assert!(from_alloc.is_some()); + assert!(to_alloc.is_some()); assert_eq!(from_inst, to_inst.prev()); // N.B.: these moves happen with the *same* priority as // LR-to-LR moves, because they work just like them: they diff --git a/src/lib.rs b/src/lib.rs index dda85755..19809089 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -298,7 +298,9 @@ impl std::fmt::Display for SpillSlot { /// physical register". The allocator's result will always satisfy all /// given constraints; however, if the input has a combination of /// constraints that are impossible to satisfy, then allocation may -/// fail. 
+/// fail or the allocator may panic (providing impossible constraints +/// is usually a programming error in the client, rather than a +/// function of bad input). #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum OperandConstraint { /// Any location is fine (register or stack slot). @@ -487,6 +489,13 @@ impl Operand { /// to be written by the instruction, and will not conflict with /// any input or output, but should not be used after the /// instruction completes. + /// + /// Note that within a single instruction, the dedicated scratch + /// register (as specified in the `MachineEnv`) is also always + /// available for use. The register allocator may use the register + /// *between* instructions in order to implement certain sequences + /// of moves, but will never hold a value live in the scratch + /// register across an instruction. #[inline(always)] pub fn reg_temp(vreg: VReg) -> Self { // For now a temp is equivalent to a def-at-start operand, @@ -724,6 +733,12 @@ impl Allocation { self.kind() == AllocationKind::None } + /// Is the allocation not "none"? + #[inline(always)] + pub fn is_some(self) -> bool { + self.kind() != AllocationKind::None + } + /// Is the allocation a register? #[inline(always)] pub fn is_reg(self) -> bool {