@@ -1150,39 +1150,61 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
1150
1150
/// the value, we will set and generate a branch to the appropriate
1151
1151
/// pre-binding block.
1152
1152
///
1153
- /// If we find that *NONE* of the candidates apply, we branch to the
1154
- /// `otherwise_block`, setting it to `Some` if required. In principle, this
1155
- /// means that the input list was not exhaustive, though at present we
1156
- /// sometimes are not smart enough to recognize all exhaustive inputs.
1153
+ /// If we find that *NONE* of the candidates apply, we branch to `otherwise_block`.
1157
1154
///
1158
1155
/// It might be surprising that the input can be non-exhaustive.
1159
1156
/// Indeed, initially, it is not, because all matches are
1160
1157
/// exhaustive in Rust. But during processing we sometimes divide
1161
1158
/// up the list of candidates and recurse with a non-exhaustive
1162
- /// list. This is important to keep the size of the generated code
1163
- /// under control. See [`Builder::test_candidates`] for more details.
1159
+ /// list. This is how our lowering approach (called "backtracking
1160
+ /// automaton" in the literature) works.
1161
+ /// See [`Builder::test_candidates`] for more details.
1164
1162
///
1165
1163
/// If `fake_borrows` is `Some`, then places which need fake borrows
1166
1164
/// will be added to it.
1167
1165
///
1168
- /// For an example of a case where we set `otherwise_block`, even for an
1169
- /// exhaustive match, consider:
1170
- ///
1166
+ /// For an example of how we use `otherwise_block`, consider:
1171
1167
/// ```
1172
- /// # fn foo(x : (bool, bool)) {
1173
- /// match x {
1174
- /// (true, true) => () ,
1175
- /// (_, false) => () ,
1176
- /// (false, true) => () ,
1168
+ /// # fn foo((x, y) : (bool, bool)) -> u32 {
1169
+ /// match (x, y) {
1170
+ /// (true, true) => 1 ,
1171
+ /// (_, false) => 2 ,
1172
+ /// (false, true) => 3 ,
1177
1173
/// }
1178
1174
/// # }
1179
1175
/// ```
1176
+ /// For this match, we generate something like:
1177
+ /// ```
1178
+ /// # fn foo((x, y): (bool, bool)) -> u32 {
1179
+ /// if x {
1180
+ /// if y {
1181
+ /// return 1
1182
+ /// } else {
1183
+ /// // continue
1184
+ /// }
1185
+ /// } else {
1186
+ /// // continue
1187
+ /// }
1188
+ /// if y {
1189
+ /// if x {
1190
+ /// // This is actually unreachable because the `(true, true)` case was handled above.
1191
+ /// // continue
1192
+ /// } else {
1193
+ /// return 3
1194
+ /// }
1195
+ /// } else {
1196
+ /// return 2
1197
+ /// }
1198
+ /// // this is the final `otherwise_block`, which is unreachable because the match was exhaustive.
1199
+ /// unreachable!()
1200
+ /// # }
1201
+ /// ```
1202
+ ///
1203
+ /// Every `continue` is an instance of branching to some `otherwise_block` somewhere deep within
1204
+ /// the algorithm. For more details on why we lower like this, see [`Builder::test_candidates`].
1180
1205
///
1181
- /// For this match, we check if `x.0` matches `true` (for the first
1182
- /// arm). If it doesn't match, we check `x.1`. If `x.1` is `true` we check
1183
- /// if `x.0` matches `false` (for the third arm). In the (impossible at
1184
- /// runtime) case when `x.0` is now `true`, we branch to
1185
- /// `otherwise_block`.
1206
+ /// Note how we test `x` twice. This is the tradeoff of backtracking automata: we prefer smaller
1207
+ /// code size at the expense of non-optimal code paths.
1186
1208
#[ instrument( skip( self , fake_borrows) , level = "debug" ) ]
1187
1209
fn match_candidates < ' pat > (
1188
1210
& mut self ,
@@ -1557,18 +1579,12 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
1557
1579
}
1558
1580
}
1559
1581
1560
- /// This is the most subtle part of the matching algorithm. At
1561
- /// this point, the input candidates have been fully simplified,
1562
- /// and so we know that all remaining match-pairs require some
1563
- /// sort of test. To decide what test to perform, we take the highest
1564
- /// priority candidate (the first one in the list, as of January 2021)
1565
- /// and extract the first match-pair from the list. From this we decide
1566
- /// what kind of test is needed using [`Builder::test`], defined in the
1567
- /// [`test` module](mod@test).
1582
+ /// Pick a test to run. Which test doesn't matter as long as it is guaranteed to fully match at
1583
+ /// least one match pair. We currently simply pick the test corresponding to the first match
1584
+ /// pair of the first candidate in the list.
1568
1585
///
1569
- /// *Note:* taking the first match pair is somewhat arbitrary, and
1570
- /// we might do better here by choosing more carefully what to
1571
- /// test.
1586
+ /// *Note:* taking the first match pair is somewhat arbitrary, and we might do better here by
1587
+ /// choosing more carefully what to test.
1572
1588
///
1573
1589
/// For example, consider the following possible match-pairs:
1574
1590
///
@@ -1580,121 +1596,19 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
1580
1596
/// [`Switch`]: TestKind::Switch
1581
1597
/// [`SwitchInt`]: TestKind::SwitchInt
1582
1598
/// [`Range`]: TestKind::Range
1583
- ///
1584
- /// Once we know what sort of test we are going to perform, this
1585
- /// test may also help us winnow down our candidates. So we walk over
1586
- /// the candidates (from high to low priority) and check. This
1587
- /// gives us, for each outcome of the test, a transformed list of
1588
- /// candidates. For example, if we are testing `x.0`'s variant,
1589
- /// and we have a candidate `(x.0 @ Some(v), x.1 @ 22)`,
1590
- /// then we would have a resulting candidate of `((x.0 as Some).0 @ v, x.1 @ 22)`.
1591
- /// Note that the first match-pair is now simpler (and, in fact, irrefutable).
1592
- ///
1593
- /// But there may also be candidates that the test just doesn't
1594
- /// apply to. The classical example involves wildcards:
1595
- ///
1596
- /// ```
1597
- /// # let (x, y, z) = (true, true, true);
1598
- /// match (x, y, z) {
1599
- /// (true , _ , true ) => true, // (0)
1600
- /// (_ , true , _ ) => true, // (1)
1601
- /// (false, false, _ ) => false, // (2)
1602
- /// (true , _ , false) => false, // (3)
1603
- /// }
1604
- /// # ;
1605
- /// ```
1606
- ///
1607
- /// In that case, after we test on `x`, there are 2 overlapping candidate
1608
- /// sets:
1609
- ///
1610
- /// - If the outcome is that `x` is true, candidates 0, 1, and 3
1611
- /// - If the outcome is that `x` is false, candidates 1 and 2
1612
- ///
1613
- /// Here, the traditional "decision tree" method would generate 2
1614
- /// separate code-paths for the 2 separate cases.
1615
- ///
1616
- /// In some cases, this duplication can create an exponential amount of
1617
- /// code. This is most easily seen by noticing that this method terminates
1618
- /// with precisely the reachable arms being reachable - but that problem
1619
- /// is trivially NP-complete:
1620
- ///
1621
- /// ```ignore (illustrative)
1622
- /// match (var0, var1, var2, var3, ...) {
1623
- /// (true , _ , _ , false, true, ...) => false,
1624
- /// (_ , true, true , false, _ , ...) => false,
1625
- /// (false, _ , false, false, _ , ...) => false,
1626
- /// ...
1627
- /// _ => true
1628
- /// }
1629
- /// ```
1630
- ///
1631
- /// Here the last arm is reachable only if there is an assignment to
1632
- /// the variables that does not match any of the literals. Therefore,
1633
- /// compilation would take an exponential amount of time in some cases.
1634
- ///
1635
- /// That kind of exponential worst-case might not occur in practice, but
1636
- /// our simplistic treatment of constants and guards would make it occur
1637
- /// in very common situations - for example [#29740]:
1638
- ///
1639
- /// ```ignore (illustrative)
1640
- /// match x {
1641
- /// "foo" if foo_guard => ...,
1642
- /// "bar" if bar_guard => ...,
1643
- /// "baz" if baz_guard => ...,
1644
- /// ...
1645
- /// }
1646
- /// ```
1647
- ///
1648
- /// [#29740]: https://github.com/rust-lang/rust/issues/29740
1649
- ///
1650
- /// Here we first test the match-pair `x @ "foo"`, which is an [`Eq` test].
1651
- ///
1652
- /// [`Eq` test]: TestKind::Eq
1653
- ///
1654
- /// It might seem that we would end up with 2 disjoint candidate
1655
- /// sets, consisting of the first candidate or the other two, but our
1656
- /// algorithm doesn't reason about `"foo"` being distinct from the other
1657
- /// constants; it considers the latter arms to potentially match after
1658
- /// both outcomes, which obviously leads to an exponential number
1659
- /// of tests.
1660
- ///
1661
- /// To avoid these kinds of problems, our algorithm tries to ensure
1662
- /// the amount of generated tests is linear. When we do a k-way test,
1663
- /// we return an additional "unmatched" set alongside the obvious `k`
1664
- /// sets. When we encounter a candidate that would be present in more
1665
- /// than one of the sets, we put it and all candidates below it into the
1666
- /// "unmatched" set. This ensures these `k+1` sets are disjoint.
1667
- ///
1668
- /// After we perform our test, we branch into the appropriate candidate
1669
- /// set and recurse with `match_candidates`. These sub-matches are
1670
- /// obviously non-exhaustive - as we discarded our otherwise set - so
1671
- /// we set their continuation to do `match_candidates` on the
1672
- /// "unmatched" set (which is again non-exhaustive).
1673
- ///
1674
- /// If you apply this to the above test, you basically wind up
1675
- /// with an if-else-if chain, testing each candidate in turn,
1676
- /// which is precisely what we want.
1677
- ///
1678
- /// In addition to avoiding exponential-time blowups, this algorithm
1679
- /// also has the nice property that each guard and arm is only generated
1680
- /// once.
1681
- fn test_candidates < ' pat , ' b , ' c > (
1599
+ fn pick_test (
1682
1600
& mut self ,
1683
- span : Span ,
1684
- scrutinee_span : Span ,
1685
- mut candidates : & ' b mut [ & ' c mut Candidate < ' pat , ' tcx > ] ,
1686
- start_block : BasicBlock ,
1687
- otherwise_block : BasicBlock ,
1601
+ candidates : & mut [ & mut Candidate < ' _ , ' tcx > ] ,
1688
1602
fake_borrows : & mut Option < FxIndexSet < Place < ' tcx > > > ,
1689
- ) {
1690
- // extract the match-pair from the highest priority candidate
1603
+ ) -> ( PlaceBuilder < ' tcx > , Test < ' tcx > ) {
1604
+ // Extract the match-pair from the highest priority candidate
1691
1605
let match_pair = & candidates. first ( ) . unwrap ( ) . match_pairs [ 0 ] ;
1692
1606
let mut test = self . test ( match_pair) ;
1693
1607
let match_place = match_pair. place . clone ( ) ;
1694
1608
1695
- // most of the time, the test to perform is simply a function
1696
- // of the main candidate; but for a test like SwitchInt, we
1697
- // may want to add cases based on the candidates that are
1609
+ debug ! ( ?test , ?match_pair ) ;
1610
+ // Most of the time, the test to perform is simply a function of the main candidate; but for
1611
+ // a test like SwitchInt, we may want to add cases based on the candidates that are
1698
1612
// available
1699
1613
match test. kind {
1700
1614
TestKind :: SwitchInt { switch_ty : _, ref mut options } => {
@@ -1721,20 +1635,58 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
1721
1635
fb. insert ( resolved_place) ;
1722
1636
}
1723
1637
1724
- // perform the test, branching to one of N blocks. For each of
1725
- // those N possible outcomes, create a (initially empty)
1726
- // vector of candidates. Those are the candidates that still
1727
- // apply if the test has that particular outcome.
1728
- debug ! ( "test_candidates: test={:?} match_pair={:?}" , test, match_pair) ;
1638
+ ( match_place, test)
1639
+ }
1640
+
1641
+ /// Given a test, we sort the input candidates into several buckets. If a candidate only matches
1642
+ /// in one of the branches of `test`, we move it there. If it could match in more than one of
1643
+ /// the branches of `test`, we stop sorting candidates.
1644
+ ///
1645
+ /// This returns a pair of
1646
+ /// - the candidates that weren't sorted;
1647
+ /// - for each possible outcome of the test, the candidates that match in that outcome.
1648
+ ///
1649
+ /// Moreover, we transform the branched candidates to reflect the fact that we know which
1650
+ /// outcome of `test` occurred.
1651
+ ///
1652
+ /// For example:
1653
+ /// ```
1654
+ /// # let (x, y, z) = (true, true, true);
1655
+ /// match (x, y, z) {
1656
+ /// (true , _ , true ) => true, // (0)
1657
+ /// (false, false, _ ) => false, // (1)
1658
+ /// (_ , true , _ ) => true, // (2)
1659
+ /// (true , _ , false) => false, // (3)
1660
+ /// }
1661
+ /// # ;
1662
+ /// ```
1663
+ ///
1664
+ /// Assume we are testing on `x`. There are 2 overlapping candidate sets:
1665
+ /// - If the outcome is that `x` is true, candidates 0, 2, and 3
1666
+ /// - If the outcome is that `x` is false, candidates 1 and 2
1667
+ ///
1668
+ /// Following our algorithm, candidate 0 is sorted into outcome `x == true`, candidate 1 goes
1669
+ /// into outcome `x == false`, and candidates 2 and 3 remain unsorted.
1670
+ ///
1671
+ /// The sorted candidates are transformed:
1672
+ /// - candidate 0 becomes `[z @ true]` since we know that `x` was `true`;
1673
+ /// - candidate 1 becomes `[y @ false]` since we know that `x` was `false`.
1674
+ fn sort_candidates < ' b , ' c , ' pat > (
1675
+ & mut self ,
1676
+ match_place : & PlaceBuilder < ' tcx > ,
1677
+ test : & Test < ' tcx > ,
1678
+ mut candidates : & ' b mut [ & ' c mut Candidate < ' pat , ' tcx > ] ,
1679
+ ) -> ( & ' b mut [ & ' c mut Candidate < ' pat , ' tcx > ] , Vec < Vec < & ' b mut Candidate < ' pat , ' tcx > > > ) {
1680
+ // For each of the N possible outcomes, create a (initially empty) vector of candidates.
1681
+ // Those are the candidates that apply if the test has that particular outcome.
1729
1682
let mut target_candidates: Vec < Vec < & mut Candidate < ' pat , ' tcx > > > = vec ! [ ] ;
1730
1683
target_candidates. resize_with ( test. targets ( ) , Default :: default) ;
1731
1684
1732
1685
let total_candidate_count = candidates. len ( ) ;
1733
1686
1734
- // Sort the candidates into the appropriate vector in
1735
- // `target_candidates`. Note that at some point we may
1736
- // encounter a candidate where the test is not relevant; at
1737
- // that point, we stop sorting.
1687
+ // Sort the candidates into the appropriate vector in `target_candidates`. Note that at some
1688
+ // point we may encounter a candidate where the test is not relevant; at that point, we stop
1689
+ // sorting.
1738
1690
while let Some ( candidate) = candidates. first_mut ( ) {
1739
1691
let Some ( idx) = self . sort_candidate ( & match_place, & test, candidate) else {
1740
1692
break ;
@@ -1743,24 +1695,139 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
1743
1695
target_candidates[ idx] . push ( candidate) ;
1744
1696
candidates = rest;
1745
1697
}
1746
- // at least the first candidate ought to be tested
1698
+
1699
+ // At least the first candidate ought to be tested
1747
1700
assert ! (
1748
1701
total_candidate_count > candidates. len( ) ,
1749
1702
"{total_candidate_count}, {candidates:#?}"
1750
1703
) ;
1751
1704
debug ! ( "tested_candidates: {}" , total_candidate_count - candidates. len( ) ) ;
1752
1705
debug ! ( "untested_candidates: {}" , candidates. len( ) ) ;
1753
1706
1707
+ ( candidates, target_candidates)
1708
+ }
1709
+
1710
+ /// This is the most subtle part of the match lowering algorithm. At this point, the input
1711
+ /// candidates have been fully simplified, so all remaining match-pairs require some sort of
1712
+ /// test.
1713
+ ///
1714
+ /// Once we pick what sort of test we are going to perform, this test will help us winnow down
1715
+ /// our candidates. So we walk over the candidates (from high to low priority) and check. We
1716
+ /// compute, for each outcome of the test, a transformed list of candidates. If a candidate
1717
+ /// matches in a single branch of our test, we add it to the corresponding outcome. We also
1718
+ /// transform it to record the fact that we know which outcome occurred.
1719
+ ///
1720
+ /// For example, if we are testing `x.0`'s variant, and we have a candidate
1721
+ /// `(x.0 @ Some(v), x.1 @ 22)`, then we would have a resulting candidate of
1722
+ /// `((x.0 as Some).0 @ v, x.1 @ 22)` in the branch corresponding to `Some`. To ensure we make
1723
+ /// progress, we always pick a test that results in simplifying the first candidate.
1724
+ ///
1725
+ /// But there may also be candidates that the test doesn't
1726
+ /// apply to. The classical example is wildcards:
1727
+ ///
1728
+ /// ```
1729
+ /// # let (x, y, z) = (true, true, true);
1730
+ /// match (x, y, z) {
1731
+ /// (true , _ , true ) => true, // (0)
1732
+ /// (false, false, _ ) => false, // (1)
1733
+ /// (_ , true , _ ) => true, // (2)
1734
+ /// (true , _ , false) => false, // (3)
1735
+ /// }
1736
+ /// # ;
1737
+ /// ```
1738
+ ///
1739
+ /// Here, the traditional "decision tree" method would generate 2 separate code-paths for the 2
1740
+ /// possible values of `x`. This would however duplicate some candidates, which would need to be
1741
+ /// lowered several times.
1742
+ ///
1743
+ /// In some cases, this duplication can create an exponential amount of
1744
+ /// code. This is most easily seen by noticing that this method terminates
1745
+ /// with precisely the reachable arms being reachable - but that problem
1746
+ /// is trivially NP-complete:
1747
+ ///
1748
+ /// ```ignore (illustrative)
1749
+ /// match (var0, var1, var2, var3, ...) {
1750
+ /// (true , _ , _ , false, true, ...) => false,
1751
+ /// (_ , true, true , false, _ , ...) => false,
1752
+ /// (false, _ , false, false, _ , ...) => false,
1753
+ /// ...
1754
+ /// _ => true
1755
+ /// }
1756
+ /// ```
1757
+ ///
1758
+ /// Here the last arm is reachable only if there is an assignment to
1759
+ /// the variables that does not match any of the literals. Therefore,
1760
+ /// compilation would take an exponential amount of time in some cases.
1761
+ ///
1762
+ /// In rustc, we opt instead for the "backtracking automaton" approach. This guarantees we never
1763
+ /// duplicate a candidate (except in the presence of or-patterns). In fact, this guarantee is
1764
+ /// enforced by the fact that we carry around `&mut Candidate`s, which can't be duplicated.
1765
+ ///
1766
+ /// To make this work, whenever we decide to perform a test, if we encounter a candidate that
1767
+ /// could match in more than one branch of the test, we stop. We generate code for the test and
1768
+ /// for the candidates in its branches; the remaining candidates will be tested if the
1769
+ /// candidates in the branches fail to match.
1770
+ ///
1771
+ /// For example, if we test on `x` in the following:
1772
+ /// ```
1773
+ /// # fn foo((x, y, z): (bool, bool, bool)) -> u32 {
1774
+ /// match (x, y, z) {
1775
+ /// (true , _ , true ) => 0,
1776
+ /// (false, false, _ ) => 1,
1777
+ /// (_ , true , _ ) => 2,
1778
+ /// (true , _ , false) => 3,
1779
+ /// }
1780
+ /// # }
1781
+ /// ```
1782
+ /// this function generates code that looks more or less like:
1783
+ /// ```
1784
+ /// # fn foo((x, y, z): (bool, bool, bool)) -> u32 {
1785
+ /// if x {
1786
+ /// match (y, z) {
1787
+ /// (_, true) => return 0,
1788
+ /// _ => {} // continue matching
1789
+ /// }
1790
+ /// } else {
1791
+ /// match (y, z) {
1792
+ /// (false, _) => return 1,
1793
+ /// _ => {} // continue matching
1794
+ /// }
1795
+ /// }
1796
+ /// // the block here is `remainder_start`
1797
+ /// match (x, y, z) {
1798
+ /// (_ , true , _ ) => 2,
1799
+ /// (true , _ , false) => 3,
1800
+ /// _ => unreachable!(),
1801
+ /// }
1802
+ /// # }
1803
+ /// ```
1804
+ fn test_candidates < ' pat , ' b , ' c > (
1805
+ & mut self ,
1806
+ span : Span ,
1807
+ scrutinee_span : Span ,
1808
+ candidates : & ' b mut [ & ' c mut Candidate < ' pat , ' tcx > ] ,
1809
+ start_block : BasicBlock ,
1810
+ otherwise_block : BasicBlock ,
1811
+ fake_borrows : & mut Option < FxIndexSet < Place < ' tcx > > > ,
1812
+ ) {
1813
+ // Extract the match-pair from the highest priority candidate and build a test from it.
1814
+ let ( match_place, test) = self . pick_test ( candidates, fake_borrows) ;
1815
+
1816
+ // For each of the N possible test outcomes, build the vector of candidates that applies if
1817
+ // the test has that particular outcome.
1818
+ let ( remaining_candidates, target_candidates) =
1819
+ self . sort_candidates ( & match_place, & test, candidates) ;
1820
+
1754
1821
// The block that we should branch to if none of the
1755
1822
// `target_candidates` match.
1756
- let remainder_start = if !candidates . is_empty ( ) {
1823
+ let remainder_start = if !remaining_candidates . is_empty ( ) {
1757
1824
let remainder_start = self . cfg . start_new_block ( ) ;
1758
1825
self . match_candidates (
1759
1826
span,
1760
1827
scrutinee_span,
1761
1828
remainder_start,
1762
1829
otherwise_block,
1763
- candidates ,
1830
+ remaining_candidates ,
1764
1831
fake_borrows,
1765
1832
) ;
1766
1833
remainder_start
0 commit comments