Skip to content

Commit 1f5bc17

Browse files
committed
Auto merge of #80104 - Nadrieril:usefulness-merging, r=varkor
Improve and fix diagnostics of exhaustiveness checking Primarily, this fixes #56379. This also fixes incorrect interactions between or-patterns and slice patterns that I discovered while working on #56379. Those two examples show the incorrect diagnostics: ```rust match &[][..] { [true] => {} [true // detected as unreachable but that's not true | false, ..] => {} _ => {} } match (true, None) { (true, Some(_)) => {} (false, Some(true)) => {} (true | false, None | Some(true // should be detected as unreachable | false)) => {} } ``` I did not measure any perf impact. However, I suspect that [`616ba9f`](616ba9f) should have a negative impact on large or-patterns. I'll see what the perf run says; I have optimization ideas up my sleeve if needed. EDIT: I initially had a noticeable perf impact that I thought unavoidable. I then proceeded to avoid it x) r? `@varkor` `@rustbot` label +A-exhaustiveness-checking
2 parents 1b6b06a + cefcadb commit 1f5bc17

12 files changed

+286
-139
lines changed

compiler/rustc_mir_build/src/thir/pattern/check_match.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ fn report_arm_reachability<'p, 'tcx>(
433433
Useful(unreachables) if unreachables.is_empty() => {}
434434
// The arm is reachable, but contains unreachable subpatterns (from or-patterns).
435435
Useful(unreachables) => {
436-
let mut unreachables: Vec<_> = unreachables.iter().flatten().copied().collect();
436+
let mut unreachables: Vec<_> = unreachables.iter().collect();
437437
// Emit lints in the order in which they occur in the file.
438438
unreachables.sort_unstable();
439439
for span in unreachables {

compiler/rustc_mir_build/src/thir/pattern/usefulness.rs

+184-113
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,6 @@ use super::{Pat, PatKind};
311311
use super::{PatternFoldable, PatternFolder};
312312

313313
use rustc_data_structures::captures::Captures;
314-
use rustc_data_structures::fx::FxHashSet;
315314
use rustc_data_structures::sync::OnceCell;
316315

317316
use rustc_arena::TypedArena;
@@ -626,11 +625,82 @@ impl<'p, 'tcx> FromIterator<PatStack<'p, 'tcx>> for Matrix<'p, 'tcx> {
626625
}
627626
}
628627

628+
/// Represents a set of `Span`s closed under the containment relation. That is, if a `Span` is
629+
/// contained in the set then all `Span`s contained in it are also implicitly contained in the set.
630+
/// In particular this means that when intersecting two sets, taking the intersection of some span
631+
/// and one of its subspans returns the subspan, whereas a simple `HashSet` would have returned an
632+
/// empty intersection.
633+
/// It is assumed that two spans don't overlap without one being contained in the other; in other
634+
/// words, that the inclusion structure forms a tree and not a DAG.
635+
/// Intersection is not very efficient. It compares everything pairwise. If needed it could be made
636+
/// faster by sorting the `Span`s and merging cleverly.
637+
#[derive(Debug, Clone, Default)]
638+
pub(crate) struct SpanSet {
639+
/// The minimal set of `Span`s required to represent the whole set. If A and B are `Span`s in
640+
/// the `SpanSet`, and A is a descendant of B, then only B will be in `root_spans`.
641+
/// Invariant: the spans are disjoint.
642+
root_spans: Vec<Span>,
643+
}
644+
645+
impl SpanSet {
646+
/// Creates an empty set.
647+
fn new() -> Self {
648+
Self::default()
649+
}
650+
651+
/// Tests whether the set is empty.
652+
pub(crate) fn is_empty(&self) -> bool {
653+
self.root_spans.is_empty()
654+
}
655+
656+
/// Iterate over the disjoint list of spans at the roots of this set.
657+
pub(crate) fn iter<'a>(&'a self) -> impl Iterator<Item = Span> + Captures<'a> {
658+
self.root_spans.iter().copied()
659+
}
660+
661+
/// Tests whether the set contains a given Span.
662+
fn contains(&self, span: Span) -> bool {
663+
self.iter().any(|root_span| root_span.contains(span))
664+
}
665+
666+
/// Add a span to the set if we know the span has no intersection in this set.
667+
fn push_nonintersecting(&mut self, new_span: Span) {
668+
self.root_spans.push(new_span);
669+
}
670+
671+
fn intersection_mut(&mut self, other: &Self) {
672+
if self.is_empty() || other.is_empty() {
673+
*self = Self::new();
674+
return;
675+
}
676+
// Those that were in `self` but not contained in `other`
677+
let mut leftover = SpanSet::new();
678+
// We keep the elements in `self` that are also in `other`.
679+
self.root_spans.retain(|span| {
680+
let retain = other.contains(*span);
681+
if !retain {
682+
leftover.root_spans.push(*span);
683+
}
684+
retain
685+
});
686+
// We keep the elements in `other` that are also in the original `self`. You might think
687+
// this is not needed because `self` already contains the intersection. But those aren't
688+
// just sets of things. If `self = [a]`, `other = [b]` and `a` contains `b`, then `b`
689+
// belongs in the intersection but we didn't catch it in the filtering above. We look at
690+
// `leftover` instead of the full original `self` to avoid duplicates.
691+
for span in other.iter() {
692+
if leftover.contains(span) {
693+
self.root_spans.push(span);
694+
}
695+
}
696+
}
697+
}
698+
629699
#[derive(Clone, Debug)]
630700
crate enum Usefulness<'tcx> {
631-
/// Carries, for each column in the matrix, a set of sub-branches that have been found to be
632-
/// unreachable. Used only in the presence of or-patterns, otherwise it stays empty.
633-
Useful(Vec<FxHashSet<Span>>),
701+
/// Pontentially carries a set of sub-branches that have been found to be unreachable. Used
702+
/// only in the presence of or-patterns, otherwise it stays empty.
703+
Useful(SpanSet),
634704
/// Carries a list of witnesses of non-exhaustiveness.
635705
UsefulWithWitness(Vec<Witness<'tcx>>),
636706
NotUseful,
@@ -640,14 +710,97 @@ impl<'tcx> Usefulness<'tcx> {
640710
fn new_useful(preference: WitnessPreference) -> Self {
641711
match preference {
642712
ConstructWitness => UsefulWithWitness(vec![Witness(vec![])]),
643-
LeaveOutWitness => Useful(vec![]),
713+
LeaveOutWitness => Useful(Default::default()),
644714
}
645715
}
646716

647-
fn is_useful(&self) -> bool {
648-
!matches!(*self, NotUseful)
717+
/// When trying several branches and each returns a `Usefulness`, we need to combine the
718+
/// results together.
719+
fn merge(usefulnesses: impl Iterator<Item = Self>) -> Self {
720+
// If we have detected some unreachable sub-branches, we only want to keep them when they
721+
// were unreachable in _all_ branches. Eg. in the following, the last `true` is unreachable
722+
// in the second branch of the first or-pattern, but not otherwise. Therefore we don't want
723+
// to lint that it is unreachable.
724+
// ```
725+
// match (true, true) {
726+
// (true, true) => {}
727+
// (false | true, false | true) => {}
728+
// }
729+
// ```
730+
// Here however we _do_ want to lint that the last `false` is unreachable. So we don't want
731+
// to intersect the spans that come directly from the or-pattern, since each branch of the
732+
// or-pattern brings a new disjoint pattern.
733+
// ```
734+
// match None {
735+
// Some(false) => {}
736+
// None | Some(true | false) => {}
737+
// }
738+
// ```
739+
740+
// Is `None` when no branch was useful. Will often be `Some(Spanset::new())` because the
741+
// sets are only non-empty in the presence of or-patterns.
742+
let mut unreachables: Option<SpanSet> = None;
743+
// Witnesses of usefulness, if any.
744+
let mut witnesses = Vec::new();
745+
746+
for u in usefulnesses {
747+
match u {
748+
Useful(spans) if spans.is_empty() => {
749+
// Once we reach the empty set, more intersections won't change the result.
750+
return Useful(SpanSet::new());
751+
}
752+
Useful(spans) => {
753+
if let Some(unreachables) = &mut unreachables {
754+
if !unreachables.is_empty() {
755+
unreachables.intersection_mut(&spans);
756+
}
757+
if unreachables.is_empty() {
758+
return Useful(SpanSet::new());
759+
}
760+
} else {
761+
unreachables = Some(spans);
762+
}
763+
}
764+
NotUseful => {}
765+
UsefulWithWitness(wits) => {
766+
witnesses.extend(wits);
767+
}
768+
}
769+
}
770+
771+
if !witnesses.is_empty() {
772+
UsefulWithWitness(witnesses)
773+
} else if let Some(unreachables) = unreachables {
774+
Useful(unreachables)
775+
} else {
776+
NotUseful
777+
}
649778
}
650779

780+
/// After calculating the usefulness for a branch of an or-pattern, call this to make this
781+
/// usefulness mergeable with those from the other branches.
782+
fn unsplit_or_pat(self, this_span: Span, or_pat_spans: &[Span]) -> Self {
783+
match self {
784+
Useful(mut spans) => {
785+
// We register the spans of the other branches of this or-pattern as being
786+
// unreachable from this one. This ensures that intersecting together the sets of
787+
// spans returns what we want.
788+
// Until we optimize `SpanSet` however, intersecting this entails a number of
789+
// comparisons quadratic in the number of branches.
790+
for &span in or_pat_spans {
791+
if span != this_span {
792+
spans.push_nonintersecting(span);
793+
}
794+
}
795+
Useful(spans)
796+
}
797+
x => x,
798+
}
799+
}
800+
801+
/// After calculating usefulness after a specialization, call this to recontruct a usefulness
802+
/// that makes sense for the matrix pre-specialization. This new usefulness can then be merged
803+
/// with the results of specializing with the other constructors.
651804
fn apply_constructor<'p>(
652805
self,
653806
pcx: PatCtxt<'_, 'p, 'tcx>,
@@ -677,23 +830,6 @@ impl<'tcx> Usefulness<'tcx> {
677830
};
678831
UsefulWithWitness(new_witnesses)
679832
}
680-
Useful(mut unreachables) => {
681-
if !unreachables.is_empty() {
682-
// When we apply a constructor, there are `arity` columns of the matrix that
683-
// corresponded to its arguments. All the unreachables found in these columns
684-
// will, after `apply`, come from the first column. So we take the union of all
685-
// the corresponding sets and put them in the first column.
686-
// Note that `arity` may be 0, in which case we just push a new empty set.
687-
let len = unreachables.len();
688-
let arity = ctor_wild_subpatterns.len();
689-
let mut unioned = FxHashSet::default();
690-
for set in unreachables.drain((len - arity)..) {
691-
unioned.extend(set)
692-
}
693-
unreachables.push(unioned);
694-
}
695-
Useful(unreachables)
696-
}
697833
x => x,
698834
}
699835
}
@@ -829,112 +965,47 @@ fn is_useful<'p, 'tcx>(
829965

830966
assert!(rows.iter().all(|r| r.len() == v.len()));
831967

968+
// FIXME(Nadrieril): Hack to work around type normalization issues (see #72476).
969+
let ty = matrix.heads().next().map(|r| r.ty).unwrap_or(v.head().ty);
970+
let pcx = PatCtxt { cx, matrix, ty, span: v.head().span, is_top_level };
971+
972+
debug!("is_useful_expand_first_col: ty={:#?}, expanding {:#?}", pcx.ty, v.head());
973+
832974
// If the first pattern is an or-pattern, expand it.
833-
if let Some(vs) = v.expand_or_pat() {
975+
let ret = if let Some(vs) = v.expand_or_pat() {
976+
let subspans: Vec<_> = vs.iter().map(|v| v.head().span).collect();
834977
// We expand the or pattern, trying each of its branches in turn and keeping careful track
835978
// of possible unreachable sub-branches.
836-
//
837-
// If two branches have detected some unreachable sub-branches, we need to be careful. If
838-
// they were detected in columns that are not the current one, we want to keep only the
839-
// sub-branches that were unreachable in _all_ branches. Eg. in the following, the last
840-
// `true` is unreachable in the second branch of the first or-pattern, but not otherwise.
841-
// Therefore we don't want to lint that it is unreachable.
842-
//
843-
// ```
844-
// match (true, true) {
845-
// (true, true) => {}
846-
// (false | true, false | true) => {}
847-
// }
848-
// ```
849-
// If however the sub-branches come from the current column, they come from the inside of
850-
// the current or-pattern, and we want to keep them all. Eg. in the following, we _do_ want
851-
// to lint that the last `false` is unreachable.
852-
// ```
853-
// match None {
854-
// Some(false) => {}
855-
// None | Some(true | false) => {}
856-
// }
857-
// ```
858-
859979
let mut matrix = matrix.clone();
860-
// We keep track of sub-branches separately depending on whether they come from this column
861-
// or from others.
862-
let mut unreachables_this_column: FxHashSet<Span> = FxHashSet::default();
863-
let mut unreachables_other_columns: Vec<FxHashSet<Span>> = Vec::default();
864-
// Whether at least one branch is reachable.
865-
let mut any_is_useful = false;
866-
867-
for v in vs {
868-
let res = is_useful(cx, &matrix, &v, witness_preference, hir_id, is_under_guard, false);
869-
match res {
870-
Useful(unreachables) => {
871-
if let Some((this_column, other_columns)) = unreachables.split_last() {
872-
// We keep the union of unreachables found in the first column.
873-
unreachables_this_column.extend(this_column);
874-
// We keep the intersection of unreachables found in other columns.
875-
if unreachables_other_columns.is_empty() {
876-
unreachables_other_columns = other_columns.to_vec();
877-
} else {
878-
unreachables_other_columns = unreachables_other_columns
879-
.into_iter()
880-
.zip(other_columns)
881-
.map(|(x, y)| x.intersection(&y).copied().collect())
882-
.collect();
883-
}
884-
}
885-
any_is_useful = true;
886-
}
887-
NotUseful => {
888-
unreachables_this_column.insert(v.head().span);
889-
}
890-
UsefulWithWitness(_) => bug!(
891-
"encountered or-pat in the expansion of `_` during exhaustiveness checking"
892-
),
893-
}
894-
980+
let usefulnesses = vs.into_iter().map(|v| {
981+
let v_span = v.head().span;
982+
let usefulness =
983+
is_useful(cx, &matrix, &v, witness_preference, hir_id, is_under_guard, false);
895984
// If pattern has a guard don't add it to the matrix.
896985
if !is_under_guard {
897986
// We push the already-seen patterns into the matrix in order to detect redundant
898987
// branches like `Some(_) | Some(0)`.
899988
matrix.push(v);
900989
}
901-
}
902-
903-
return if any_is_useful {
904-
let mut unreachables = if unreachables_other_columns.is_empty() {
905-
let n_columns = v.len();
906-
(0..n_columns - 1).map(|_| FxHashSet::default()).collect()
907-
} else {
908-
unreachables_other_columns
909-
};
910-
unreachables.push(unreachables_this_column);
911-
Useful(unreachables)
912-
} else {
913-
NotUseful
914-
};
915-
}
916-
917-
// FIXME(Nadrieril): Hack to work around type normalization issues (see #72476).
918-
let ty = matrix.heads().next().map(|r| r.ty).unwrap_or(v.head().ty);
919-
let pcx = PatCtxt { cx, matrix, ty, span: v.head().span, is_top_level };
920-
921-
debug!("is_useful_expand_first_col: ty={:#?}, expanding {:#?}", pcx.ty, v.head());
922-
923-
let ret = v
924-
.head_ctor(cx)
925-
.split(pcx, Some(hir_id))
926-
.into_iter()
927-
.map(|ctor| {
990+
usefulness.unsplit_or_pat(v_span, &subspans)
991+
});
992+
Usefulness::merge(usefulnesses)
993+
} else {
994+
// We split the head constructor of `v`.
995+
let ctors = v.head_ctor(cx).split(pcx, Some(hir_id));
996+
// For each constructor, we compute whether there's a value that starts with it that would
997+
// witness the usefulness of `v`.
998+
let usefulnesses = ctors.into_iter().map(|ctor| {
928999
// We cache the result of `Fields::wildcards` because it is used a lot.
9291000
let ctor_wild_subpatterns = Fields::wildcards(pcx, &ctor);
9301001
let matrix = pcx.matrix.specialize_constructor(pcx, &ctor, &ctor_wild_subpatterns);
9311002
let v = v.pop_head_constructor(&ctor_wild_subpatterns);
9321003
let usefulness =
9331004
is_useful(pcx.cx, &matrix, &v, witness_preference, hir_id, is_under_guard, false);
9341005
usefulness.apply_constructor(pcx, &ctor, &ctor_wild_subpatterns)
935-
})
936-
.find(|result| result.is_useful())
937-
.unwrap_or(NotUseful);
1006+
});
1007+
Usefulness::merge(usefulnesses)
1008+
};
9381009
debug!("is_useful::returns({:#?}, {:#?}) = {:?}", matrix, v, ret);
9391010
ret
9401011
}

0 commit comments

Comments
 (0)