leanprover-community · chabulhwi · Mar 26, 2024 · Feb 12, 2024 · Apr 1, 2024 · Mar 18, 2024
diff --git a/Std/Data/List.lean b/Std/Data/List.lean
@@ -4,3 +4,4 @@ import Std.Data.List.Init.Attach
 import Std.Data.List.Lemmas
 import Std.Data.List.Pairwise
 import Std.Data.List.Perm
+import Std.Data.List.SplitOnList
diff --git a/Std/Data/List/Lemmas.lean b/Std/Data/List/Lemmas.lean
@@ -132,11 +132,30 @@ theorem drop_left : ∀ l₁ l₂ : List α, drop (length l₁) (l₁ ++ l₂) =
 theorem drop_left' {l₁ l₂ : List α} {n} (h : length l₁ = n) : drop n (l₁ ++ l₂) = l₂ := by
   rw [← h]; apply drop_left
 
+theorem drop_append_left (l l₁ l₂ : List α) :
+    drop (length l + length l₁) (l ++ l₂) = drop (length l₁) l₂ := by
+  match l with
+  | [] => simp
+  | [a] =>
+    simp only [length_singleton, singleton_append]
+    rw [Nat.add_comm, drop_add, drop_one, tail_cons]
+  | a :: b :: l =>
+    rw [← singleton_append, length_append, Nat.add_assoc, append_assoc]
+    have ih := by simpa only [length_append] using drop_append_left [a] (b::l++l₁) (b::l++l₂)
+    rw [ih, drop_append_left (b::l) l₁ l₂]
+termination_by length l
+
 /-! ### isEmpty -/
 
 @[simp] theorem isEmpty_nil : ([] : List α).isEmpty = true := rfl
 @[simp] theorem isEmpty_cons : (x :: xs : List α).isEmpty = false := rfl
 
+theorem isEmpty_iff_eq_nil {l : List α} : l.isEmpty ↔ l = [] := by cases l <;> simp [isEmpty]
+
+@[simp] theorem isEmpty_append : (l₁ ++ l₂ : List α).isEmpty ↔ l₁.isEmpty ∧ l₂.isEmpty := by
+  repeat rw [isEmpty_iff_eq_nil]
+  apply append_eq_nil
+
 /-! ### append -/
 
 theorem append_eq_append : List.append l₁ l₂ = l₁ ++ l₂ := rfl
@@ -165,6 +184,16 @@ theorem append_eq_append_iff {a b c d : List α} :
   | nil => simp; exact (or_iff_left_of_imp fun ⟨_, ⟨e, rfl⟩, h⟩ => e ▸ h.symm).symm
   | cons a as ih => cases c <;> simp [eq_comm, and_assoc, ih, and_or_left]
 
+theorem append_left_eq_self {a b : List α} : a ++ b = b ↔ a = [] := by
+  constructor <;> intro h
+  · exact List.eq_nil_of_length_eq_zero <| Nat.add_left_eq_self.mp (h ▸ List.length_append a b).symm
+  · simp [h]
+
+theorem append_right_eq_self {a b : List α} : a ++ b = a ↔ b = [] := by
+  constructor <;> intro h
+  · exact List.eq_nil_of_length_eq_zero <| Nat.add_right_eq_self.mp (h ▸ List.length_append a b).symm
+  · simp [h]
+
 @[simp] theorem mem_append {a : α} {s t : List α} : a ∈ s ++ t ↔ a ∈ s ∨ a ∈ t := by
   induction s <;> simp_all [or_assoc]
 
@@ -652,6 +681,12 @@ theorem head!_of_head? [Inhabited α] : ∀ {l : List α}, head? l = some a →
 theorem head?_eq_head : ∀ l h, @head? α l = some (head l h)
   | _::_, _ => rfl
 
+theorem headD_eq_head : ∀ l {a₀} h, @headD α l a₀ = head l h
+  | _::_, _, _ => rfl
+
+theorem head_append : ∀ l m h, @head α (l ++ m) (by simp [h]) = head l h
+  | _::_, _, _ => rfl
+
 /-! ### tail -/
 
 @[simp] theorem tailD_eq_tail? (l l' : List α) : tailD l l' = (tail? l).getD l' := by
@@ -661,6 +696,19 @@ theorem tail_eq_tailD (l) : @tail α l = tailD l [] := by cases l <;> rfl
 
 theorem tail_eq_tail? (l) : @tail α l = (tail? l).getD [] := by simp [tail_eq_tailD]
 
+/-! ### head and tail -/
+
+theorem head_cons_tail : ∀ l h, @head α l h :: l.tail = l
+  | _::_, _ => rfl
+
+theorem tail_append (l m) (h : l ≠ []) : @tail α (l ++ m) = tail l ++ m := by
+  rw [← head_cons_tail l h]
+  simp
+
+theorem singleton_head_eq_self (l : List α) (hne : l ≠ []) (htl : l.tail = []) :
+    [l.head hne] = l := by
+  conv => rhs; rw [← head_cons_tail l hne, htl]
+
 /-! ### next? -/
 
 @[simp] theorem next?_nil : @next? α [] = none := rfl
@@ -1163,6 +1211,22 @@ theorem drop_eq_nil_of_eq_nil : ∀ {as : List α} {i}, as = [] → as.drop i =
 theorem ne_nil_of_drop_ne_nil {as : List α} {i : Nat} (h: as.drop i ≠ []) : as ≠ [] :=
   mt drop_eq_nil_of_eq_nil h
 
+/-! ### modify head -/
+
+theorem modifyHead_id : ∀ (l : List α), l.modifyHead id = l
+  | [] => rfl
+  | _::_ => rfl
+
+-- Porting note: List.modifyHead has @[simp], and Lean 4 treats this as
+-- an invitation to unfold modifyHead in any context,
+-- not just use the equational lemmas.
+
+-- @[simp]
+-- @[simp 1100, nolint simpNF]
+@[simp 1100]
+theorem modifyHead_modifyHead (l : List α) (f g : α → α) :
+    (l.modifyHead f).modifyHead g = l.modifyHead (g ∘ f) := by cases l <;> simp
+
 /-! ### modify nth -/
 
 theorem modifyNthTail_id : ∀ n (l : List α), l.modifyNthTail id n = l
@@ -2244,6 +2308,15 @@ theorem IsPrefix.eq_of_length (h : l₁ <+: l₂) : l₁.length = l₂.length
 theorem IsSuffix.eq_of_length (h : l₁ <:+ l₂) : l₁.length = l₂.length → l₁ = l₂ :=
   h.sublist.eq_of_length
 
+theorem IsInfix.length_lt_of_ne (hin : l₁ <:+: l₂) (hne : l₁ ≠ l₂) : l₁.length < l₂.length :=
+  Nat.lt_of_le_of_ne (IsInfix.length_le hin) (mt (IsInfix.eq_of_length hin) hne)
+
+theorem IsPrefix.length_lt_of_ne (hpf : l₁ <+: l₂) (hne : l₁ ≠ l₂) : l₁.length < l₂.length :=
+  Nat.lt_of_le_of_ne (IsPrefix.length_le hpf) (mt (IsPrefix.eq_of_length hpf) hne)
+
+theorem IsSuffix.length_lt_of_ne (hsf : l₁ <:+ l₂) (hne : l₁ ≠ l₂) : l₁.length < l₂.length :=
+  Nat.lt_of_le_of_ne (IsSuffix.length_le hsf) (mt (IsSuffix.eq_of_length hsf) hne)
+
 theorem prefix_of_prefix_length_le :
     ∀ {l₁ l₂ l₃ : List α}, l₁ <+: l₃ → l₂ <+: l₃ → length l₁ ≤ length l₂ → l₁ <+: l₂
   | [], l₂, _, _, _, _ => nil_prefix _
@@ -2300,6 +2373,9 @@ theorem prefix_append_right_inj (l) : l ++ l₁ <+: l ++ l₂ ↔ l₁ <+: l₂
 theorem prefix_cons_inj (a) : a :: l₁ <+: a :: l₂ ↔ l₁ <+: l₂ :=
   prefix_append_right_inj [a]
 
+theorem singleton_prefix_cons (a) : [a] <+: a :: l :=
+  (prefix_cons_inj a).mpr (nil_prefix l)
+
 theorem take_prefix (n) (l : List α) : take n l <+: l :=
   ⟨_, take_append_drop _ _⟩
 
@@ -2336,6 +2412,101 @@ theorem IsInfix.filter (p : α → Bool) ⦃l₁ l₂ : List α⦄ (h : l₁ <:+
   obtain ⟨xs, ys, rfl⟩ := h
   rw [filter_append, filter_append]; apply infix_append _
 
+theorem IsPrefix.iff_isPrefixOf [BEq α] [LawfulBEq α] {l₁ l₂ : List α} :
+    l₁ <+: l₂ ↔ isPrefixOf l₁ l₂ := by
+  match l₁, l₂ with
+  | [],   _  => simp [nil_prefix]
+  | _::_, [] => simp
+  | a::as, b::bs =>
+    constructor
+    · intro ⟨t, ht⟩
+      simp only [cons_append, cons.injEq] at ht
+      have hpf : as <+: bs := ⟨t, ht.2⟩
+      simpa [isPrefixOf, ht.1] using IsPrefix.iff_isPrefixOf.mp hpf
+    · intro h
+      simp only [isPrefixOf, Bool.and_eq_true, beq_iff_eq] at h
+      let ⟨t, ht⟩ := IsPrefix.iff_isPrefixOf.mpr h.2
+      exists t
+      simpa [h.1]
+
+theorem IsSuffix.iff_isSuffixOf [BEq α] [LawfulBEq α] {l₁ l₂ : List α} :
+    l₁ <:+ l₂ ↔ isSuffixOf l₁ l₂ := by
+  match l₁, l₂ with
+  | [],   _  => simp [nil_suffix, isSuffixOf]
+  | _::_, [] => simp [isSuffixOf, isPrefixOf]
+  | a::as, b::bs =>
+    constructor
+    · intro ⟨t, ht⟩
+      have hpf : (as.reverse ++ [a]) <+: (bs.reverse ++ [b]) := by
+        exists t.reverse
+        simpa using congrArg reverse ht
+      simpa [isSuffixOf, reverse_cons] using IsPrefix.iff_isPrefixOf.mp hpf
+    · intro h
+      simp only [isSuffixOf, reverse_cons] at h
+      let ⟨t, ht⟩ := IsPrefix.iff_isPrefixOf.mpr h
+      exists t.reverse
+      simpa using congrArg reverse ht
+
+theorem eq_of_cons_prefix_cons {a b : α} {l₁ l₂} (h : a :: l₁ <+: b :: l₂) : a = b := by
+  let ⟨t, ht⟩ := h
+  simp only [cons_append, cons.injEq] at ht
+  exact ht.1
+
+theorem head_eq_head_of_prefix (hl₁ : l₁ ≠ []) (hl₂ : l₂ ≠ []) (h : l₁ <+: l₂) :
+    l₁.head hl₁ = l₂.head hl₂ := by
+  obtain ⟨a, l₁, rfl⟩ := l₁.exists_cons_of_ne_nil hl₁
+  obtain ⟨b, l₂, rfl⟩ := l₂.exists_cons_of_ne_nil hl₂
+  simp [eq_of_cons_prefix_cons h, head_cons]
+
+theorem tail_prefix_tail_of_prefix (hl₁ : l₁ ≠ []) (hl₂ : l₂ ≠ []) (h : l₁ <+: l₂) :
+    l₁.tail <+: l₂.tail := by
+  have heq := head_eq_head_of_prefix hl₁ hl₂ h
+  let ⟨t, ht⟩ := h
+  rw [← head_cons_tail l₁ hl₁, ← head_cons_tail l₂ hl₂, ← heq] at ht
+  simp only [cons_append, cons.injEq, true_and] at ht
+  exact ⟨t, ht⟩
+
+theorem cons_prefix_iff : a :: l₁ <+: b :: l₂ ↔ a = b ∧ l₁ <+: l₂ := by
+  constructor
+  · rintro ⟨L, hL⟩
+    simp only [cons_append] at hL
+    injection hL with hLLeft hLRight
+    exact ⟨hLLeft, ⟨L, hLRight⟩⟩
+  · rintro ⟨rfl, h⟩
+    rwa [prefix_cons_inj]
+
+theorem prefix_iff_head_eq_and_tail_prefix (hl₁ : l₁ ≠ []) (hl₂ : l₂ ≠ []) :
+    l₁ <+: l₂ ↔ l₁.head hl₁ = l₂.head hl₂ ∧ l₁.tail <+: l₂.tail := by
+  constructor <;> intro h
+  · exact ⟨head_eq_head_of_prefix hl₁ hl₂ h, tail_prefix_tail_of_prefix hl₁ hl₂ h⟩
+  · let ⟨t, ht⟩ := h.2
+    exists t
+    rw [← head_cons_tail l₁ hl₁, ← head_cons_tail l₂ hl₂]
+    simpa [h.1]
+
+theorem ne_nil_of_not_prefix (h : ¬l₁ <+: l₂) : l₁ ≠ [] := by
+  intro heq
+  simp [heq, nil_prefix] at h
+
+theorem not_prefix_and_not_prefix_symm_iff_exists [BEq α] [LawfulBEq α] [DecidableEq α]
+    {l₁ l₂ : List α} : ¬l₁ <+: l₂ ∧ ¬l₂ <+: l₁ ↔ ∃ c₁ c₂ pre suf₁ suf₂, c₁ ≠ c₂ ∧
+      l₁ = pre ++ c₁ :: suf₁ ∧ l₂ = pre ++ c₂ :: suf₂ := by
+  constructor <;> intro h
+  · obtain ⟨c₁, l₁, rfl⟩ := l₁.exists_cons_of_ne_nil (ne_nil_of_not_prefix h.1)
+    obtain ⟨c₂, l₂, rfl⟩ := l₂.exists_cons_of_ne_nil (ne_nil_of_not_prefix h.2)
+    simp only [cons_prefix_iff, not_and] at h
+    cases Decidable.em (c₁ = c₂)
+    · subst c₂
+      simp only [forall_const] at h
+      let ⟨c₁', c₂', pre, suf₁, suf₂, hc, heq₁, heq₂⟩ :=
+        not_prefix_and_not_prefix_symm_iff_exists.mp h
+      exact ⟨c₁', c₂', c₁::pre, suf₁, suf₂, hc, by simp [heq₁], by simp [heq₂]⟩
+    · next hc =>
+      exact ⟨c₁, c₂, [], l₁, l₂, hc, nil_append .., nil_append ..⟩
+  · let ⟨c₁, c₂, pre, suf₁, suf₂, hc, heq₁, heq₂⟩ := h
+    rw [heq₁, heq₂]
+    simp [prefix_append_right_inj, cons_prefix_iff, hc, hc.symm]
+
 /-! ### drop -/
 
 theorem mem_of_mem_drop {n} {l : List α} (h : a ∈ l.drop n) : a ∈ l := drop_subset _ _ h

diff --git a/Std/Data/List/SplitOnList.lean b/Std/Data/List/SplitOnList.lean
@@ -0,0 +1,160 @@
+/-
+Copyright (c) 2024 Bulhwi Cha, Mario Carneiro. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Bulhwi Cha, Mario Carneiro
+-/
+import Std.Data.List.Lemmas
+
+/-!
+# `List.splitOnList`
+
+This file introduces the `List.splitOnList` function, which is a specification for `String.splitOn`.
+We need it to prove `String.splitOn_of_valid` in `Std.Data.String.Lemmas`.
+```
+[1, 1, 2, 3, 2, 4, 4].splitOnList []     = [[1, 1, 2, 3, 2, 4, 4]]
+[1, 1, 2, 3, 2, 4, 4].splitOnList [5, 6] = [[1, 1, 2, 3, 2, 4, 4]]
+[1, 1, 2, 3, 2, 4, 4].splitOnList [2, 3] = [[1, 1], [2, 4, 4]]
+```
+-/
+
+namespace List
+
+/-- Returns `(l₁, l₂)` for the first split `l = l₁ ++ l₂` such that `P l₂` returns true. -/
+def splitOnceRightP (P : List α → Bool) (l : List α) : Option (List α × List α) :=
+  if P l then
+    some ([], l)
+  else
+    match l with
+    | [] => none
+    | a::l => (splitOnceRightP P l).map fun (l, r) => (a :: l, r)
+
+theorem splitOnceRightP_of_P {P : List α → Bool} {l} (h : P l) :
+    splitOnceRightP P l = some ([], l) := by
+  unfold splitOnceRightP
+  simp [h]
+
+theorem splitOnceRight_nil (P : List α → Bool) :
+    splitOnceRightP P [] = if P [] then some ([], []) else none :=
+  rfl
+
+theorem splitOnceRight_of_ne_nil_of_not_P {P : List α → Bool} {l} (hne : l ≠ []) (hnp : ¬P l) :
+    splitOnceRightP P l = (splitOnceRightP P l.tail).map fun (lf, rt) => (l.head hne :: lf, rt) :=
+      by
+  obtain ⟨a, l, rfl⟩ := exists_cons_of_ne_nil hne
+  conv => lhs; unfold splitOnceRightP
+  simp [hnp]
+
+theorem eq_append_of_splitOnceRightP {P : List α → Bool} {l} :
+    ∀ l₁ l₂, splitOnceRightP P l = some (l₁, l₂) → l = l₁ ++ l₂ := by
+  induction l with
+  | nil => simp [splitOnceRightP]
+  | cons a l ih =>
+    if h : P (a :: l) then
+      simp [splitOnceRightP, h]
+    else
+      simp only [splitOnceRightP, h]
+      match e : splitOnceRightP P l with
+      | none => simp
+      | some (lf, rt) => simp; apply ih; assumption
+
+theorem P_of_splitOnceRightP {P : List α → Bool} {l} :
+    ∀ l₁ l₂, splitOnceRightP P l = some (l₁, l₂) → P l₂ := by
+  induction l with
+  | nil => simp [splitOnceRightP]
+  | cons a l ih =>
+    if h : P (a :: l) then
+      simp [splitOnceRightP, h]
+    else
+      simp only [splitOnceRightP, h]
+      match e : splitOnceRightP P l with
+      | none => simp
+      | some (lf, rt) => simp; apply ih; assumption
+
+variable [BEq α] [LawfulBEq α]
+
+theorem splitOnceRightP_isPrefixOf_eq_none_of_length_lt {l sep : List α}
+    (h : l.length < sep.length) : splitOnceRightP sep.isPrefixOf l = none := by
+  have not_isPrefixOf_of_length_lt {l} (h : l.length < sep.length) : ¬sep.isPrefixOf l := by
+    rw [← IsPrefix.iff_isPrefixOf]
+    exact mt IsPrefix.length_le (Nat.not_le_of_lt h)
+  induction l with
+  | nil => unfold splitOnceRightP; simp [not_isPrefixOf_of_length_lt h]
+  | cons a l ih =>
+    unfold splitOnceRightP
+    simp only [not_isPrefixOf_of_length_lt h, ↓reduceIte, Option.map_eq_none']
+    have h' : l.length < sep.length :=
+      calc
+        l.length < (a :: l).length := by simp
+        _ < sep.length := h
+    exact ih h'
+
+/--
+Split a list at every occurrence of a separator list. The separators are not in the result.
+```
+[1, 1, 2, 3, 2, 4, 4].splitOnList [2, 3] = [[1, 1], [2, 4, 4]]
+```
+-/
+def splitOnList (l sep : List α) : List (List α) :=
+  if h : sep.isEmpty then
+    [l]
+  else
+    match e : splitOnceRightP sep.isPrefixOf l with
+    | none => [l]
+    | some (l₁, l₂) =>
+      have : l₂.length - sep.length < l.length := by
+        rw [eq_append_of_splitOnceRightP l₁ l₂ e, length_append]
+        calc
+          l₂.length - sep.length < l₂.length := Nat.sub_lt_self (by simp [length_pos,
+            ← isEmpty_iff_eq_nil, h]) (IsPrefix.length_le <| IsPrefix.iff_isPrefixOf.mpr
+            (P_of_splitOnceRightP l₁ l₂ e))
+          _ ≤ l₁.length + l₂.length := Nat.le_add_left ..
+      l₁ :: splitOnList (l₂.drop sep.length) sep
+termination_by l.length
+
+theorem splitOnList_nil_left (sep : List α) : splitOnList [] sep = [[]] := by
+  cases sep <;> rfl
+
+theorem splitOnList_nil_right (l : List α) : splitOnList l [] = [l] := by
+  simp [splitOnList]
+
+theorem splitOnList_append_append_of_isPrefixOf {l : List α} (sep₁) {sep₂} (hsp₂ : sep₂ ≠ [])
+    (hpf : sep₂.isPrefixOf l) :
+    splitOnList (sep₁ ++ l) (sep₁ ++ sep₂) =
+      [] :: splitOnList (l.drop sep₂.length) (sep₁ ++ sep₂) := by
+  have hnem : ¬(sep₁ ++ sep₂).isEmpty := by simp [isEmpty_iff_eq_nil, hsp₂]
+  have hpf : (sep₁ ++ sep₂).isPrefixOf (sep₁ ++ l) := by
+    rwa [← IsPrefix.iff_isPrefixOf, prefix_append_right_inj, IsPrefix.iff_isPrefixOf]
+  conv => lhs; unfold splitOnList
+  simp only [hnem, ↓reduceDite, length_append]
+  rw [splitOnceRightP_of_P hpf]
+  simp [drop_append_left]
+
+theorem splitOnList_of_isPrefixOf {l sep : List α} (hsp : sep ≠ []) (hpf : sep.isPrefixOf l) :
+    splitOnList l sep = [] :: splitOnList (l.drop sep.length) sep :=
+  splitOnList_append_append_of_isPrefixOf [] hsp hpf
+
+theorem splitOnList_refl_of_ne_nil (l : List α) (h : l ≠ []) : splitOnList l l = [[], []] := by
+  have hpf : l.isPrefixOf l := IsPrefix.iff_isPrefixOf.mp (prefix_refl l)
+  simp [splitOnList_of_isPrefixOf h hpf, splitOnList_nil_left]
+
+theorem splitOnList_of_ne_nil_of_not_isPrefixOf {l sep : List α} (hne : l ≠ [])
+    (hnpf : ¬sep.isPrefixOf l) :
+    splitOnList l sep = modifyHead (l.head hne :: ·) (splitOnList l.tail sep) := by
+  unfold splitOnList
+  obtain ⟨a, sep, rfl⟩ := exists_cons_of_ne_nil (ne_nil_of_not_prefix (mt IsPrefix.iff_isPrefixOf.mp
+    hnpf))
+  simp only [isEmpty_cons, ↓reduceDite]
+  rw [splitOnceRight_of_ne_nil_of_not_P hne hnpf]
+  match splitOnceRightP (a::sep).isPrefixOf l.tail with
+  | none => simp [head_cons_tail]
+  | some (lf, rt) => rfl
+
+theorem splitOnList_eq_singleton_of_length_lt {l sep : List α} (h : l.length < sep.length) :
+    splitOnList l sep = [l] := by
+  have hne : ¬sep.isEmpty := by
+    simpa [isEmpty_iff_eq_nil, length_pos] using Nat.lt_of_le_of_lt (Nat.zero_le l.length) h
+  unfold splitOnList
+  simp only [hne, ↓reduceDite]
+  rw [splitOnceRightP_isPrefixOf_eq_none_of_length_lt h]
+
+end List