From 6da9c52a011302dd4bf9c4ad7714ad0db738a98e Mon Sep 17 00:00:00 2001 From: Gordon Brander Date: Mon, 1 Nov 2021 11:47:00 -0700 Subject: [PATCH] Subtext recursive descent parser (#3) Re-implementing Subtext via recursive descent. - Two passes instead of 4x passes with Regexp - Gives us a DOM to work with so we can extract titles, etc, later on --- .../Shared/Components/AppView.swift | 6 +- .../Subconscious/Shared/Library/Subtext.swift | 211 ++++++++++++++++++ .../Shared/Library/Subtext4.swift | 130 ----------- xcode/Subconscious/Shared/Library/Tape.swift | 134 +++++++++++ .../Subconscious.xcodeproj/project.pbxproj | 24 +- 5 files changed, 363 insertions(+), 142 deletions(-) create mode 100644 xcode/Subconscious/Shared/Library/Subtext.swift delete mode 100644 xcode/Subconscious/Shared/Library/Subtext4.swift create mode 100644 xcode/Subconscious/Shared/Library/Tape.swift diff --git a/xcode/Subconscious/Shared/Components/AppView.swift b/xcode/Subconscious/Shared/Components/AppView.swift index e85c7540..c812ab6c 100644 --- a/xcode/Subconscious/Shared/Components/AppView.swift +++ b/xcode/Subconscious/Shared/Components/AppView.swift @@ -111,10 +111,8 @@ struct AppModel: Updatable { markup: String, selection: NSRange ) -> NSAttributedString { - Subtext4( - markup: markup, - range: selection - ).renderMarkup(url: Slashlink.slashlinkToURLString) + Subtext(markup: markup) + .renderMarkup(url: Slashlink.slashlinkToURLString) } // MARK: Update diff --git a/xcode/Subconscious/Shared/Library/Subtext.swift b/xcode/Subconscious/Shared/Library/Subtext.swift new file mode 100644 index 00000000..c4a2e0a8 --- /dev/null +++ b/xcode/Subconscious/Shared/Library/Subtext.swift @@ -0,0 +1,211 @@ +// +// Subtext5.swift +// Subconscious +// +// Created by Gordon Brander on 10/25/21. +// + +import Foundation +import SwiftUI + +struct Subtext { + enum Block { + case text(span: Substring, inline: [Inline]) + case list(span: Substring, inline: [Inline]) + case quote(span: Substring, inline: [Inline]) + case heading(span: Substring) + + /// Returns the body of a block, without the leading sigil + func body() -> Substring { + switch self { + case .text(let span, _): + return span + case .quote(let span, _), .list(let span, _), .heading(let span): + return span.dropFirst() + } + } + } + + struct Link { + var span: Substring + } + + struct Bracketlink { + var span: Substring + + func body() -> Substring { + span.dropFirst().dropLast() + } + } + + struct Slashlink { + var span: Substring + } + + enum Inline { + case link(Link) + case bracketlink(Bracketlink) + case slashlink(Slashlink) + } + + /// Consume a well-formed bracket link, or else backtrack + private static func consumeBracketLink(tape: inout Tape) -> Substring? { + tape.save() + while !tape.isExhausted() { + if tape.consumeMatch(" ") { + tape.backtrack() + return nil + } else if tape.consumeMatch(">") { + return tape.cut() + } else { + tape.consume() + } + } + tape.backtrack() + return nil + } + + private static func consumeInlineWordBoundaryForm( + tape: inout Tape + ) -> Inline? { + if tape.consumeMatch("<") { + if let link = consumeBracketLink(tape: &tape) { + return .bracketlink(Bracketlink(span: link)) + } else { + return nil + } + } else if tape.consumeMatch("https://") { + tape.consumeUntil(" ") + return .link(Link(span: tape.cut())) + } else if tape.consumeMatch("http://") { + tape.consumeUntil(" ") + return .link(Link(span: tape.cut())) + } else if tape.consumeMatch("/") { + tape.consumeUntil(" ") + return .slashlink(Slashlink(span: tape.cut())) + } else { + return nil + } + } + + private static func parseInline(tape: inout Tape) -> [Inline] { + var inlines: [Inline] = [] + + /// Capture word-boundary-delimited forms at beginning of line. + tape.start() + if let inline = consumeInlineWordBoundaryForm(tape: &tape) { + inlines.append(inline) + } + + while !tape.isExhausted() { + tape.start() + let curr = tape.consume() + /// Capture word-boundary-delimited forms after space + if curr == " " { + tape.start() + if let inline = consumeInlineWordBoundaryForm(tape: &tape) { + inlines.append(inline) + } + } + } + + return inlines + } + + private static func parseLine(_ line: Substring) -> Block { + if line.hasPrefix("#") { + return Block.heading(span: line) + } else if line.hasPrefix(">") { + var tape = Tape(line) + // Discard prefix + tape.consume() + let inline = parseInline(tape: &tape) + return Block.quote(span: line, inline: inline) + } else if line.hasPrefix("-") { + var tape = Tape(line) + // Discard prefix + tape.consume() + let inline = parseInline(tape: &tape) + return Block.list(span: line, inline: inline) + } else { + var tape = Tape(line) + let inline = parseInline(tape: &tape) + return Block.list(span: line, inline: inline) + } + } + + let base: String + let blocks: [Block] + + init(markup: String) { + self.base = markup + self.blocks = markup.split( + omittingEmptySubsequences: false, + whereSeparator: \.isNewline + ).map(Self.parseLine) + } +} + +extension Subtext { + /// Render markup verbatim with syntax highlighting and links + func renderMarkup(url: (String) -> String?) -> NSAttributedString { + let attributedString = NSMutableAttributedString(string: base) + // Set default styles for entire string + attributedString.addAttribute( + .font, + value: UIFont.appText, + range: NSRange(base.startIndex..]+)[\.,;]?"# - ) - - static let bracketlink = try! NSRegularExpression( - pattern: #"<([^>\s]+)>"# - ) - - /// Static property for empty document - static let empty = Self(markup: "") - - let base: String - let headings: Set - let slashlinks: Set - let links: Set - - init( - markup: String, - cursor: String.Index? = nil - ) { - let nsRange = NSRange(markup.startIndex.. String?) -> NSAttributedString { - let attributedString = NSMutableAttributedString(string: base) - // Set default styles for entire string - attributedString.addAttribute( - .font, - value: UIFont.appText, - range: NSRange(base.startIndex.. +where T: Collection, + T.SubSequence: Equatable +{ + private(set) var savedIndex: T.Index + private(set) var startIndex: T.Index + private(set) var currentIndex: T.Index + let collection: T + + init(_ collection: T) { + self.collection = collection + self.startIndex = collection.startIndex + self.currentIndex = collection.startIndex + self.savedIndex = collection.startIndex + } + + /// Get current subsequence + var subsequence: T.SubSequence { + collection[startIndex.. Bool { + return self.currentIndex >= self.collection.endIndex + } + + /// Sets the start of the current range to the current index + /// Generally called at the beginning of each loop. + mutating func start() { + startIndex = currentIndex + } + + /// Get current subsequence, and advance start index to current index. + /// Conceptually like snipping off a piece of tape so that you have the piece up until the cut, + /// and the cut becomes the new start of the tape. + mutating func cut() -> T.SubSequence { + let subsequence = collection[startIndex.. T.SubSequence { + let subsequence = collection[currentIndex...currentIndex] + self.collection.formIndex( + after: &self.currentIndex + ) + return subsequence + } + + /// Peek forward, and consume if match + mutating func consumeMatch(_ subsequence: T.SubSequence) -> Bool { + if let endIndex = collection.index( + currentIndex, + offsetBy: subsequence.count, + limitedBy: collection.endIndex + ) { + if collection[currentIndex.. T.SubSequence { + while !self.isExhausted() { + if self.peek(next: delimiter.count) == delimiter { + if includeDelimiter { + self.consume() + } + return self.subsequence + } else { + self.consume() + } + } + return self.subsequence + } + + /// Get a single-item SubSequence offset by `offset` of the `currentStartIndex`. + /// Returns a single-item SubSequence, or nil if `offset` is invalid. + func peek(_ offset: Int = 0) -> T.SubSequence? { + if + let startIndex = collection.index( + currentIndex, + offsetBy: offset, + limitedBy: collection.endIndex + ), + let endIndex = collection.index( + currentIndex, + offsetBy: offset + 1, + limitedBy: collection.endIndex + ) + { + return collection[startIndex.. T.SubSequence? { + if let endIndex = collection.index( + currentIndex, + offsetBy: offset, + limitedBy: collection.endIndex + ) { + return collection[currentIndex..