Skip to content
This repository has been archived by the owner on Jan 20, 2025. It is now read-only.

Commit

Permalink
Precompute prediction sets.
Browse files Browse the repository at this point in the history
  • Loading branch information
dabrahams committed Jan 15, 2025
1 parent 86b7ffd commit cf595b0
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 29 deletions.
5 changes: 5 additions & 0 deletions Sources/Lotsawa/Grammar.swift
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,11 @@ extension Grammar {
let terminals = Set(rules.lazy.map(\.rhs).joined()).subtracting(nonTerminals)
return (terminals, nonTerminals)
}

/// Returns the set of every symbol that appears in any rule of `self`, whether as the rule's
/// left-hand side or anywhere in its right-hand side.
func allSymbols() -> Set<Symbol> {
  var mentioned: Set<Symbol> = Set(rules.lazy.map(\.lhs))
  mentioned.formUnion(rules.lazy.map(\.rhs).joined())
  return mentioned
}

}

extension Grammar {
Expand Down
15 changes: 12 additions & 3 deletions Sources/Lotsawa/ItemID.swift
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,18 @@ extension Chart.ItemID {
///
/// - Precondition: !self.isLeo
var origin: UInt32 {
assert(!isLeo)
return
storage.isCompletion_symbol_isEarley_originHi << 16 | storage.originLow_dotPosition >> 16
get {
assert(!isLeo)
// The origin is stored split across two 32-bit fields.  Shifting the first field left by 16
// discards its upper half and moves its low 16 bits (the origin's high half) into place; the
// top 16 bits of `originLow_dotPosition` supply the origin's low half.
return
storage.isCompletion_symbol_isEarley_originHi << 16 | storage.originLow_dotPosition >> 16
}
set {
assert(!isLeo)
// `~0 << 16` is 0xFFFF_0000: zero the low 16 bits of the first field, keeping its upper half,
// then store the high 16 bits of `newValue` there.
storage.isCompletion_symbol_isEarley_originHi &= ~0 << 16
storage.isCompletion_symbol_isEarley_originHi |= newValue >> 16
// `~0 >> 16` is 0x0000_FFFF: keep only the low 16 bits of the second field (presumably the dot
// position, going by the field name), then store the low 16 bits of `newValue` in its top half.
storage.originLow_dotPosition &= ~0 >> 16
storage.originLow_dotPosition |= newValue << 16
}
}

/// The dot position representing this Earley Item's parse progress.
Expand Down
3 changes: 2 additions & 1 deletion Sources/Lotsawa/MultiMap.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/// A mapping from keys `K` to arrays of values `[V]`, where by default every
/// key maps to the empty array.
struct MultiMap<K: Hashable, V> {
struct
MultiMap<K: Hashable, V> {
/// The type used as underlying storage.
typealias Storage = Dictionary<K, [V]>

Expand Down
39 changes: 37 additions & 2 deletions Sources/Lotsawa/PreprocessedGrammar.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,45 @@ public struct PreprocessedGrammar<StoredSymbol: SignedInteger & FixedWidthIntege

let first: [RuleID: Symbol]

let predictions: MultiMap<Symbol, Chart.Entry>

/// Creates a preprocessed version of `raw`, ready for recognition.
///
/// Besides eliminating nulls and indexing rules by their left-hand side, this precomputes, for
/// every symbol `s` of the null-free grammar, the sorted list of chart entries to be added when
/// `s` is predicted.  Each entry is built with earleme 0 as its position.
public init(_ raw: Grammar<StoredSymbol>) {
  (base, rawPosition, isNullable) = raw.eliminatingNulls()
  rulesByLHS = MultiMap(grouping: base.ruleIDs, by: base.lhs)
  leoPositions = base.leoPositions()
  first = base.firstSymbols()

  let allSymbols = base.allSymbols()

  // Seed each symbol's set with the entries that directly predict the rules it is the LHS of.
  // These don't change during closure, so they are built exactly once here.
  var p: [Symbol: Set<Chart.Entry>] = [:]
  p.reserveCapacity(allSymbols.count)
  for s in allSymbols {
    var direct: Set<Chart.Entry> = []
    for r in rulesByLHS[s] {
      // FIXME: overflow here on 32-bit systems
      direct.insert(
        .init(
          item: .init(predicting: r, in: base, at: 0, first: first[r]!),
          mainstemIndex: .init(UInt32.max)))
    }
    p[s] = direct
  }

  // Close the sets transitively: predicting `s` also predicts everything predicted by the first
  // symbol of each of `s`'s rules.  Repeat until a full pass adds nothing (a fixed point).
  var foundPrediction = false
  repeat {
    foundPrediction = false
    for s in allSymbols {
      for r in rulesByLHS[s] {
        let oldCount = p[s]!.count
        // `first[r]!` is a RHS symbol, so it is in `allSymbols` and therefore a key of `p`.
        p[s]!.formUnion(p[first[r]!]!)
        if p[s]!.count != oldCount { foundPrediction = true }
      }
    }
  } while foundPrediction

  predictions = .init(storage: p.mapValues { $0.sorted() })
}
/*
func rhsStartAndPostdot(_ r: RuleID) -> (Position, Symbol) {
Expand All @@ -41,14 +74,16 @@ extension PreprocessedGrammar {
rulesByLHS: MultiMap<Symbol, RuleID>,
leoPositions: Set<Position>,
rawPosition: DiscreteMap<Position, Position>,
isNullable: Bool
isNullable: Bool,
predictions: MultiMap<Symbol, Chart.Entry>
) {
self.base = base
self.rulesByLHS = rulesByLHS
self.leoPositions = leoPositions
self.rawPosition = rawPosition
self.isNullable = isNullable
first = base.firstSymbols()
self.predictions = predictions
}

/// Returns a complete string representation of `self` from which it
Expand All @@ -61,7 +96,7 @@ extension PreprocessedGrammar {
rawPosition: \(rawPosition.serialized()),
leoPositions: \(leoPositions),
isNullable: \(isNullable),
first: \(first)
predictions: \(predictions)
)
"""
}
Expand Down
33 changes: 10 additions & 23 deletions Sources/Lotsawa/Recognizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ public struct Recognizer<StoredSymbol: SignedInteger & FixedWidthInteger> {

private let first: [RuleID: Symbol]

private let predictions: MultiMap<Symbol, Chart.Entry>

/// Storage for all DerivationGroups, grouped by Earleme and sorted within each Earleme.
public private(set) var chart = Chart()

/// True iff at least one Leo candidate item was added to the current earley set.
private var leoCandidateFound = false

private var pendingDiscoveries: [(Symbol, startingAt: SourcePosition)] = []
private var pendingPredictions: [Symbol] = []
}

/// applies `f` to `x`.
Expand All @@ -43,6 +44,7 @@ extension Recognizer {
self.leoPositions = g.leoPositions
self.acceptsNull = g.isNullable
self.first = g.first
self.predictions = g.predictions
initialize()
}

Expand All @@ -55,30 +57,14 @@ extension Recognizer {
/// The index of the Earley set on which work is currently in progress.
public var currentEarleme: UInt32 {
  chart.currentEarleme
}

/// Returns the chart entry predicting the start of `r` at the current earleme.
private func prediction(_ r: RuleID) -> Chart.Entry {
  // FIXME: overflow here on 32-bit systems
  let predicted = Chart.Entry(
    item: .init(predicting: r, in: g, at: currentEarleme, first: first[r]!),
    mainstemIndex: .init(UInt32.max))
  return predicted
}

/// Seed the current item set with rules implied by the predicted recognition of `s` starting at
/// the current earleme.
mutating func predict(_ s: Symbol) {
predict1(s)
while let s = pendingPredictions.popLast() {
predict1(s)
}
}

mutating func predict1(_ s: Symbol) {
for r in rulesByLHS[s] {
if insert(prediction(r)) {
pendingPredictions.append(first[r]!)
}
// `predictions[s]` is the precomputed list of entries for `s`, copied from the grammar at
// initialization; each element is taken as a mutable copy so it can be adjusted below.
for var p in predictions[s] {
// Stamp the copy with the current earleme as its origin before inserting it
// (NOTE(review): the precomputed entries appear to be built for earleme 0 — confirm).
p.item.origin = currentEarleme
// The result of `insert` is deliberately discarded.
_ = insert(p)
}
}

Expand Down Expand Up @@ -115,9 +101,10 @@ extension Recognizer {
mainstems.allSatisfy(\.isEarley),
"Leo item is not first in mainstems.")

// Make sure this isn't some lazy collection dependent on the
// chart or an unsafe buffer pointer; we're going to insert
// stuff.
// Use type annotation to make sure this isn't some lazy
// collection dependent on the chart or an unsafe buffer
// pointer; we're going to insert stuff and we don't want to
// cause needless copies or invalidate transitionItems.
let transitionItems: Range<Int> = mainstems.indices
for i in transitionItems
where i == transitionItems.first || chart.entries[i - 1].item != chart.entries[i].item
Expand Down

0 comments on commit cf595b0

Please sign in to comment.