diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml new file mode 100644 index 0000000..1863442 --- /dev/null +++ b/.github/workflows/continuous-integration-workflow.yml @@ -0,0 +1,14 @@ +name: unit-test +on: [push] + +jobs: + build: + name: unit-test + runs-on: macOS-latest + env: + GIT_SSL_NO_VERIFY: true + DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer + steps: + - uses: actions/checkout@v1 + - name: test + run: swift test diff --git a/Megrez.xcodeproj/project.pbxproj b/Megrez.xcodeproj/project.pbxproj index c0b5945..e1bcab8 100644 --- a/Megrez.xcodeproj/project.pbxproj +++ b/Megrez.xcodeproj/project.pbxproj @@ -7,19 +7,8 @@ objects = { /* Begin PBXBuildFile section */ - 5B7B55222813B1E5007AD17C /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B55212813B1E5007AD17C /* main.swift */; }; - 5B7B55292813B217007AD17C /* Megrez.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B55282813B217007AD17C /* Megrez.swift */; }; - 5B7B552B2813B2F3007AD17C /* KeyValuePair.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B552A2813B2F3007AD17C /* KeyValuePair.swift */; }; - 5B7B553E2813FA97007AD17C /* Unigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B552C2813B421007AD17C /* Unigram.swift */; }; - 5B7B553F2813FAA1007AD17C /* Bigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B55372813D707007AD17C /* Bigram.swift */; }; - 5B7B55402813FB86007AD17C /* LanguageModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B55332813D2E3007AD17C /* LanguageModel.swift */; }; - 5B7B55412813FB91007AD17C /* Node.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B55352813D5B9007AD17C /* Node.swift */; }; - 5B7B554328140F22007AD17C /* NodeAnchor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B554228140F22007AD17C /* NodeAnchor.swift */; }; - 5B7B5545281413EA007AD17C /* Span.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B5544281413EA007AD17C /* Span.swift */; }; - 5B7B554828143719007AD17C /* OrderedCollections in Frameworks */ = {isa = PBXBuildFile; productRef = 5B7B554728143719007AD17C /* OrderedCollections */; }; - 5B7B554A2814462A007AD17C /* Grid.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B55492814462A007AD17C /* Grid.swift */; }; - 5B7B554C2814F62D007AD17C /* Walker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B554B2814F62D007AD17C /* Walker.swift */; }; - 5B7B554E2814FFCB007AD17C /* BlockReadingBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7B554D2814FFCB007AD17C /* BlockReadingBuilder.swift */; }; + 5B7766822815B7900098C46B /* Megrez in Frameworks */ = {isa = PBXBuildFile; productRef = 5B7766812815B7900098C46B /* Megrez */; }; + 5B7766832815B8920098C46B /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B77667C2815B6730098C46B /* main.swift */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -35,21 +24,11 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ - 5B7B551E2813B1E5007AD17C /* Megrez */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Megrez; sourceTree = BUILT_PRODUCTS_DIR; }; - 5B7B55212813B1E5007AD17C /* main.swift */ = {isa = PBXFileReference; indentWidth = 2; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; - 5B7B55282813B217007AD17C /* Megrez.swift */ = {isa = PBXFileReference; indentWidth = 2; lastKnownFileType = sourcecode.swift; path = Megrez.swift; sourceTree = ""; }; - 5B7B552A2813B2F3007AD17C /* KeyValuePair.swift */ = {isa = PBXFileReference; indentWidth = 2; lastKnownFileType = sourcecode.swift; path = KeyValuePair.swift; sourceTree = ""; }; - 5B7B552C2813B421007AD17C /* Unigram.swift */ = {isa = PBXFileReference; indentWidth = 2; lastKnownFileType = sourcecode.swift; path = Unigram.swift; sourceTree = ""; }; + 5B77667C2815B6730098C46B /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; + 5B77667F2815B7490098C46B /* */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = ""; sourceTree = ""; }; + 5B7B551E2813B1E5007AD17C /* MegrezSample */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = MegrezSample; sourceTree = BUILT_PRODUCTS_DIR; }; 5B7B552F2813BE49007AD17C /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; 5B7B55302813BF8A007AD17C /* LICENSE */ = {isa = PBXFileReference; lastKnownFileType = text; path = LICENSE; sourceTree = ""; }; - 5B7B55332813D2E3007AD17C /* LanguageModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LanguageModel.swift; sourceTree = ""; }; - 5B7B55352813D5B9007AD17C /* Node.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Node.swift; sourceTree = ""; }; - 5B7B55372813D707007AD17C /* Bigram.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Bigram.swift; sourceTree = ""; }; - 5B7B554228140F22007AD17C /* NodeAnchor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NodeAnchor.swift; sourceTree = ""; }; - 5B7B5544281413EA007AD17C /* Span.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Span.swift; sourceTree = ""; }; - 5B7B55492814462A007AD17C /* Grid.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Grid.swift; sourceTree = ""; }; - 5B7B554B2814F62D007AD17C /* Walker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Walker.swift; sourceTree = ""; }; - 5B7B554D2814FFCB007AD17C /* BlockReadingBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BlockReadingBuilder.swift; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -57,56 +36,54 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 5B7B554828143719007AD17C /* OrderedCollections in Frameworks */, + 5B7766822815B7900098C46B /* Megrez in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ + 5B77667E2815B7490098C46B /* Packages */ = { + isa = PBXGroup; + children = ( + 5B77667F2815B7490098C46B /* */, + ); + name = Packages; + sourceTree = ""; + }; + 5B7766802815B7900098C46B /* Frameworks */ = { + isa = PBXGroup; + children = ( + ); + name = Frameworks; + sourceTree = ""; + }; 5B7B55152813B1E5007AD17C = { isa = PBXGroup; children = ( + 5B77667E2815B7490098C46B /* Packages */, + 5B77667C2815B6730098C46B /* main.swift */, 5B7B552F2813BE49007AD17C /* README.md */, 5B7B55302813BF8A007AD17C /* LICENSE */, - 5B7B55202813B1E5007AD17C /* Megrez */, 5B7B551F2813B1E5007AD17C /* Products */, + 5B7766802815B7900098C46B /* Frameworks */, ); sourceTree = ""; }; 5B7B551F2813B1E5007AD17C /* Products */ = { isa = PBXGroup; children = ( - 5B7B551E2813B1E5007AD17C /* Megrez */, + 5B7B551E2813B1E5007AD17C /* MegrezSample */, ); name = Products; sourceTree = ""; }; - 5B7B55202813B1E5007AD17C /* Megrez */ = { - isa = PBXGroup; - children = ( - 5B7B55282813B217007AD17C /* Megrez.swift */, - 5B7B552A2813B2F3007AD17C /* KeyValuePair.swift */, - 5B7B552C2813B421007AD17C /* Unigram.swift */, - 5B7B55372813D707007AD17C /* Bigram.swift */, - 5B7B55332813D2E3007AD17C /* LanguageModel.swift */, - 5B7B55352813D5B9007AD17C /* Node.swift */, - 5B7B554228140F22007AD17C /* NodeAnchor.swift */, - 5B7B5544281413EA007AD17C /* Span.swift */, - 5B7B55492814462A007AD17C /* Grid.swift */, - 5B7B554B2814F62D007AD17C /* Walker.swift */, - 5B7B554D2814FFCB007AD17C /* BlockReadingBuilder.swift */, - 5B7B55212813B1E5007AD17C /* main.swift */, - ); - path = Megrez; - sourceTree = ""; - }; /* End PBXGroup section */ /* Begin PBXNativeTarget section */ - 5B7B551D2813B1E5007AD17C /* Megrez */ = { + 5B7B551D2813B1E5007AD17C /* MegrezSample */ = { isa = PBXNativeTarget; - buildConfigurationList = 5B7B55252813B1E5007AD17C /* Build configuration list for PBXNativeTarget "Megrez" */; + buildConfigurationList = 5B7B55252813B1E5007AD17C /* Build configuration list for PBXNativeTarget "MegrezSample" */; buildPhases = ( 5B7B551A2813B1E5007AD17C /* Sources */, 5B7B551B2813B1E5007AD17C /* Frameworks */, @@ -116,12 +93,12 @@ ); dependencies = ( ); - name = Megrez; + name = MegrezSample; packageProductDependencies = ( - 5B7B554728143719007AD17C /* OrderedCollections */, + 5B7766812815B7900098C46B /* Megrez */, ); productName = Megrez; - productReference = 5B7B551E2813B1E5007AD17C /* Megrez */; + productReference = 5B7B551E2813B1E5007AD17C /* MegrezSample */; productType = "com.apple.product-type.tool"; }; /* End PBXNativeTarget section */ @@ -149,13 +126,12 @@ ); mainGroup = 5B7B55152813B1E5007AD17C; packageReferences = ( - 5B7B554628143719007AD17C /* XCRemoteSwiftPackageReference "swift-collections" */, ); productRefGroup = 5B7B551F2813B1E5007AD17C /* Products */; projectDirPath = ""; projectRoot = ""; targets = ( - 5B7B551D2813B1E5007AD17C /* Megrez */, + 5B7B551D2813B1E5007AD17C /* MegrezSample */, ); }; /* End PBXProject section */ @@ -165,18 +141,7 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 5B7B553F2813FAA1007AD17C /* Bigram.swift in Sources */, - 5B7B55402813FB86007AD17C /* LanguageModel.swift in Sources */, - 5B7B554C2814F62D007AD17C /* Walker.swift in Sources */, - 5B7B55292813B217007AD17C /* Megrez.swift in Sources */, - 5B7B554328140F22007AD17C /* NodeAnchor.swift in Sources */, - 5B7B554E2814FFCB007AD17C /* BlockReadingBuilder.swift in Sources */, - 5B7B5545281413EA007AD17C /* Span.swift in Sources */, - 5B7B55222813B1E5007AD17C /* main.swift in Sources */, - 5B7B552B2813B2F3007AD17C /* KeyValuePair.swift in Sources */, - 5B7B554A2814462A007AD17C /* Grid.swift in Sources */, - 5B7B553E2813FA97007AD17C /* Unigram.swift in Sources */, - 5B7B55412813FB91007AD17C /* Node.swift in Sources */, + 5B7766832815B8920098C46B /* main.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -334,7 +299,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 5B7B55252813B1E5007AD17C /* Build configuration list for PBXNativeTarget "Megrez" */ = { + 5B7B55252813B1E5007AD17C /* Build configuration list for PBXNativeTarget "MegrezSample" */ = { isa = XCConfigurationList; buildConfigurations = ( 5B7B55262813B1E5007AD17C /* Debug */, @@ -345,22 +310,10 @@ }; /* End XCConfigurationList section */ -/* Begin XCRemoteSwiftPackageReference section */ - 5B7B554628143719007AD17C /* XCRemoteSwiftPackageReference "swift-collections" */ = { - isa = XCRemoteSwiftPackageReference; - repositoryURL = "https://gitee.com/mirrors_apple/swift-collections"; - requirement = { - kind = upToNextMajorVersion; - minimumVersion = 1.0.0; - }; - }; -/* End XCRemoteSwiftPackageReference section */ - /* Begin XCSwiftPackageProductDependency section */ - 5B7B554728143719007AD17C /* OrderedCollections */ = { + 5B7766812815B7900098C46B /* Megrez */ = { isa = XCSwiftPackageProductDependency; - package = 5B7B554628143719007AD17C /* XCRemoteSwiftPackageReference "swift-collections" */; - productName = OrderedCollections; + productName = Megrez; }; /* End XCSwiftPackageProductDependency section */ }; diff --git a/Megrez.xcodeproj/xcshareddata/xcschemes/Megrez.xcscheme b/Megrez.xcodeproj/xcshareddata/xcschemes/Megrez.xcscheme deleted file mode 100644 index 3c01fcf..0000000 --- a/Megrez.xcodeproj/xcshareddata/xcschemes/Megrez.xcscheme +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/Package.resolved b/Package.resolved new file mode 100644 index 0000000..87b9034 --- /dev/null +++ b/Package.resolved @@ -0,0 +1,16 @@ +{ + "object": { + "pins": [ + { + "package": "swift-collections", + "repositoryURL": "https://gitee.com/mirrors_apple/swift-collections", + "state": { + "branch": null, + "revision": "48254824bb4248676bf7ce56014ff57b142b77eb", + "version": "1.0.2" + } + } + ] + }, + "version": 1 +} diff --git a/Package.swift b/Package.swift new file mode 100644 index 0000000..41ee704 --- /dev/null +++ b/Package.swift @@ -0,0 +1,28 @@ +// swift-tools-version:5.3 + +import PackageDescription + +let package = Package( + name: "Megrez", + products: [ + .library( + name: "Megrez", + targets: ["Megrez"] + ) + ], + dependencies: [ + .package(url: "https://gitee.com/mirrors_apple/swift-collections", from: "1.0.2") + ], + targets: [ + .target( + name: "Megrez", + dependencies: [ + .product(name: "OrderedCollections", package: "swift-collections") + ] + ), + .testTarget( + name: "MegrezTests", + dependencies: ["Megrez"] + ), + ] +) diff --git a/README.md b/README.md index 0c938e5..af29e3e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Megrez Engine 天權星引擎 +# Megrez Engine 天權星引擎 天權星引擎是用來處理輸入法語彙庫的一個模組。該倉庫乃威注音專案的弒神行動(Operation Longinus)的一部分。 @@ -8,3 +8,5 @@ Megrez Engine is a module made for processing lingual data of an input method. T - Swift programmer: Shiki Suen - C++ migration review: Hiraku Wong - Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). + +該專案依賴於 Apple 官方的 swift-collections 擴展當中的 OrderedCollections 套件。 diff --git a/Megrez/Megrez.swift b/Sources/Megrez/0_Megrez.swift similarity index 99% rename from Megrez/Megrez.swift rename to Sources/Megrez/0_Megrez.swift index cc4b480..637b014 100644 --- a/Megrez/Megrez.swift +++ b/Sources/Megrez/0_Megrez.swift @@ -22,6 +22,5 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - /// The namespace for this package. public enum Megrez {} diff --git a/Megrez/BlockReadingBuilder.swift b/Sources/Megrez/1_BlockReadingBuilder.swift similarity index 82% rename from Megrez/BlockReadingBuilder.swift rename to Sources/Megrez/1_BlockReadingBuilder.swift index a1e523d..25aae3a 100644 --- a/Megrez/BlockReadingBuilder.swift +++ b/Sources/Megrez/1_BlockReadingBuilder.swift @@ -22,9 +22,8 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - extension Megrez { - class BlockReadingBuilder { + public class BlockReadingBuilder { let kMaximumBuildSpanLength = 10 // 規定最多可以組成的詞的字數上限為 10 var mutCursorIndex: Int = 0 var mutReadings: [String] = [] @@ -32,40 +31,40 @@ extension Megrez { var mutLM: LanguageModel var mutJoinSeparator: String = "" - init(lm: LanguageModel) { + public init(lm: LanguageModel) { mutLM = lm } - func clear() { + public func clear() { mutCursorIndex = 0 mutReadings.removeAll() mutGrid.clear() } - func length() -> Int { + public func length() -> Int { mutReadings.count } - func cursorIndex() -> Int { + public func cursorIndex() -> Int { mutCursorIndex } - func setCursorIndex(newIndex: Int) { + public func setCursorIndex(newIndex: Int) { mutCursorIndex = min(newIndex, mutReadings.count) } - func insertReadingAtCursor(reading: String) { + public func insertReadingAtCursor(reading: String) { mutReadings.insert(reading, at: mutCursorIndex) mutGrid.expandGridByOneAt(location: mutCursorIndex) build() mutCursorIndex += 1 } - func readings() -> [String] { + public func readings() -> [String] { mutReadings } - func deleteReadingBeforeCursor() -> Bool { + public func deleteReadingBeforeCursor() -> Bool { if mutCursorIndex == 0 { return false } @@ -77,7 +76,7 @@ extension Megrez { return true } - @discardableResult func deleteReadingAfterCursor() -> Bool { + @discardableResult public func deleteReadingAfterCursor() -> Bool { if mutCursorIndex == mutReadings.count { return false } @@ -88,7 +87,7 @@ extension Megrez { return true } - func removeHeadReadings(count: Int) -> Bool { + public func removeHeadReadings(count: Int) -> Bool { if count > length() { return false } @@ -105,19 +104,19 @@ extension Megrez { return true } - func setJoinSeparator(separator: String) { + public func setJoinSeparator(separator: String) { mutJoinSeparator = separator } - func joinSeparator() -> String { + public func joinSeparator() -> String { mutJoinSeparator } - func grid() -> Grid { + public func grid() -> Grid { mutGrid } - func build() { + public func build() { // if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。 let itrBegin: Int = @@ -133,7 +132,7 @@ extension Megrez { let strSlice = (p == itrEnd) ? [mutReadings[itrEnd]] : mutReadings[p.., separator: String) -> String { + public func join(slice strSlice: ArraySlice, separator: String) -> String { var arrResult: [String] = [] for value in strSlice { arrResult.append(value) diff --git a/Megrez/Walker.swift b/Sources/Megrez/1_Walker.swift similarity index 92% rename from Megrez/Walker.swift rename to Sources/Megrez/1_Walker.swift index 3f6887e..e0b6dfb 100644 --- a/Megrez/Walker.swift +++ b/Sources/Megrez/1_Walker.swift @@ -22,16 +22,15 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - extension Megrez { - class Walker { + public class Walker { var mutGrid: Grid - init(grid: Megrez.Grid = Megrez.Grid()) { + public init(grid: Megrez.Grid = Megrez.Grid()) { mutGrid = grid } - func reverseWalk(at location: Int, score accumulatedScore: Double = 0.0) -> [NodeAnchor] { + public func reverseWalk(at location: Int, score accumulatedScore: Double = 0.0) -> [NodeAnchor] { if location == 0 || location > mutGrid.width() { return [] as [NodeAnchor] } diff --git a/Megrez/Grid.swift b/Sources/Megrez/2_Grid.swift similarity index 79% rename from Megrez/Grid.swift rename to Sources/Megrez/2_Grid.swift index 3801a1f..451ed5d 100644 --- a/Megrez/Grid.swift +++ b/Sources/Megrez/2_Grid.swift @@ -22,39 +22,38 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - extension Megrez { - class Grid { + public class Grid { var mutSpans: [Megrez.Span] - init() { + public init() { mutSpans = [Megrez.Span]() } - func clear() { + public func clear() { mutSpans = [Megrez.Span]() } - func insertNode(node: Node, location: Int, spanningLength: Int) { + public func insertNode(node: Node, location: Int, spanningLength: Int) { if mutSpans.count <= location { let diff = location - mutSpans.count + 1 for _ in 0.. Bool { + public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool { if location > mutSpans.count { return false } - let n = mutSpans[location].nodeOfLength(spanningLength) + let n = mutSpans[location].node(length: spanningLength) return n == nil ? false : key == n?.key() } - func expandGridByOneAt(location: Int) { + public func expandGridByOneAt(location: Int) { if location != 0 || location == mutSpans.count { mutSpans.insert(Span(), at: location) } else { @@ -66,7 +65,7 @@ extension Megrez { } } - func shrinkGridByOneAt(location: Int) { + public func shrinkGridByOneAt(location: Int) { if location >= mutSpans.count { return } @@ -78,17 +77,17 @@ extension Megrez { } } - func width() -> Int { + public func width() -> Int { mutSpans.count } - func nodesEndingAt(location: Int) -> [NodeAnchor] { + public func nodesEndingAt(location: Int) -> [NodeAnchor] { var results: [NodeAnchor] = [] if !mutSpans.isEmpty, location <= mutSpans.count { for i in 0..= location { - if let np = span.nodeOfLength(location - i) { + if let np = span.node(length: location - i) { var na = NodeAnchor() na.node = np na.location = i @@ -101,7 +100,7 @@ extension Megrez { return results } - func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] { + public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] { var results: [NodeAnchor] = [] if !mutSpans.isEmpty, location <= mutSpans.count { for i in 0.. NodeAnchor { + public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor { let nodes = nodesCrossingOrEndingAt(location: location) var node = NodeAnchor() for nodeAnchor in nodes { @@ -135,7 +134,7 @@ extension Megrez { for i in 0.. mutMaximumLength { mutMaximumLength = length @@ -67,7 +66,7 @@ extension Megrez { mutMaximumLength = max } - func nodeOfLength(_ length: Int) -> Node? { + public func node(length: Int) -> Node? { mutLengthNodeMap[length] } } diff --git a/Megrez/Node.swift b/Sources/Megrez/4_Node.swift similarity index 84% rename from Megrez/Node.swift rename to Sources/Megrez/4_Node.swift index ed1c420..d0b39f5 100644 --- a/Megrez/Node.swift +++ b/Sources/Megrez/4_Node.swift @@ -22,11 +22,10 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - import OrderedCollections extension Megrez { - class Node { + public class Node { let mutLM: LanguageModel var mutKey: String var mutScore: Double = 0 @@ -38,7 +37,7 @@ extension Megrez { var mutCandidateFixed: Bool = false var mutSelectedUnigramIndex: Int = 0 - init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) { + public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) { mutLM = LanguageModel() mutKey = key @@ -59,7 +58,7 @@ extension Megrez { } } - func node(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) { + public func node(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) { mutKey = key var unigrams = unigrams unigrams.sort { @@ -82,14 +81,14 @@ extension Megrez { } } - func primeNodeWithPreceedingKeyValues(keyValues: [KeyValuePair]) { + public func primeNodeWith(preceedingKeyValues: [KeyValuePair]) { // TODO: primeNodeWithPreceedingKeyValues // Please check the same function in C++ version of Gramambumlar for references. var newIndex = mutSelectedUnigramIndex var max = mutScore if !isCandidateFixed() { - for (index, _) in keyValues.enumerated() { + for (index, _) in preceedingKeyValues.enumerated() { let bigrams = mutPreceedingGramBigramMap.elements[index].1 for (_, bigram) in bigrams.enumerated() { if bigram.score > max { @@ -111,15 +110,15 @@ extension Megrez { } } - func isCandidateFixed() -> Bool { + public func isCandidateFixed() -> Bool { mutCandidateFixed } - func candidates() -> [KeyValuePair] { + public func candidates() -> [KeyValuePair] { mutCandidates } - func selectCandidateAtIndex(index: Int = 0, fix: Bool = true) { + public func selectCandidateAt(index: Int = 0, fix: Bool = true) { if index >= mutUnigrams.count { mutSelectedUnigramIndex = 0 } else { @@ -129,7 +128,7 @@ extension Megrez { mutScore = 99 } - func resetCandidate() { + public func resetCandidate() { mutSelectedUnigramIndex = 0 mutCandidateFixed = false if !mutUnigrams.isEmpty { @@ -137,7 +136,7 @@ extension Megrez { } } - func selectFloatingCandidateAtIndex(index: Int, score: Double) { + public func selectFloatingCandidateAt(index: Int, score: Double) { if index >= mutUnigrams.count { mutSelectedUnigramIndex = 0 } else { @@ -147,15 +146,15 @@ extension Megrez { mutScore = score } - func key() -> String { + public func key() -> String { mutKey } - func score() -> Double { + public func score() -> Double { mutScore } - func scoreForCandidate(candidate: String) -> Double { + public func scoreFor(candidate: String) -> Double { for unigram in mutUnigrams { if unigram.keyValue.value == candidate { return unigram.score @@ -164,15 +163,15 @@ extension Megrez { return 0.0 } - func currentKeyValue() -> KeyValuePair { + public func currentKeyValue() -> KeyValuePair { mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePair() : mutCandidates[mutSelectedUnigramIndex] } - func highestUnigramScore() -> Double { + public func highestUnigramScore() -> Double { mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score } - static func == (lhs: Node, rhs: Node) -> Bool { + public static func == (lhs: Node, rhs: Node) -> Bool { lhs.mutUnigrams == rhs.mutUnigrams && lhs.mutCandidates == rhs.mutCandidates && lhs.mutValueUnigramIndexMap == rhs.mutValueUnigramIndexMap && lhs.mutPreceedingGramBigramMap == rhs.mutPreceedingGramBigramMap diff --git a/Megrez/LanguageModel.swift b/Sources/Megrez/5_LanguageModel.swift similarity index 76% rename from Megrez/LanguageModel.swift rename to Sources/Megrez/5_LanguageModel.swift index d42c080..be59f1a 100644 --- a/Megrez/LanguageModel.swift +++ b/Sources/Megrez/5_LanguageModel.swift @@ -22,19 +22,21 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - extension Megrez { // 這裡充其量只是框架,回頭實際使用時需要派生一個型別、且重寫相關函數。 - class LanguageModel { - func unigramsForKey(key _: String) -> [Megrez.Unigram] { - [Megrez.Unigram]() + // 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函數的參數設計。 + open class LanguageModel { + public init() {} + + open func unigramsFor(key: String) -> [Megrez.Unigram] { + key.isEmpty ? [Megrez.Unigram]() : [Megrez.Unigram]() } - func bigramsForKeys(preceedingKey _: String, key _: String) -> [Megrez.Bigram] { - [Megrez.Bigram]() + open func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { + preceedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() } - func hasUnigramsForKey(key: String) -> Bool { + open func hasUnigramsFor(key: String) -> Bool { key.count != 0 } } diff --git a/Megrez/Bigram.swift b/Sources/Megrez/6_Bigram.swift similarity index 86% rename from Megrez/Bigram.swift rename to Sources/Megrez/6_Bigram.swift index d85a228..92f81f7 100644 --- a/Megrez/Bigram.swift +++ b/Sources/Megrez/6_Bigram.swift @@ -22,22 +22,21 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - extension Megrez { - class Bigram: Equatable { - var keyValue: KeyValuePair - var preceedingKeyValue: KeyValuePair - var score: Double + public class Bigram: Equatable { + public var keyValue: KeyValuePair + public var preceedingKeyValue: KeyValuePair + public var score: Double // var paired: String - init(preceedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) { + public init(preceedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) { self.keyValue = keyValue self.preceedingKeyValue = preceedingKeyValue self.score = score // paired = "(" + keyValue.paired + "|" + preceedingKeyValue.paired + "," + String(score) + ")" } - func hash(into hasher: inout Hasher) { + public func hash(into hasher: inout Hasher) { hasher.combine(keyValue) hasher.combine(preceedingKeyValue) hasher.combine(score) @@ -54,11 +53,11 @@ extension Megrez { // return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}" // } - static func == (lhs: Bigram, rhs: Bigram) -> Bool { + public static func == (lhs: Bigram, rhs: Bigram) -> Bool { lhs.preceedingKeyValue == rhs.preceedingKeyValue && lhs.keyValue == rhs.keyValue && lhs.score == rhs.score } - static func < (lhs: Bigram, rhs: Bigram) -> Bool { + public static func < (lhs: Bigram, rhs: Bigram) -> Bool { lhs.preceedingKeyValue < rhs.preceedingKeyValue || (lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.keyValue < rhs.keyValue)) } diff --git a/Megrez/Unigram.swift b/Sources/Megrez/6_Unigram.swift similarity index 85% rename from Megrez/Unigram.swift rename to Sources/Megrez/6_Unigram.swift index 9277874..eab26c6 100644 --- a/Megrez/Unigram.swift +++ b/Sources/Megrez/6_Unigram.swift @@ -22,27 +22,26 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - extension Megrez { - class Unigram: Equatable { - var keyValue: KeyValuePair - var score: Double + public class Unigram: Equatable { + public var keyValue: KeyValuePair + public var score: Double // var paired: String - init(keyValue: KeyValuePair, score: Double) { + public init(keyValue: KeyValuePair, score: Double) { self.keyValue = keyValue self.score = score // paired = "(" + keyValue.paired + "," + String(score) + ")" } - func hash(into hasher: inout Hasher) { + public func hash(into hasher: inout Hasher) { hasher.combine(keyValue) hasher.combine(score) // hasher.combine(paired) } // 這個函數不再需要了。 - static func compareScore(a: Unigram, b: Unigram) -> Bool { + public static func compareScore(a: Unigram, b: Unigram) -> Bool { a.score > b.score } @@ -56,11 +55,11 @@ extension Megrez { // return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}" // } - static func == (lhs: Unigram, rhs: Unigram) -> Bool { + public static func == (lhs: Unigram, rhs: Unigram) -> Bool { lhs.keyValue == rhs.keyValue && lhs.score == rhs.score } - static func < (lhs: Unigram, rhs: Unigram) -> Bool { + public static func < (lhs: Unigram, rhs: Unigram) -> Bool { lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.keyValue < rhs.keyValue) } diff --git a/Megrez/KeyValuePair.swift b/Sources/Megrez/7_KeyValuePair.swift similarity index 75% rename from Megrez/KeyValuePair.swift rename to Sources/Megrez/7_KeyValuePair.swift index aba3837..13c2f70 100644 --- a/Megrez/KeyValuePair.swift +++ b/Sources/Megrez/7_KeyValuePair.swift @@ -22,50 +22,49 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - extension Megrez { - class KeyValuePair: Equatable, Hashable, Comparable { - var key: String - var value: String - // var paired: String + public class KeyValuePair: Equatable, Hashable, Comparable { + public var key: String + public var value: String + // public var paired: String - init(key: String = "", value: String = "") { + public init(key: String = "", value: String = "") { self.key = key self.value = value // paired = "(" + key + "," + value + ")" } - func hash(into hasher: inout Hasher) { + public func hash(into hasher: inout Hasher) { hasher.combine(key) hasher.combine(value) // hasher.combine(paired) } - static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { lhs.key.count == rhs.key.count && lhs.value == rhs.value } - static func < (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func < (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { (lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value < rhs.value) } - static func > (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func > (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { (lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value > rhs.value) } - static func <= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func <= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { (lhs.key.count <= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value <= rhs.value) } - static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { (lhs.key.count >= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value) } - var description: String { + public var description: String { "(\(key), \(value))" } - var debugDescription: String { + public var debugDescription: String { "KeyValuePair(key: \(key), value: \(value))" } } diff --git a/Tests/MegrezTests/MegrezTests.swift b/Tests/MegrezTests/MegrezTests.swift new file mode 100644 index 0000000..ebcc32d --- /dev/null +++ b/Tests/MegrezTests/MegrezTests.swift @@ -0,0 +1,216 @@ +import OrderedCollections +import XCTest + +@testable import Megrez + +final class MegrezTests: XCTestCase { + // MARK: - Input Test + + func testInput() throws { + print("// 開始測試語言文字輸入處理") + let lmTestInput = SimpleLM(input: strSampleData) + let builder = Megrez.BlockReadingBuilder(lm: lmTestInput) + + builder.insertReadingAtCursor(reading: "gao1") + builder.insertReadingAtCursor(reading: "ji4") + builder.setCursorIndex(newIndex: 1) + builder.insertReadingAtCursor(reading: "ke1") + builder.setCursorIndex(newIndex: 0) + builder.deleteReadingAfterCursor() + builder.insertReadingAtCursor(reading: "gao1") + builder.setCursorIndex(newIndex: builder.length()) + builder.insertReadingAtCursor(reading: "gong1") + builder.insertReadingAtCursor(reading: "si1") + builder.insertReadingAtCursor(reading: "de5") + builder.insertReadingAtCursor(reading: "nian2") + builder.insertReadingAtCursor(reading: "zhong1") + builder.insertReadingAtCursor(reading: "jiang3") + builder.insertReadingAtCursor(reading: "jin1") + + let walker = Megrez.Walker(grid: builder.grid()) + + var walked: [Megrez.NodeAnchor] = walker.reverseWalk(at: builder.grid().width(), score: 0.0) + walked = walked.reversed() + + var composed: [String] = [] + for phrase in walked { + if let value = phrase.node?.currentKeyValue().value { + composed.append(value) + } + } + print(composed) + let correctResult = ["高科技", "公司", "的", "年中", "獎金"] + print(" - 上述列印結果理應於下面這行一致:") + print(correctResult) + + XCTAssertEqual(composed, correctResult) + } + + // MARK: - Test Word Segmentation + + func testWordSegmentation() throws { + print("// 開始測試語句分節處理") + let lmTestSegmentation = SimpleLM(input: strSampleData, swapKeyValue: true) + let builder = Megrez.BlockReadingBuilder(lm: lmTestSegmentation) + + builder.insertReadingAtCursor(reading: "高") + builder.insertReadingAtCursor(reading: "科") + builder.insertReadingAtCursor(reading: "技") + builder.insertReadingAtCursor(reading: "公") + builder.insertReadingAtCursor(reading: "司") + builder.insertReadingAtCursor(reading: "的") + builder.insertReadingAtCursor(reading: "年") + builder.insertReadingAtCursor(reading: "終") + builder.insertReadingAtCursor(reading: "獎") + builder.insertReadingAtCursor(reading: "金") + + let walker = Megrez.Walker(grid: builder.grid()) + var walked: [Megrez.NodeAnchor] = walker.reverseWalk(at: builder.grid().width(), score: 0.0) + walked = walked.reversed() + + var segmented: [String] = [] + for phrase in walked { + if let key = phrase.node?.currentKeyValue().key { + segmented.append(key) + } + } + print(segmented) + let correctResult = ["高科技", "公司", "的", "年終", "獎金"] + print(" - 上述列印結果理應於下面這行一致:") + print(correctResult) + + XCTAssertEqual(segmented, correctResult) + } +} + +// MARK: - 用以測試的型別 + +class SimpleLM: Megrez.LanguageModel { + var mutDatabase: OrderedDictionary = [:] + + init(input: String, swapKeyValue: Bool = false) { + super.init() + let sstream = input.components(separatedBy: "\n") + for line in sstream { + if line.isEmpty || line.hasPrefix("#") { + continue + } + + let linestream = line.components(separatedBy: " ") + let col0 = linestream[0] + let col1 = linestream[1] + let col2 = linestream[2] + + let u = Megrez.Unigram(keyValue: Megrez.KeyValuePair(), score: 0) + + if swapKeyValue { + u.keyValue.key = col1 + u.keyValue.value = col0 + } else { + u.keyValue.key = col0 + u.keyValue.value = col1 + } + + u.score = Double(col2)! + mutDatabase[u.keyValue.key, default: []].append(u) + } + } + + override func unigramsFor(key: String) -> [Megrez.Unigram] { + if let f = mutDatabase[key] { + return f + } else { + return [Megrez.Unigram]() + } + } + + override func hasUnigramsFor(key: String) -> Bool { + mutDatabase.keys.contains(key) + } +} + +// MARK: - 用以測試的詞頻數據 + +let strSampleData = #""" +# +# 下述詞頻資料取自 libTaBE 資料庫 (http://sourceforge.net/projects/libtabe/) +# (2002 最終版). 該專案於 1999 年由 Pai-Hsiang Hsiao 發起、以 BSD 授權發行。 +# +si1 絲 -9.495858 +si1 思 -9.00644 +si1 私 -99.000000 +si1 斯 -8.091803 +si1 司 -99.000000 +si1 嘶 -3.53987 +si1 撕 -2.259095 +gao1 高 -7.17551 +ke1 顆 -10.574273 +ke1 棵 -11.504072 +ke1 刻 -10.450457 +ke1 科 -7.171052 +ke1 柯 -99.000000 +gao1 膏 -11.928720 +gao1 篙 -3.624335 +gao1 糕 -2.390804 +de5 的 -3.516024 +di2 的 -3.516024 +di4 的 -3.516024 +zhong1 中 -5.809297 +de5 得 -7.427179 +gong1 共 -8.381971 +gong1 供 -8.50463 +ji4 既 -99.000000 +jin1 今 -8.034095 +gong1 紅 -8.858181 +ji4 際 -7.608341 +ji4 季 -99.000000 +jin1 金 -7.290109 +ji4 騎 -10.939895 +zhong1 終 -99.000000 +ji4 記 -99.000000 +ji4 寄 -99.000000 +jin1 斤 -99.000000 +ji4 繼 -9.75317 +ji4 計 -7.926683 +ji4 暨 -8.373022 +zhong1 鐘 -9.877580 +jin1 禁 -10.711079 +gong1 公 -7.877973 +gong1 工 -7.822167 +gong1 攻 -99.000000 +gong1 功 -99.000000 +gong1 宮 -99.000000 +zhong1 鍾 -9.685671 +ji4 繫 -10.425662 +gong1 弓 -99.000000 +gong1 恭 -99.000000 +ji4 劑 -8.888722 +ji4 祭 -10.204425 +jin1 浸 -11.378321 +zhong1 盅 -99.000000 +ji4 忌 -99.000000 +ji4 技 -8.450826 +jin1 筋 -11.074890 +gong1 躬 -99.000000 +ji4 冀 -2.045357 +zhong1 忠 -99.000000 +ji4 妓 -99.000000 +ji4 濟 -9.517568 +ji4 薊 -2.02587 +jin1 巾 -99.000000 +jin1 襟 -2.784206 +nian2 年 -6.08655 +jiang3 講 -9.164384 +jiang3 獎 -8.690941 +jiang3 蔣 -10.27828 +nian2 黏 -11.336864 +nian2 粘 -11.285740 +jiang3 槳 -2.492933 +gong1si1 公司 -6.299461 +ke1ji4 科技 -6.73663 +ji4gong1 濟公 -3.336653 +jiang3jin1 獎金 -10.344678 +nian2zhong1 年終 -11.668947 +nian2zhong1 年中 -11.373044 +gao1ke1ji4 高科技 -9.842421 +"""# diff --git a/Megrez/main.swift b/main.swift similarity index 97% rename from Megrez/main.swift rename to main.swift index cf8db8b..fc2a5d5 100644 --- a/Megrez/main.swift +++ b/main.swift @@ -22,8 +22,8 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - import Foundation +import Megrez import OrderedCollections // MARK: - 用以測試的詞頻數據 @@ -118,6 +118,7 @@ class SimpleLM: Megrez.LanguageModel { var mutDatabase: OrderedDictionary = [:] init(input: String, swapKeyValue: Bool = false) { + super.init() let sstream = input.components(separatedBy: "\n") for line in sstream { if line.isEmpty || line.hasPrefix("#") { @@ -144,7 +145,7 @@ class SimpleLM: Megrez.LanguageModel { } } - override func unigramsForKey(key: String) -> [Megrez.Unigram] { + override func unigramsFor(key: String) -> [Megrez.Unigram] { if let f = mutDatabase[key] { return f } else { @@ -152,7 +153,7 @@ class SimpleLM: Megrez.LanguageModel { } } - func hasUnigramsForKey(_ key: String) -> Bool { + override func hasUnigramsFor(key: String) -> Bool { mutDatabase.keys.contains(key) } }