From 82e9b8cbff5a44311f1dff164bb3c199c77b11a0 Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Tue, 8 Jul 2025 22:57:11 +0900 Subject: [PATCH] refactor: introduce Lattice type for more unified implementation --- .../Core/FullInputProcessing.swift | 6 ++-- ...lInputProcessingWithPrefixConstraint.swift | 10 +++--- .../Core/PrefixCompletionProcessing.swift | 12 +++---- .../Core/SuffixReplacementProcessing.swift | 36 +++++++++---------- .../Core/UnchangedInputProcessing.swift | 6 ++-- .../ConversionAlgorithms/Lattice.swift | 7 ++++ .../ConversionAlgorithms/Zenzai/zenzai.swift | 16 ++++----- .../ConverterAPI/KanaKanjiConverter.swift | 18 +++++----- 8 files changed, 59 insertions(+), 52 deletions(-) create mode 100644 Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift index 69bd523..a560218 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift @@ -11,7 +11,7 @@ import SwiftUtils extension Kana2Kanji { /// Latticeを構成する基本単位 - typealias Nodes = [[LatticeNode]] + typealias Nodes = Lattice /// カナを漢字に変換する関数, 前提はなくかな列が与えられた場合。 /// - Parameters: @@ -29,7 +29,7 @@ extension Kana2Kanji { /// (3)(1)のregisterされた結果をresultノードに追加していく。この際EOSとの連接計算を行っておく。 /// /// (4)ノードをアップデートした上で返却する。 - func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, nodes: Nodes) { + func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)") let count: Int = inputData.input.count let result: LatticeNode = LatticeNode.EOSNode @@ -90,7 +90,7 @@ extension Kana2Kanji 
{ } } } - return (result: result, nodes: nodes) + return (result: result, lattice: Lattice(nodes: nodes)) } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift index 32b2411..efd097d 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift @@ -18,13 +18,13 @@ extension Kana2Kanji { /// (3)(1)のregisterされた結果をresultノードに追加していく。この際EOSとの連接計算を行っておく。 /// /// (4)ノードをアップデートした上で返却する。 - func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, nodes: Nodes) { + func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) { debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)") let count: Int = inputData.input.count let result: LatticeNode = LatticeNode.EOSNode - let nodes: [[LatticeNode]] = (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)} + let lattice: Lattice = Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}) // 「i文字目から始まるnodes」に対して - for (i, nodeArray) in nodes.enumerated() { + for (i, nodeArray) in lattice.nodes.enumerated() { // それぞれのnodeに対して for node in nodeArray { if node.prevs.isEmpty { @@ -61,7 +61,7 @@ extension Kana2Kanji { Array(($0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word).utf8) } // nodeの繋がる次にあり得る全てのnextnodeに対して - for nextnode in nodes[nextIndex] { + for nextnode in lattice.nodes[nextIndex] { // クラスの連続確率を計算する。 let 
ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid) // nodeの持っている全てのprevnodeに対して @@ -97,7 +97,7 @@ extension Kana2Kanji { } } } - return (result: result, nodes: nodes) + return (result: result, lattice: lattice) } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift index 996d04b..689e4be 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift @@ -15,13 +15,13 @@ extension Kana2Kanji { /// (1)まず、計算済みnodeの確定分以降を取り出し、registeredにcompletedDataの値を反映したBOSにする。 /// /// (2)次に、再度計算して良い候補を得る。 - func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes), needTypoCorrection: Bool) -> (result: LatticeNode, nodes: Nodes) { + func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { debug("確定直後の変換、前は:", previousResult.inputData, "後は:", inputData) let count = inputData.input.count // (1) let start = RegisteredNode.fromLastCandidate(completedData) - let nodes: Nodes = previousResult.nodes.suffix(count) - for (i, nodeArray) in nodes.enumerated() { + let lattice = Lattice(nodes: previousResult.lattice.nodes.suffix(count)) + for (i, nodeArray) in lattice.nodes.enumerated() { if i == .zero { for node in nodeArray { node.prevs = [start] @@ -39,7 +39,7 @@ extension Kana2Kanji { // (2) let result = LatticeNode.EOSNode - for (i, nodeArray) in nodes.enumerated() { + for (i, nodeArray) in lattice.nodes.enumerated() { for node in nodeArray { if node.prevs.isEmpty { continue @@ -60,7 +60,7 @@ extension 
Kana2Kanji { let nextIndex = node.inputRange.endIndex // 文字数がcountと等しくない場合は先に進む if nextIndex != count { - for nextnode in nodes[nextIndex] { + for nextnode in lattice.nodes[nextIndex] { if self.dicdataStore.shouldBeRemoved(data: nextnode.data) { continue } @@ -93,6 +93,6 @@ extension Kana2Kanji { } } - return (result: result, nodes: nodes) + return (result: result, lattice: lattice) } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift index 976a025..beaacec 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift @@ -24,35 +24,35 @@ extension Kana2Kanji { /// /// (5)ノードをアップデートした上で返却する。 - func kana2lattice_changed(_ inputData: ComposingText, N_best: Int, counts: (deleted: Int, added: Int), previousResult: (inputData: ComposingText, nodes: Nodes), needTypoCorrection: Bool) -> (result: LatticeNode, nodes: Nodes) { + func kana2lattice_changed(_ inputData: ComposingText, N_best: Int, counts: (deleted: Int, added: Int), previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { // (0) let count = inputData.input.count let commonCount = previousResult.inputData.input.count - counts.deleted debug("kana2lattice_changed", inputData, counts, previousResult.inputData, count, commonCount) // (1) - var nodes = previousResult.nodes.prefix(commonCount).map {(nodes: [LatticeNode]) in + var lattice = Lattice(nodes: previousResult.lattice.nodes.prefix(commonCount).map {(nodes: [LatticeNode]) in nodes.filter {$0.inputRange.endIndex <= commonCount} - } - while nodes.last?.isEmpty ?? false { - nodes.removeLast() + }) + while lattice.nodes.last?.isEmpty ?? 
false { + lattice.nodes.removeLast() } let terminalNodes: Nodes if counts.added == 0 { - terminalNodes = nodes.map { + terminalNodes = Lattice(nodes: lattice.nodes.map { $0.filter { $0.inputRange.endIndex == count } - } + }) } else { // (2) - let addedNodes: [[LatticeNode]] = (0.. (result: LatticeNode, nodes: Nodes) { + func kana2lattice_no_change(N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice)) -> (result: LatticeNode, lattice: Lattice) { debug("キャッシュから復元、元の文字は:", previousResult.inputData.convertTarget) let count = previousResult.inputData.input.count // (1) let result = LatticeNode.EOSNode - for nodeArray in previousResult.nodes { + for nodeArray in previousResult.lattice.nodes { for node in nodeArray { if node.prevs.isEmpty { continue @@ -50,7 +50,7 @@ extension Kana2Kanji { } // (2) - return (result: result, nodes: previousResult.nodes) + return (result: result, lattice: previousResult.lattice) } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift new file mode 100644 index 0000000..4d140fc --- /dev/null +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift @@ -0,0 +1,7 @@ +struct Lattice { + init(nodes: [[LatticeNode]] = []) { + self.nodes = nodes + } + + var nodes: [[LatticeNode]] +} diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift index d9a6072..19a82ec 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift @@ -61,11 +61,11 @@ extension Kana2Kanji { requestRichCandidates: Bool, personalizationMode: (mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode, base: EfficientNGram, personal: EfficientNGram)?, versionDependentConfig: ConvertRequestOptions.ZenzaiVersionDependentMode - ) -> (result: 
LatticeNode, nodes: Nodes, cache: ZenzaiCache) { + ) -> (result: LatticeNode, lattice: Lattice, cache: ZenzaiCache) { var constraint = zenzaiCache?.getNewConstraint(for: inputData) ?? PrefixConstraint([]) debug("initial constraint", constraint) let eosNode = LatticeNode.EOSNode - var nodes: Kana2Kanji.Nodes = [] + var lattice: Lattice = Lattice(nodes: []) var constructedCandidates: [(RegisteredNode, Candidate)] = [] var insertedCandidates: [(RegisteredNode, Candidate)] = [] defer { @@ -82,9 +82,9 @@ extension Kana2Kanji { // 制約がついている場合は高速になるので、N=3としている self.kana2lattice_all_with_prefix_constraint(inputData, N_best: 3, constraint: constraint) } - if nodes.isEmpty { + if lattice.nodes.isEmpty { // 初回のみ - nodes = draftResult.nodes + lattice = draftResult.lattice } let candidates = draftResult.result.getCandidateData().map(self.processClauseCandidate) constructedCandidates.append(contentsOf: zip(draftResult.result.prevs, candidates)) @@ -100,7 +100,7 @@ extension Kana2Kanji { debug("best was not found!") // Emptyの場合 // 制約が満たせない場合は無視する - return (eosNode, nodes, ZenzaiCache(inputData, constraint: PrefixConstraint([]), satisfyingCandidate: nil)) + return (eosNode, lattice, ZenzaiCache(inputData, constraint: PrefixConstraint([]), satisfyingCandidate: nil)) } debug("Constrained draft modeling", -start.timeIntervalSinceNow) @@ -111,7 +111,7 @@ extension Kana2Kanji { if inferenceLimit == 0 { debug("inference limit! 
\(candidate.text) is used for excuse") // When inference occurs more than maximum times, then just return result at this point - return (eosNode, nodes, ZenzaiCache(inputData, constraint: constraint, satisfyingCandidate: candidate)) + return (eosNode, lattice, ZenzaiCache(inputData, constraint: constraint, satisfyingCandidate: candidate)) } let reviewResult = zenz.candidateEvaluate( convertTarget: inputData.convertTarget, @@ -159,9 +159,9 @@ extension Kana2Kanji { } } if satisfied { - return (eosNode, nodes, ZenzaiCache(inputData, constraint: constraint, satisfyingCandidate: candidate)) + return (eosNode, lattice, ZenzaiCache(inputData, constraint: constraint, satisfyingCandidate: candidate)) } else { - return (eosNode, nodes, ZenzaiCache(inputData, constraint: constraint, satisfyingCandidate: nil)) + return (eosNode, lattice, ZenzaiCache(inputData, constraint: constraint, satisfyingCandidate: nil)) } case .continue: break reviewLoop diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift index 268f95c..5c75878 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift @@ -34,7 +34,7 @@ import EfficientNGram // 前回の変換や確定の情報を取っておく部分。 private var previousInputData: ComposingText? - private var nodes: [[LatticeNode]] = [] + private var lattice: Lattice = Lattice() private var completedData: Candidate? private var lastData: DicdataElement? 
/// Zenzaiのためのzenzモデル @@ -49,7 +49,7 @@ import EfficientNGram self.zenzaiPersonalization = nil self.zenzaiCache = nil self.previousInputData = nil - self.nodes = [] + self.lattice = .init() self.completedData = nil self.lastData = nil } @@ -448,9 +448,9 @@ import EfficientNGram /// 重複のない変換候補。 /// - Note: /// 現在の実装は非常に複雑な方法で候補の順序を決定している。 - private func processResult(inputData: ComposingText, result: (result: LatticeNode, nodes: [[LatticeNode]]), options: ConvertRequestOptions) -> ConversionResult { + private func processResult(inputData: ComposingText, result: (result: LatticeNode, lattice: Lattice), options: ConvertRequestOptions) -> ConversionResult { self.previousInputData = inputData - self.nodes = result.nodes + self.lattice = result.lattice let clauseResult = result.result.getCandidateData() if clauseResult.isEmpty { let candidates = self.getUniqueCandidate(self.getAdditionalCandidate(inputData, options: options)) @@ -538,7 +538,7 @@ import EfficientNGram seenCandidate.formUnion(clause_candidates.map {$0.text}) // 最初の辞書データ - let dicCandidates: [Candidate] = result.nodes[0] + let dicCandidates: [Candidate] = result.lattice.nodes[0] .map { Candidate( text: $0.data.word, @@ -605,7 +605,7 @@ import EfficientNGram /// - N_best: 計算途中で保存する候補数。実際に得られる候補数とは異なる。 /// - Returns: /// 結果のラティスノードと、計算済みノードの全体 - private func convertToLattice(_ inputData: ComposingText, N_best: Int, zenzaiMode: ConvertRequestOptions.ZenzaiMode) -> (result: LatticeNode, nodes: [[LatticeNode]])? { + private func convertToLattice(_ inputData: ComposingText, N_best: Int, zenzaiMode: ConvertRequestOptions.ZenzaiMode) -> (result: LatticeNode, lattice: Lattice)? 
{ if inputData.convertTarget.isEmpty { return nil } @@ -642,7 +642,7 @@ import EfficientNGram // 完全一致の場合 if previousInputData == inputData { - let result = converter.kana2lattice_no_change(N_best: N_best, previousResult: (inputData: previousInputData, nodes: nodes)) + let result = converter.kana2lattice_no_change(N_best: N_best, previousResult: (inputData: previousInputData, lattice: self.lattice)) self.previousInputData = inputData return result } @@ -650,7 +650,7 @@ import EfficientNGram // 文節確定の後の場合 if let completedData, previousInputData.inputHasSuffix(inputOf: inputData) { debug("\(#function): 文節確定用の関数を呼びます、確定された文節は\(completedData)") - let result = converter.kana2lattice_afterComplete(inputData, completedData: completedData, N_best: N_best, previousResult: (inputData: previousInputData, nodes: nodes), needTypoCorrection: needTypoCorrection) + let result = converter.kana2lattice_afterComplete(inputData, completedData: completedData, N_best: N_best, previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection) self.previousInputData = inputData self.completedData = nil return result @@ -662,7 +662,7 @@ import EfficientNGram let diff = inputData.differenceSuffix(to: previousInputData) debug("\(#function): 最後尾文字置換用の関数を呼びます、差分は\(diff)") - let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: (diff.deleted, diff.addedCount), previousResult: (inputData: previousInputData, nodes: nodes), needTypoCorrection: needTypoCorrection) + let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: (diff.deleted, diff.addedCount), previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection) self.previousInputData = inputData return result }