mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
fix: depth関連の実装を適切に修正
This commit is contained in:
@ -275,7 +275,7 @@ public final class DicdataStore {
|
|||||||
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
|
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
|
||||||
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
|
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
|
||||||
|
|
||||||
var temporaryMemoryDicdata: [DicdataElement] = []
|
var temporaryMemoryDicdata: [Int: [DicdataElement]] = [:]
|
||||||
// ジェネレータを舐める
|
// ジェネレータを舐める
|
||||||
while let (characters, info) = generator.next() {
|
while let (characters, info) = generator.next() {
|
||||||
guard let firstCharacter = characters.first else {
|
guard let firstCharacter = characters.first else {
|
||||||
@ -292,7 +292,7 @@ public final class DicdataStore {
|
|||||||
for key in keys {
|
for key in keys {
|
||||||
withMutableValue(&targetLOUDS[key]) { helper in
|
withMutableValue(&targetLOUDS[key]) { helper in
|
||||||
if helper == nil, let louds = self.loadLOUDS(query: key) {
|
if helper == nil, let louds = self.loadLOUDS(query: key) {
|
||||||
helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds, depth: 0 ..< .max)
|
helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds)
|
||||||
}
|
}
|
||||||
guard helper != nil else {
|
guard helper != nil else {
|
||||||
return
|
return
|
||||||
@ -303,20 +303,22 @@ public final class DicdataStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 短期記憶についてはこの位置で処理する
|
// 短期記憶についてはこの位置で処理する
|
||||||
let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs, depth: 0 ..< .max)
|
let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs)
|
||||||
updated = updated || !(result.dicdata.isEmpty)
|
updated = updated || !(result.dicdata.isEmpty)
|
||||||
availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
|
availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
|
||||||
for data in result.dicdata {
|
for (depth, dicdata) in result.dicdata {
|
||||||
if info.penalty.isZero {
|
for data in dicdata {
|
||||||
temporaryMemoryDicdata.append(data)
|
if info.penalty.isZero {
|
||||||
|
temporaryMemoryDicdata[depth, default: []].append(data)
|
||||||
|
}
|
||||||
|
let ratio = Self.penaltyRatio[data.lcid]
|
||||||
|
let pUnit: PValue = Self.getPenalty(data: data) / 2 // 負の値
|
||||||
|
let adjust = pUnit * info.penalty * ratio
|
||||||
|
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
temporaryMemoryDicdata[depth, default: []].append(data.adjustedData(adjust))
|
||||||
}
|
}
|
||||||
let ratio = Self.penaltyRatio[data.lcid]
|
|
||||||
let pUnit: PValue = Self.getPenalty(data: data) / 2 // 負の値
|
|
||||||
let adjust = pUnit * info.penalty * ratio
|
|
||||||
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
temporaryMemoryDicdata.append(data.adjustedData(adjust))
|
|
||||||
}
|
}
|
||||||
if availableMaxIndex < characters.endIndex - 1 {
|
if availableMaxIndex < characters.endIndex - 1 {
|
||||||
// 到達不可能だったパスを通知
|
// 到達不可能だったパスを通知
|
||||||
@ -326,11 +328,14 @@ public final class DicdataStore {
|
|||||||
stringToInfo.append((characters, info))
|
stringToInfo.append((characters, info))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
let minCount = stringToInfo.map {$0.0.count}.min() ?? 0
|
||||||
|
print(#function, minCount, stringToInfo.map{$0.0})
|
||||||
return (
|
return (
|
||||||
Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
|
Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
|
||||||
targetLOUDS.map { ($0.key, $0.value.indices)},
|
targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )},
|
||||||
temporaryMemoryDicdata
|
temporaryMemoryDicdata.flatMap {
|
||||||
|
minCount < $0.key + 1 ? $0.value : []
|
||||||
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
/// prefixを起点として、それに続く語(prefix match)をLOUDS上で探索する関数。
|
/// prefixを起点として、それに続く語(prefix match)をLOUDS上で探索する関数。
|
||||||
@ -490,7 +495,9 @@ public final class DicdataStore {
|
|||||||
}
|
}
|
||||||
if learningManager.enabled {
|
if learningManager.enabled {
|
||||||
// temporalな学習結果にpenaltyを加えて追加する
|
// temporalな学習結果にpenaltyを加えて追加する
|
||||||
dicdata.append(contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata)
|
dicdata.append(
|
||||||
|
contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata.flatMap { $0.value }
|
||||||
|
)
|
||||||
}
|
}
|
||||||
for (key, value) in stringToEndIndex {
|
for (key, value) in stringToEndIndex {
|
||||||
let convertTarget = String(key)
|
let convertTarget = String(key)
|
||||||
|
@ -584,22 +584,22 @@ struct TemporalLearningMemoryTrie {
|
|||||||
return nodes[index].dataIndices.map {self.dicdata[$0]}
|
return nodes[index].dataIndices.map {self.dicdata[$0]}
|
||||||
}
|
}
|
||||||
|
|
||||||
func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) {
|
func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
|
||||||
var index = 0
|
var index = 0
|
||||||
var availableMaxIndex = 0
|
var availableMaxIndex = 0
|
||||||
var indices: [Int] = []
|
var indices: [Int: [Int]] = [:]
|
||||||
for (offset, char) in chars.enumerated() {
|
for (offset, char) in chars.enumerated() {
|
||||||
if let nextIndex = nodes[index].children[char] {
|
if let nextIndex = nodes[index].children[char] {
|
||||||
availableMaxIndex = index
|
availableMaxIndex = index
|
||||||
index = nextIndex
|
index = nextIndex
|
||||||
if depth.contains(offset) {
|
if depth.contains(offset) {
|
||||||
indices.append(contentsOf: nodes[index].dataIndices)
|
indices[offset] = nodes[index].dataIndices
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return (indices.map {self.dicdata[$0]}, availableMaxIndex)
|
return (indices.mapValues { items in items.map { self.dicdata[$0] }}, availableMaxIndex)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (indices.map {self.dicdata[$0]}, availableMaxIndex)
|
return (indices.mapValues { items in items.map { self.dicdata[$0] }}, availableMaxIndex)
|
||||||
}
|
}
|
||||||
|
|
||||||
func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
|
func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
|
||||||
@ -720,9 +720,9 @@ final class LearningManager {
|
|||||||
return self.temporaryMemory.perfectMatch(chars: charIDs)
|
return self.temporaryMemory.perfectMatch(chars: charIDs)
|
||||||
}
|
}
|
||||||
|
|
||||||
func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) {
|
func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int> = 0 ..< .max) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
|
||||||
guard let options, options.learningType.needUsingMemory else {
|
guard let options, options.learningType.needUsingMemory else {
|
||||||
return ([], 0)
|
return ([:], 0)
|
||||||
}
|
}
|
||||||
return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth)
|
return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth)
|
||||||
}
|
}
|
||||||
|
@ -240,25 +240,30 @@ package struct LOUDS: Sendable {
|
|||||||
// 辞書順でソートする
|
// 辞書順でソートする
|
||||||
var targets = targets
|
var targets = targets
|
||||||
targets.sort(by: Self.lexLessThan)
|
targets.sort(by: Self.lexLessThan)
|
||||||
var helper = MovingTowardPrefixSearchHelper(louds: self, depth: depth)
|
var helper = MovingTowardPrefixSearchHelper(louds: self)
|
||||||
for target in targets {
|
for target in targets {
|
||||||
_ = helper.update(target: target)
|
_ = helper.update(target: target)
|
||||||
}
|
}
|
||||||
return helper.indices
|
return helper.indicesInDepth(depth: depth)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MovingTowardPrefixSearchHelper {
|
struct MovingTowardPrefixSearchHelper {
|
||||||
init(louds: LOUDS, depth: Range<Int>) {
|
init(louds: LOUDS) {
|
||||||
self.louds = louds
|
self.louds = louds
|
||||||
self.depth = depth
|
|
||||||
}
|
}
|
||||||
let louds: LOUDS
|
let louds: LOUDS
|
||||||
let depth: Range<Int>
|
|
||||||
// 最終出力となる
|
// 最終出力となる
|
||||||
var indices: [Int] = []
|
var indices: [(depth: Int, index: Int)] = []
|
||||||
// 現在の探索結果を保存しておく
|
// 現在の探索結果を保存しておく
|
||||||
var stack: [(nodeIndex: Int, char: UInt8)] = []
|
var stack: [(nodeIndex: Int, char: UInt8)] = []
|
||||||
|
|
||||||
|
func indicesInDepth(depth: Range<Int>) -> [Int] {
|
||||||
|
return self.indices
|
||||||
|
.lazy
|
||||||
|
.filter { depth.contains($0.depth) }
|
||||||
|
.map { $0.index }
|
||||||
|
}
|
||||||
|
|
||||||
/// `target`を用いて更新する
|
/// `target`を用いて更新する
|
||||||
/// - Parameter target: 検索対象の`CharID`の列
|
/// - Parameter target: 検索対象の`CharID`の列
|
||||||
/// - Returns: `updated`はこれによって`indices`の更新があったかどうか。`availableMaxIndex`はアクセスに成功した最大インデックス
|
/// - Returns: `updated`はこれによって`indices`の更新があったかどうか。`availableMaxIndex`はアクセスに成功した最大インデックス
|
||||||
@ -266,7 +271,7 @@ package struct LOUDS: Sendable {
|
|||||||
var updated = false
|
var updated = false
|
||||||
var availableMaxIndex = 0
|
var availableMaxIndex = 0
|
||||||
// iがupperBoundを超えない範囲で検索を行う
|
// iがupperBoundを超えない範囲で検索を行う
|
||||||
for (i, char) in target.enumerated() where i < self.depth.upperBound {
|
for (i, char) in target.enumerated() {
|
||||||
if i < self.stack.count, self.stack[i].char == char {
|
if i < self.stack.count, self.stack[i].char == char {
|
||||||
// すでに探索済み
|
// すでに探索済み
|
||||||
availableMaxIndex = i
|
availableMaxIndex = i
|
||||||
@ -280,11 +285,9 @@ package struct LOUDS: Sendable {
|
|||||||
// このケースでは、探索を行う
|
// このケースでは、探索を行う
|
||||||
// 直前のstackを取り出し、そのnodeIndexから次のcharを探索する
|
// 直前のstackを取り出し、そのnodeIndexから次のcharを探索する
|
||||||
if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) {
|
if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) {
|
||||||
if self.depth.contains(i) {
|
self.indices.append((i, nodeIndex))
|
||||||
self.indices.append(nodeIndex)
|
updated = true
|
||||||
updated = true
|
availableMaxIndex = i
|
||||||
availableMaxIndex = i
|
|
||||||
}
|
|
||||||
self.stack.append((nodeIndex, char))
|
self.stack.append((nodeIndex, char))
|
||||||
} else {
|
} else {
|
||||||
// 見つからなかった場合、打ち切る
|
// 見つからなかった場合、打ち切る
|
||||||
|
@ -27,7 +27,7 @@ final class TemporalLearningMemoryTrieTests: XCTestCase {
|
|||||||
XCTAssertEqual(result1.first?.word, element1.word)
|
XCTAssertEqual(result1.first?.word, element1.word)
|
||||||
XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
|
XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
|
||||||
|
|
||||||
let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count).dicdata
|
let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count).dicdata.flatMap { $0.value }
|
||||||
XCTAssertEqual(result2.map { $0.word }, [element2.word])
|
XCTAssertEqual(result2.map { $0.word }, [element2.word])
|
||||||
|
|
||||||
let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))
|
let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))
|
||||||
|
Reference in New Issue
Block a user