mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
Merge pull request #189 from azooKey/codex/modify-learningmanager.init-and-add-error-handling
feat: `.pause`が残っている場合に学習データの復旧を試みるよう修正
This commit is contained in:
@ -171,6 +171,8 @@ ComposingText(
|
||||
|
||||
ステップ3の実行中にエラーが生じた場合は`.pause`が残るため、次回キーボードを開いた際は学習を停止状態にします。その後、適切なタイミングで再度ステップ3を実行することで、安全に全てのファイルを更新することができます。
|
||||
|
||||
azooKeyKanaKanjiConverter では、変換器を開いた際に `.pause` ファイルが残っている場合、自動的に空の一時記憶とマージを試みて `.pause` を削除し、学習機能を復旧します。
|
||||
|
||||
## 変換候補の並び順
|
||||
|
||||
変換候補の並び順の決定はとても難しい問題です。azooKeyではおおよそ以下のようになっています。`Converter.swift`が並び順を決めていますが、とても複雑な実装になっているため、改善したいと思っています。
|
||||
|
@ -76,6 +76,8 @@ let options = ConvertRequestOptions.withDefaultDictionary(
|
||||
)
|
||||
```
|
||||
|
||||
変換器を開く際に、保存処理が中断されたことを示す `.pause` ファイルが残っている場合は、変換器が自動的に学習データの復旧を試み、`.pause` ファイルを削除します。
|
||||
|
||||
### `ComposingText`
|
||||
`ComposingText`は入力管理を行いつつ変換をリクエストするためのAPIです。ローマ字入力などを適切にハンドルするために利用できます。詳しくは[ドキュメント](./Docs/composing_text.md)を参照してください。
|
||||
|
||||
|
@ -139,7 +139,7 @@ public final class DicdataStore {
|
||||
} else {
|
||||
self.requestOptions = value
|
||||
}
|
||||
let shouldReset = self.learningManager.setRequestOptions(options: value)
|
||||
let shouldReset = self.learningManager.setRequestOptions(value)
|
||||
if shouldReset {
|
||||
self.reloadMemory()
|
||||
}
|
||||
@ -226,7 +226,7 @@ public final class DicdataStore {
|
||||
}
|
||||
}
|
||||
|
||||
private func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
|
||||
func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
|
||||
guard let louds = self.loadLOUDS(query: query) else {
|
||||
return []
|
||||
}
|
||||
|
@ -626,7 +626,7 @@ final class LearningManager {
|
||||
debug("Error: louds/charID.chidが存在しません。このエラーは深刻ですが、テスト時には無視できる場合があります。Description: \(error)")
|
||||
}
|
||||
}
|
||||
private var char2UInt8: [Character: UInt8] = [:]
|
||||
var char2UInt8: [Character: UInt8] = [:]
|
||||
|
||||
static var today: UInt16 {
|
||||
UInt16(Int(Date().timeIntervalSince1970) / 86400) - 19000
|
||||
@ -653,34 +653,50 @@ final class LearningManager {
|
||||
(!self.memoryCollapsed) && self.options.learningType.needUsingMemory
|
||||
}
|
||||
|
||||
init() {
|
||||
self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: self.options.memoryDirectoryURL)
|
||||
if memoryCollapsed {
|
||||
// 学習データが壊れている状態であることを警告する
|
||||
debug("LearningManager init: Memory Collapsed")
|
||||
}
|
||||
if !options.learningType.needUsingMemory {
|
||||
return
|
||||
}
|
||||
Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL, target: &char2UInt8)
|
||||
}
|
||||
init() {}
|
||||
|
||||
/// - Returns: Whether the cache should be reset or not.
|
||||
func setRequestOptions(options: ConvertRequestOptions) -> Bool {
|
||||
// 変更があったら`char2Int8`を読み込み直す
|
||||
if options.dictionaryResourceURL != self.options.dictionaryResourceURL {
|
||||
Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL, target: &char2UInt8)
|
||||
func setRequestOptions(_ newOptions: ConvertRequestOptions) -> Bool {
|
||||
// 更新の必要がなければ何もしない
|
||||
if !newOptions.learningType.needUsingMemory {
|
||||
self.options = newOptions
|
||||
return false
|
||||
}
|
||||
self.options = options
|
||||
// 変更があったら`char2Int8`を読み込み直す
|
||||
if newOptions.dictionaryResourceURL != self.options.dictionaryResourceURL {
|
||||
Self.updateChar2Int8(bundleURL: newOptions.dictionaryResourceURL, target: &self.char2UInt8)
|
||||
}
|
||||
// ここで更新
|
||||
self.options = newOptions
|
||||
|
||||
switch options.learningType {
|
||||
// 学習の壊れ状態を確認
|
||||
self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: newOptions.memoryDirectoryURL)
|
||||
if self.memoryCollapsed && newOptions.learningType.needUsingMemory {
|
||||
do {
|
||||
try LongTermLearningMemory.merge(
|
||||
tempTrie: TemporalLearningMemoryTrie(),
|
||||
directoryURL: newOptions.memoryDirectoryURL,
|
||||
maxMemoryCount: newOptions.maxMemoryCount,
|
||||
char2UInt8: self.char2UInt8
|
||||
)
|
||||
} catch {
|
||||
debug(#file, #function, "automatic merge failed", error)
|
||||
}
|
||||
self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: newOptions.memoryDirectoryURL)
|
||||
}
|
||||
if self.memoryCollapsed {
|
||||
// 学習データが壊れている状態であることを警告する
|
||||
debug(#file, #function, "LearningManager init: Memory Collapsed")
|
||||
}
|
||||
|
||||
switch self.options.learningType {
|
||||
case .inputAndOutput, .onlyOutput: break
|
||||
case .nothing:
|
||||
self.temporaryMemory = TemporalLearningMemoryTrie()
|
||||
}
|
||||
|
||||
// リセットチェックも実施
|
||||
if options.shouldResetMemory {
|
||||
if self.options.shouldResetMemory {
|
||||
self.reset()
|
||||
self.options.shouldResetMemory = false
|
||||
return true
|
||||
|
@ -20,8 +20,11 @@ public enum KeyboardLanguage: String, Codable, Equatable, Sendable {
|
||||
}
|
||||
|
||||
public enum LearningType: Int, CaseIterable, Sendable {
|
||||
/// 学習情報は変換結果(output)に反映され、学習情報は更新(input)されます
|
||||
case inputAndOutput
|
||||
/// 学習情報は変換結果(output)に反映されるのみで、学習情報は更新されません
|
||||
case onlyOutput
|
||||
/// 学習情報は一切用いません
|
||||
case nothing
|
||||
|
||||
package var needUpdateMemory: Bool {
|
||||
|
Binary file not shown.
103
Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift
Normal file
103
Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift
Normal file
@ -0,0 +1,103 @@
|
||||
@testable import KanaKanjiConverterModule
|
||||
import XCTest
|
||||
|
||||
/// Tests for the on-disk side of `LearningManager`: recovery from a leftover `.pause`
/// file, creation/removal of the memory files, and forgetting a learned entry.
final class LearningMemoryTests: XCTestCase {
    /// Location of the mock dictionary bundled with the test target.
    static let resourceURL = Bundle.module.resourceURL!.appendingPathComponent("DictionaryMock", isDirectory: true)

    /// The single entry that every test in this class memorizes.
    private var sampleElement: DicdataElement {
        DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
    }

    /// Builds request options that enable learning and store the memory under `memoryDirectoryURL`.
    private func getOptionsForMemoryTest(memoryDirectoryURL: URL) -> ConvertRequestOptions {
        var options = ConvertRequestOptions.default
        options.memoryDirectoryURL = memoryDirectoryURL
        options.dictionaryResourceURL = Self.resourceURL
        options.learningType = .inputAndOutput
        options.maxMemoryCount = 32
        return options
    }

    /// Creates a uniquely named directory inside the system temporary directory.
    ///
    /// The caller owns the directory and is responsible for removing it.
    /// - Parameter prefix: Leading part of the directory name; a UUID is appended to it.
    /// - Returns: The URL of the newly created directory.
    /// - Throws: Any error raised by `FileManager.createDirectory`.
    private func makeTemporaryDirectory(prefix: String) throws -> URL {
        let dir = FileManager.default.temporaryDirectory
            .appendingPathComponent("\(prefix)-\(UUID().uuidString)", isDirectory: true)
        try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
        return dir
    }

    /// Creates a `LearningManager` configured with `options`, feeds it `sampleElement`
    /// and saves, so that the memory directory contains persisted learning data.
    @discardableResult
    private func makeManagerWithSavedSample(options: ConvertRequestOptions) -> LearningManager {
        let manager = LearningManager()
        _ = manager.setRequestOptions(options)
        manager.update(data: [sampleElement])
        manager.save()
        return manager
    }

    /// A leftover `.pause` file marks the memory as collapsed; applying the request
    /// options again must recover from that state.
    func testPauseFileIsClearedOnInit() throws {
        let dir = try makeTemporaryDirectory(prefix: "LearningMemoryTest")
        defer { try? FileManager.default.removeItem(at: dir) }

        let options = getOptionsForMemoryTest(memoryDirectoryURL: dir)
        let manager = makeManagerWithSavedSample(options: options)

        // Place an empty `.pause` file in the memory directory.
        let pauseURL = dir.appendingPathComponent(".pause", isDirectory: false)
        XCTAssertTrue(
            FileManager.default.createFile(atPath: pauseURL.path, contents: Data()),
            "failed to create the .pause fixture file"
        )
        XCTAssertTrue(LongTermLearningMemory.memoryCollapsed(directoryURL: dir))

        // The side effect under test happens here.
        _ = manager.setRequestOptions(options)

        // The collapsed state must have been repaired.
        XCTAssertFalse(LongTermLearningMemory.memoryCollapsed(directoryURL: dir))
    }

    /// Saving writes the expected memory files, and `reset()` leaves the directory empty.
    func testMemoryFilesCreateAndRemove() throws {
        let dir = try makeTemporaryDirectory(prefix: "LearningMemoryTest")
        defer { try? FileManager.default.removeItem(at: dir) }

        let options = getOptionsForMemoryTest(memoryDirectoryURL: dir)
        let manager = makeManagerWithSavedSample(options: options)

        let fileNames = try FileManager.default
            .contentsOfDirectory(at: dir, includingPropertiesForKeys: nil)
            .map(\.lastPathComponent)
        XCTAssertTrue(fileNames.contains("memory.louds"))
        XCTAssertTrue(fileNames.contains("memory.loudschars2"))
        XCTAssertTrue(fileNames.contains("memory.memorymetadata"))
        XCTAssertTrue(fileNames.contains { $0.hasSuffix(".loudstxt3") })

        manager.reset()
        let filesAfter = try FileManager.default.contentsOfDirectory(at: dir, includingPropertiesForKeys: nil)
        XCTAssertTrue(filesAfter.isEmpty)
    }

    /// A saved entry is visible through `DicdataStore` and disappears after `.forgetMemory`.
    func testForgetMemory() throws {
        let dir = try makeTemporaryDirectory(prefix: "LearningManagerPersistence")
        defer { try? FileManager.default.removeItem(at: dir) }

        let options = getOptionsForMemoryTest(memoryDirectoryURL: dir)
        let element = sampleElement
        makeManagerWithSavedSample(options: options)

        let dicdataStore = DicdataStore(requestOptions: options)
        dicdataStore.sendToDicdataStore(.setRequestOptions(options))
        let charIDs = "テスト".map { dicdataStore.character2charId($0) }

        // Before forgetting: the entry is found in the "memory" dictionary.
        let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
        let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices)
        XCTAssertFalse(dicdata.isEmpty)
        XCTAssertTrue(dicdata.contains { $0.word == element.word && $0.ruby == element.ruby })

        let candidate = Candidate(
            text: element.word,
            value: element.value(),
            correspondingCount: 3,
            lastMid: element.mid,
            data: [element]
        )
        dicdataStore.sendToDicdataStore(.forgetMemory(candidate))

        // After forgetting: the same lookup no longer returns the entry.
        let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
        let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2)
        XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby })
    }
}
|
||||
|
@ -0,0 +1,56 @@
|
||||
@testable import KanaKanjiConverterModule
|
||||
import XCTest
|
||||
|
||||
/// Unit tests for `TemporalLearningMemoryTrie`: memorizing entries, looking them up via
/// perfect/through/prefix matching, and forgetting them again.
final class TemporalLearningMemoryTrieTests: XCTestCase {
    /// Location of the mock dictionary bundled with the test target.
    static let resourceURL = Bundle.module.resourceURL!.appendingPathComponent("DictionaryMock", isDirectory: true)

    /// Character-to-ID table, parsed once and shared by all tests instead of being
    /// re-read from disk on every `chars(for:)` call.
    private static let charMap: [Character: UInt8] = TemporalLearningMemoryTrieTests.loadCharMap()

    /// Builds the character-to-ID table from `louds/charID.chid` in the mock dictionary.
    ///
    /// Each character of the file is mapped to its offset within the file's text.
    static func loadCharMap() -> [Character: UInt8] {
        let chidURL = resourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
        // The file is a bundled fixture; if it is unreadable the test bundle itself is broken.
        let string = try! String(contentsOf: chidURL, encoding: .utf8)
        return Dictionary(uniqueKeysWithValues: string.enumerated().map { ($0.element, UInt8($0.offset)) })
    }

    /// Converts `string` into the character-ID sequence used as a trie key.
    ///
    /// - Throws: An `XCTUnwrap` failure when `LearningManager.keyToChars` returns `nil`,
    ///   so the calling test fails instead of crashing the test process.
    func chars(for string: String) throws -> [UInt8] {
        try XCTUnwrap(
            LearningManager.keyToChars(string, char2UInt8: Self.charMap),
            "LearningManager.keyToChars returned nil for \"\(string)\""
        )
    }

    /// Two memorized entries sharing a prefix are each retrievable by the matching API
    /// appropriate for them.
    func testMemorizeAndMatch() throws {
        var trie = TemporalLearningMemoryTrie()
        let element1 = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
        let element2 = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -12)

        let chars1 = try chars(for: element1.ruby)
        let chars2 = try chars(for: element2.ruby)
        trie.memorize(dicdataElement: element1, chars: chars1)
        trie.memorize(dicdataElement: element2, chars: chars2)

        // Exact lookup returns only the exact entry, flagged as learned.
        let result1 = trie.perfectMatch(chars: chars1)
        XCTAssertEqual(result1.count, 1)
        XCTAssertEqual(result1.first?.word, element1.word)
        XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)

        // Through-match restricted to the last depth yields only the longer entry.
        let rubyLength = element2.ruby.count
        let result2 = trie.throughMatch(chars: chars2, depth: (rubyLength - 1)..<rubyLength)
        XCTAssertEqual(result2.map { $0.word }, [element2.word])

        // Prefix lookup on the shared prefix yields both entries.
        let prefixResult = trie.prefixMatch(chars: try chars(for: "テス"))
        XCTAssertEqual(Set(prefixResult.map { $0.word }), Set([element1.word, element2.word]))
    }

    /// Memorizing the same entry twice raises its `adjust` value without duplicating it,
    /// and `forget` removes it.
    func testMemorizeUpdateCountAndForget() throws {
        var trie = TemporalLearningMemoryTrie()
        let element = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
        let charIDs = try chars(for: element.ruby)

        trie.memorize(dicdataElement: element, chars: charIDs)
        let firstStored = try XCTUnwrap(trie.perfectMatch(chars: charIDs).first)
        let adjust1 = firstStored.adjust

        trie.memorize(dicdataElement: element, chars: charIDs)
        let stored = try XCTUnwrap(trie.perfectMatch(chars: charIDs).first)
        let adjust2 = stored.adjust

        XCTAssertGreaterThan(adjust2, adjust1)
        XCTAssertEqual(trie.perfectMatch(chars: charIDs).count, 1)

        XCTAssertTrue(trie.forget(dicdataElement: stored, chars: charIDs))
        XCTAssertTrue(trie.perfectMatch(chars: charIDs).isEmpty)
    }
}
|
Reference in New Issue
Block a user