Merge pull request #189 from azooKey/codex/modify-learningmanager.init-and-add-error-handling

feat: `.pause`が残っている場合に学習データの復旧を試みるよう修正
This commit is contained in:
Miwa
2025-06-01 21:59:26 +09:00
committed by GitHub
8 changed files with 203 additions and 21 deletions

View File

@ -171,6 +171,8 @@ ComposingText(
ステップ3の実行中にエラーが生じた場合、`.pause`が残っているため、次回キーボードを開いた際は学習を停止状態にします。次いで、適切なタイミングで再度ステップ3を実行することで、安全に全てのファイルを更新することができます。
azooKeyKanaKanjiConverter では、変換器を開いた際に `.pause` ファイルが残っている場合、自動的に空の一時記憶とマージを試みて `.pause` を削除し、学習機能を復旧します。
## 変換候補の並び順
変換候補の並び順の決定はとても難しい問題です。azooKeyではおおよそ以下のようになっています。`Converter.swift`が並び順を決めていますが、とても複雑な実装になっているため、改善したいと思っています。

View File

@ -76,6 +76,8 @@ let options = ConvertRequestOptions.withDefaultDictionary(
)
```
変換器を開く際に、保存処理が中断されたことを示す `.pause` ファイルが残っている場合は、変換器が自動的に学習データの復旧を試みて `.pause` ファイルを削除します。
### `ComposingText`
`ComposingText`は入力管理を行いつつ変換をリクエストするためのAPIです。ローマ字入力などを適切にハンドルするために利用できます。詳しくは[ドキュメント](./Docs/composing_text.md)を参照してください。

View File

@ -139,7 +139,7 @@ public final class DicdataStore {
} else {
self.requestOptions = value
}
let shouldReset = self.learningManager.setRequestOptions(options: value)
let shouldReset = self.learningManager.setRequestOptions(value)
if shouldReset {
self.reloadMemory()
}
@ -226,7 +226,7 @@ public final class DicdataStore {
}
}
private func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
guard let louds = self.loadLOUDS(query: query) else {
return []
}

View File

@ -626,7 +626,7 @@ final class LearningManager {
debug("Error: louds/charID.chidが存在しません。このエラーは深刻ですが、テスト時には無視できる場合があります。Description: \(error)")
}
}
private var char2UInt8: [Character: UInt8] = [:]
var char2UInt8: [Character: UInt8] = [:]
static var today: UInt16 {
UInt16(Int(Date().timeIntervalSince1970) / 86400) - 19000
@ -653,34 +653,50 @@ final class LearningManager {
(!self.memoryCollapsed) && self.options.learningType.needUsingMemory
}
init() {
self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: self.options.memoryDirectoryURL)
if memoryCollapsed {
//
debug("LearningManager init: Memory Collapsed")
}
if !options.learningType.needUsingMemory {
return
}
Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL, target: &char2UInt8)
}
init() {}
/// - Returns: Whether cache should be reseted or not.
func setRequestOptions(options: ConvertRequestOptions) -> Bool {
// `char2Int8`
if options.dictionaryResourceURL != self.options.dictionaryResourceURL {
Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL, target: &char2UInt8)
func setRequestOptions(_ newOptions: ConvertRequestOptions) -> Bool {
//
if !newOptions.learningType.needUsingMemory {
self.options = newOptions
return false
}
self.options = options
// `char2Int8`
if newOptions.dictionaryResourceURL != self.options.dictionaryResourceURL {
Self.updateChar2Int8(bundleURL: newOptions.dictionaryResourceURL, target: &self.char2UInt8)
}
//
self.options = newOptions
switch options.learningType {
//
self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: newOptions.memoryDirectoryURL)
if self.memoryCollapsed && newOptions.learningType.needUsingMemory {
do {
try LongTermLearningMemory.merge(
tempTrie: TemporalLearningMemoryTrie(),
directoryURL: newOptions.memoryDirectoryURL,
maxMemoryCount: newOptions.maxMemoryCount,
char2UInt8: self.char2UInt8
)
} catch {
debug(#file, #function, "automatic merge failed", error)
}
self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: newOptions.memoryDirectoryURL)
}
if self.memoryCollapsed {
//
debug(#file, #function, "LearningManager init: Memory Collapsed")
}
switch self.options.learningType {
case .inputAndOutput, .onlyOutput: break
case .nothing:
self.temporaryMemory = TemporalLearningMemoryTrie()
}
//
if options.shouldResetMemory {
if self.options.shouldResetMemory {
self.reset()
self.options.shouldResetMemory = false
return true

View File

@ -20,8 +20,11 @@ public enum KeyboardLanguage: String, Codable, Equatable, Sendable {
}
public enum LearningType: Int, CaseIterable, Sendable {
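/// Learned data is used when producing conversion results (output) and is also updated from what the user selects (input).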
case inputAndOutput
/// Learned data is used when producing conversion results (output), but no new learning is recorded.
case onlyOutput
/// Learned data is neither used for conversion results nor updated.
case nothing
package var needUpdateMemory: Bool {

View File

@ -0,0 +1,103 @@
@testable import KanaKanjiConverterModule
import XCTest
/// Tests for the on-disk learning memory as driven through `LearningManager` and `DicdataStore`:
/// recovery when a `.pause` marker file is present, creation and removal of the memory files,
/// and forgetting a previously learned entry.
///
/// Every test works inside its own uniquely named temporary directory, which is removed by a `defer`.
final class LearningMemoryTests: XCTestCase {
// Mock dictionary directory ("DictionaryMock") inside the test bundle's resources.
// The force unwrap crashes the test run if the bundle has no resource URL.
static let resourceURL = Bundle.module.resourceURL!.appendingPathComponent("DictionaryMock", isDirectory: true)
/// Builds the request options shared by these tests.
///
/// Starts from `ConvertRequestOptions.default` and overrides the memory directory, the dictionary
/// resource URL (the mock dictionary), the learning type (`.inputAndOutput`) and the maximum
/// memory count (32).
/// - Parameter memoryDirectoryURL: Directory in which the learning memory files are stored.
/// - Returns: The configured options.
private func getOptionsForMemoryTest(memoryDirectoryURL: URL) -> ConvertRequestOptions {
var options = ConvertRequestOptions.default
options.memoryDirectoryURL = memoryDirectoryURL
options.dictionaryResourceURL = Self.resourceURL
options.learningType = .inputAndOutput
options.maxMemoryCount = 32
return options
}
/// Verifies that calling `setRequestOptions` while a `.pause` file exists leaves the memory
/// directory in a state that `LongTermLearningMemory.memoryCollapsed` no longer reports as collapsed.
func testPauseFileIsClearedOnInit() throws {
let dir = FileManager.default.temporaryDirectory.appendingPathComponent("LearningMemoryTest-\(UUID().uuidString)", isDirectory: true)
try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
defer { try? FileManager.default.removeItem(at: dir) }
let options = self.getOptionsForMemoryTest(memoryDirectoryURL: dir)
let manager = LearningManager()
_ = manager.setRequestOptions(options)
// NOTE(review): `CIDData..cid` / `MIDData..mid` look like they are missing a member name between
// the dots (possibly a non-ASCII identifier lost in extraction) — confirm against the repository.
let element = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData..cid, mid: MIDData..mid, value: -10)
manager.update(data: [element])
manager.save()
// Create an empty `.pause` file by hand to simulate an interrupted save.
// The Bool result of `createFile` is not checked; the assertion below covers a failed creation.
let pauseURL = dir.appendingPathComponent(".pause", isDirectory: false)
FileManager.default.createFile(atPath: pauseURL.path, contents: Data())
XCTAssertTrue(LongTermLearningMemory.memoryCollapsed(directoryURL: dir))
// Apply the options again; this is the call expected to trigger the recovery.
_ = manager.setRequestOptions(options)
// After recovery the directory must no longer be reported as collapsed.
XCTAssertFalse(LongTermLearningMemory.memoryCollapsed(directoryURL: dir))
// Best-effort cleanup of the marker; the `defer` above removes the whole directory anyway.
try? FileManager.default.removeItem(at: pauseURL)
}
/// Verifies that `save()` writes the expected memory files and that `reset()` leaves the
/// memory directory empty.
func testMemoryFilesCreateAndRemove() throws {
let dir = FileManager.default.temporaryDirectory.appendingPathComponent("LearningMemoryTest-\(UUID().uuidString)", isDirectory: true)
try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
defer { try? FileManager.default.removeItem(at: dir) }
let options = self.getOptionsForMemoryTest(memoryDirectoryURL: dir)
let manager = LearningManager()
_ = manager.setRequestOptions(options)
// NOTE(review): see the note in `testPauseFileIsClearedOnInit` about `CIDData..cid` / `MIDData..mid`.
let element = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData..cid, mid: MIDData..mid, value: -10)
manager.update(data: [element])
manager.save()
// Expected after saving: three fixed-name files plus at least one file ending in `.loudstxt3`.
let files = try FileManager.default.contentsOfDirectory(at: dir, includingPropertiesForKeys: nil)
XCTAssertTrue(files.contains { $0.lastPathComponent == "memory.louds" })
XCTAssertTrue(files.contains { $0.lastPathComponent == "memory.loudschars2" })
XCTAssertTrue(files.contains { $0.lastPathComponent == "memory.memorymetadata" })
XCTAssertTrue(files.contains { $0.lastPathComponent.hasSuffix(".loudstxt3") })
// `reset()` is expected to delete everything that was written above.
manager.reset()
let filesAfter = try FileManager.default.contentsOfDirectory(at: dir, includingPropertiesForKeys: nil)
XCTAssertTrue(filesAfter.isEmpty)
}
/// Verifies that an entry saved by `LearningManager` can be read back through `DicdataStore`
/// under the "memory" identifier, and that it is no longer returned after a `.forgetMemory`
/// request for a candidate built from that entry.
func testForgetMemory() throws {
let dir = FileManager.default.temporaryDirectory.appendingPathComponent("LearningManagerPersistence-\(UUID().uuidString)", isDirectory: true)
try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
defer { try? FileManager.default.removeItem(at: dir) }
let options = self.getOptionsForMemoryTest(memoryDirectoryURL: dir)
let manager = LearningManager()
_ = manager.setRequestOptions(options)
// NOTE(review): see the note in `testPauseFileIsClearedOnInit` about `CIDData..cid` / `MIDData..mid`.
let element = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData..cid, mid: MIDData..mid, value: -10)
manager.update(data: [element])
manager.save()
// Read the saved data back through a separate store that points at the same memory directory.
let dicdataStore = DicdataStore(requestOptions: options)
dicdataStore.sendToDicdataStore(.setRequestOptions(options))
let charIDs = "テスト".map { dicdataStore.character2charId($0) }
let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices)
XCTAssertFalse(dicdata.isEmpty)
XCTAssertTrue(dicdata.contains { $0.word == element.word && $0.ruby == element.ruby })
// Ask the store to forget a candidate that consists of exactly the saved element.
dicdataStore.sendToDicdataStore(
.forgetMemory(
Candidate(
text: element.word,
value: element.value(),
correspondingCount: 3,
lastMid: element.mid,
data: [element]
)
)
)
// The same lookup must no longer return the forgotten entry.
let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2)
XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby })
}
}

View File

@ -0,0 +1,56 @@
@testable import KanaKanjiConverterModule
import XCTest
/// Unit tests for `TemporalLearningMemoryTrie`: memorizing entries, the three lookup methods
/// (`perfectMatch`, `throughMatch`, `prefixMatch`), repeated memorization, and `forget`.
final class TemporalLearningMemoryTrieTests: XCTestCase {
// Mock dictionary directory ("DictionaryMock") inside the test bundle's resources.
// The force unwrap crashes the test run if the bundle has no resource URL.
static let resourceURL = Bundle.module.resourceURL!.appendingPathComponent("DictionaryMock", isDirectory: true)
/// Loads the character-to-ID table from `louds/charID.chid` in the mock dictionary.
///
/// Each character of the file is mapped to its position in the file as a `UInt8`.
/// The call traps if the file cannot be read (`try!`), if a position does not fit in `UInt8`,
/// or if a character occurs more than once (`Dictionary(uniqueKeysWithValues:)`).
/// - Returns: A dictionary from character to character ID.
static func loadCharMap() -> [Character: UInt8] {
let chidURL = resourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
let string = try! String(contentsOf: chidURL, encoding: .utf8)
return Dictionary(uniqueKeysWithValues: string.enumerated().map { ($0.element, UInt8($0.offset)) })
}
/// Converts `string` to character IDs via `LearningManager.keyToChars`.
///
/// The character table is reloaded from disk on every call, and the result is force-unwrapped,
/// so the test crashes if `keyToChars` returns `nil`.
func chars(for string: String) -> [UInt8] {
LearningManager.keyToChars(string, char2UInt8: Self.loadCharMap())!
}
/// Memorizes two entries sharing the prefix "テス" and checks what each lookup method returns.
func testMemorizeAndMatch() throws {
var trie = TemporalLearningMemoryTrie()
// NOTE(review): `CIDData..cid` / `MIDData..mid` look like they are missing a member name between
// the dots (possibly a non-ASCII identifier lost in extraction) — confirm against the repository.
let element1 = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData..cid, mid: MIDData..mid, value: -10)
let element2 = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData..cid, mid: MIDData..mid, value: -12)
trie.memorize(dicdataElement: element1, chars: chars(for: element1.ruby))
trie.memorize(dicdataElement: element2, chars: chars(for: element2.ruby))
// A perfect match on element1's reading yields exactly that entry, flagged as learned.
let result1 = trie.perfectMatch(chars: chars(for: element1.ruby))
XCTAssertEqual(result1.count, 1)
XCTAssertEqual(result1.first?.word, element1.word)
XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
// The depth range contains the single value `ruby.count - 1`; only element2 is expected back.
let result2 = trie.throughMatch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count)
XCTAssertEqual(result2.map { $0.word }, [element2.word])
// A prefix match on the shared prefix returns both entries; order is ignored by comparing sets.
let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))
XCTAssertEqual(Set(prefixResult.map { $0.word }), Set([element1.word, element2.word]))
}
/// Memorizes the same entry twice and checks that `adjust` increases while the entry stays
/// unique, then checks that `forget` removes it.
func testMemorizeUpdateCountAndForget() throws {
var trie = TemporalLearningMemoryTrie()
// NOTE(review): see the note in `testMemorizeAndMatch` about `CIDData..cid` / `MIDData..mid`.
let element = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData..cid, mid: MIDData..mid, value: -10)
let charIDs = chars(for: element.ruby)
trie.memorize(dicdataElement: element, chars: charIDs)
// `first!` crashes the test (rather than failing it) if nothing was stored.
var stored = trie.perfectMatch(chars: charIDs).first!
let adjust1 = stored.adjust
// Memorizing the identical entry again must raise `adjust` without adding a second entry.
trie.memorize(dicdataElement: element, chars: charIDs)
stored = trie.perfectMatch(chars: charIDs).first!
let adjust2 = stored.adjust
XCTAssertGreaterThan(adjust2, adjust1)
XCTAssertEqual(trie.perfectMatch(chars: charIDs).count, 1)
// `forget` reports success and the entry is gone afterwards.
XCTAssertTrue(trie.forget(dicdataElement: stored, chars: charIDs))
XCTAssertTrue(trie.perfectMatch(chars: charIDs).isEmpty)
}
}