mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
Merge pull request #197 from azooKey/fix/coarse_forget
fix: forget memoryの実装を「粗い」チェックに変更し、表層形の一致のみで判断するようにした
This commit is contained in:
@ -226,6 +226,7 @@ struct LongTermLearningMemory {
|
||||
|
||||
debug("LongTermLearningMemory merge entryCount", entryCount, ltMetadata.count)
|
||||
|
||||
let forgetTargetWords = Set(forgetTargets.map { $0.word })
|
||||
// それぞれのloudstxt3ファイルに対して処理を行う
|
||||
for loudstxtIndex in 0 ..< Int(entryCount) / txtFileSplit + 1 {
|
||||
let loudstxtData: Data
|
||||
@ -262,8 +263,8 @@ struct LongTermLearningMemory {
|
||||
var newMetadata: [MetadataElement] = []
|
||||
assert(elements.count == metadata.count, "elements count and metadata count must be equal.")
|
||||
for (dicdataElement, metadataElement) in zip(elements, metadata) {
|
||||
// 忘却対象である場合は弾く
|
||||
if forgetTargets.contains(dicdataElement) {
|
||||
// 忘却対象である場合は弾く(粗いチェック)
|
||||
if forgetTargetWords.contains(dicdataElement.word) {
|
||||
debug("LongTermLearningMemory merge stopped because it is a forget target", dicdataElement)
|
||||
continue
|
||||
}
|
||||
@ -564,8 +565,10 @@ struct TemporalLearningMemoryTrie {
|
||||
}
|
||||
}
|
||||
// 存在する場合
|
||||
// dataIndicesから削除する(dicdataの方は触らない)
|
||||
nodes[index].dataIndices.removeAll(where: {self.dicdata[$0] == dicdataElement})
|
||||
// 判定を緩めにする(表層形が一致すればすべて削除する)
|
||||
nodes[index].dataIndices.removeAll {
|
||||
self.dicdata[$0].word == dicdataElement.word
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
|
@ -99,5 +99,46 @@ final class LearningMemoryTests: XCTestCase {
|
||||
let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2)
|
||||
XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby })
|
||||
}
|
||||
|
||||
/// Checks the "coarse" forget behaviour end-to-end through the persisted
/// learning memory: two learned entries that share word and ruby but differ
/// in `cid` must both be absent from the "memory" dictionary after a single
/// `forgetMemory` request built from only one of them.
func testCoarseForgetMemory() throws {
    // ForgetMemory performs a "coarse" check, so entries are forgotten together
    // even when their parts of speech differ.
    let memoryDirectory = FileManager.default.temporaryDirectory.appendingPathComponent("LearningManagerPersistence-\(UUID().uuidString)", isDirectory: true)
    try FileManager.default.createDirectory(at: memoryDirectory, withIntermediateDirectories: true)
    // Best-effort cleanup of the per-test directory, whatever the outcome.
    defer { try? FileManager.default.removeItem(at: memoryDirectory) }

    let options = self.getOptionsForMemoryTest(memoryDirectoryURL: memoryDirectory)
    let learningManager = LearningManager()
    _ = learningManager.setRequestOptions(options)

    // Learn the same word/ruby pair twice, varying only the `cid`.
    let commonNounEntry = DicdataElement(
        word: "テスト",
        ruby: "テスト",
        cid: CIDData.一般名詞.cid,
        mid: MIDData.一般.mid,
        value: -10
    )
    learningManager.update(data: [commonNounEntry])
    let properNounEntry = DicdataElement(
        word: "テスト",
        ruby: "テスト",
        cid: CIDData.固有名詞.cid,
        mid: MIDData.一般.mid,
        value: -10
    )
    learningManager.update(data: [properNounEntry])
    learningManager.save()

    let store = DicdataStore(requestOptions: options)
    store.sendToDicdataStore(.setRequestOptions(options))
    let charIDs = "テスト".map { store.character2charId($0) }

    // Before forgetting: both entries are retrievable from the "memory" dictionary.
    let indicesBefore = store.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
    let entriesBefore = store.getDicdataFromLoudstxt3(identifier: "memory", indices: indicesBefore)
    XCTAssertFalse(entriesBefore.isEmpty)
    let matchingBefore = entriesBefore.filter {
        $0.word == commonNounEntry.word && $0.ruby == commonNounEntry.ruby
    }.count
    XCTAssertEqual(matchingBefore, 2)

    // Ask the store to forget a candidate that carries only the first entry.
    let forgetCandidate = Candidate(
        text: commonNounEntry.word,
        value: commonNounEntry.value(),
        correspondingCount: 3,
        lastMid: commonNounEntry.mid,
        data: [commonNounEntry]
    )
    store.sendToDicdataStore(.forgetMemory(forgetCandidate))

    // After forgetting: no entry with that word/ruby may remain.
    let indicesAfter = store.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
    let entriesAfter = store.getDicdataFromLoudstxt3(identifier: "memory", indices: indicesAfter)
    XCTAssertFalse(entriesAfter.contains(where: { entry in
        entry.word == commonNounEntry.word && entry.ruby == commonNounEntry.ruby
    }))
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -34,6 +34,20 @@ final class TemporalLearningMemoryTrieTests: XCTestCase {
|
||||
XCTAssertEqual(Set(prefixResult.map { $0.word }), Set([element1.word, element2.word]))
|
||||
}
|
||||
|
||||
/// Memorizes the same word/ruby/cid/mid twice (the second time with
/// `adjust: 1.5`) and checks that a perfect match yields exactly one entry,
/// carrying the expected word and the `.isLearned` metadata flag.
func testMemorizeTwice() throws {
    var memoryTrie = TemporalLearningMemoryTrie()

    let firstEntry = DicdataElement(
        word: "テスト",
        ruby: "テスト",
        cid: CIDData.一般名詞.cid,
        mid: MIDData.一般.mid,
        value: -10
    )
    memoryTrie.memorize(dicdataElement: firstEntry, chars: chars(for: firstEntry.ruby))

    // Same entry again; only the `adjust` argument differs.
    let adjustedEntry = DicdataElement(
        word: "テスト",
        ruby: "テスト",
        cid: CIDData.一般名詞.cid,
        mid: MIDData.一般.mid,
        value: -10,
        adjust: 1.5
    )
    memoryTrie.memorize(dicdataElement: adjustedEntry, chars: chars(for: adjustedEntry.ruby))

    let matches = memoryTrie.perfectMatch(chars: chars(for: firstEntry.ruby))
    XCTAssertEqual(matches.count, 1)
    XCTAssertEqual(matches.first?.word, firstEntry.word)
    XCTAssertTrue(matches.first?.metadata.contains(.isLearned) ?? false)
}
|
||||
|
||||
func testMemorizeUpdateCountAndForget() throws {
|
||||
var trie = TemporalLearningMemoryTrie()
|
||||
let element = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
|
||||
@ -53,4 +67,21 @@ final class TemporalLearningMemoryTrieTests: XCTestCase {
|
||||
XCTAssertTrue(trie.forget(dicdataElement: stored, chars: charIDs))
|
||||
XCTAssertTrue(trie.perfectMatch(chars: charIDs).isEmpty)
|
||||
}
|
||||
|
||||
/// Checks that `forget` on the trie is coarse: after memorizing two entries
/// with the same word and ruby but different `cid`, forgetting one of them
/// leaves no perfect match for that reading.
func testCoarseForget() throws {
    var memoryTrie = TemporalLearningMemoryTrie()
    let commonNounEntry = DicdataElement(
        word: "テスター",
        ruby: "テスター",
        cid: CIDData.一般名詞.cid,
        mid: MIDData.一般.mid,
        value: -10
    )
    let properNounEntry = DicdataElement(
        word: "テスター",
        ruby: "テスター",
        cid: CIDData.固有名詞.cid,
        mid: MIDData.一般.mid,
        value: -10
    )
    let charIDs = chars(for: "テスター")

    memoryTrie.memorize(dicdataElement: commonNounEntry, chars: charIDs)
    memoryTrie.memorize(dicdataElement: properNounEntry, chars: charIDs)

    // Two distinct entries exist for the word at this point.
    let storedCount = memoryTrie.perfectMatch(chars: charIDs).count
    XCTAssertEqual(storedCount, 2)

    // Forgetting removes both at once (decided by surface-form match only,
    // i.e. a coarse match).
    let didForget = memoryTrie.forget(dicdataElement: commonNounEntry, chars: charIDs)
    XCTAssertTrue(didForget)
    XCTAssertTrue(memoryTrie.perfectMatch(chars: charIDs).isEmpty)
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user