From 2569d8475e6f723e7c11f107b676324e4e6dca4d Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Mon, 2 Jun 2025 22:32:17 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20forget=20memory=E3=81=AE=E5=AE=9F?= =?UTF-8?q?=E8=A3=85=E3=82=92=E3=80=8C=E7=B2=97=E3=81=84=E3=80=8D=E3=83=81?= =?UTF-8?q?=E3=82=A7=E3=83=83=E3=82=AF=E3=81=AB=E5=A4=89=E6=9B=B4=E3=81=97?= =?UTF-8?q?=E3=80=81=E8=A1=A8=E5=B1=A4=E5=BD=A2=E3=81=AE=E4=B8=80=E8=87=B4?= =?UTF-8?q?=E3=81=AE=E3=81=BF=E3=81=A7=E5=88=A4=E6=96=AD=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DicdataStore/LearningMemory.swift | 11 +++-- .../LearningMemoryTests.swift | 41 +++++++++++++++++++ .../TemporalLearningMemoryTrieTests.swift | 31 ++++++++++++++ 3 files changed, 79 insertions(+), 4 deletions(-) diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift index fe0c6a0..8e858a0 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift @@ -226,6 +226,7 @@ struct LongTermLearningMemory { debug("LongTermLearningMemory merge entryCount", entryCount, ltMetadata.count) + let forgetTargetWords = forgetTargets.map { $0.word } // それぞれのloudstxt3ファイルに対して処理を行う for loudstxtIndex in 0 ..< Int(entryCount) / txtFileSplit + 1 { let loudstxtData: Data @@ -262,8 +263,8 @@ struct LongTermLearningMemory { var newMetadata: [MetadataElement] = [] assert(elements.count == metadata.count, "elements count and metadata count must be equal.") for (dicdataElement, metadataElement) in zip(elements, metadata) { - // 忘却対象である場合は弾く - if forgetTargets.contains(dicdataElement) { + // 忘却対象である場合は弾く(粗いチェック) + if forgetTargetWords.contains(dicdataElement.word) { debug("LongTermLearningMemory merge stopped because it is a forget target", dicdataElement) continue } @@ -564,8 +565,10 @@ struct TemporalLearningMemoryTrie { } } // 存在する場合 - // dataIndicesから削除する(dicdataの方は触らない) - nodes[index].dataIndices.removeAll(where: {self.dicdata[$0] == dicdataElement}) + // 判定を緩めにする(表層形が一致すればすべて削除する) + nodes[index].dataIndices.removeAll { + self.dicdata[$0].word == dicdataElement.word + } return true } diff --git a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift index c36fc9c..a007137 100644 --- a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift @@ -99,5 +99,46 @@ final class LearningMemoryTests: XCTestCase { let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2) XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby }) } + + func testCoarseForgetMemory() throws { + // ForgetMemoryは「粗い」チェックを行うため、品詞が異なっていても同時に忘却される + let dir = FileManager.default.temporaryDirectory.appendingPathComponent("LearningManagerPersistence-\(UUID().uuidString)", isDirectory: true) + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + defer { try? FileManager.default.removeItem(at: dir) } + + let options = self.getOptionsForMemoryTest(memoryDirectoryURL: dir) + let manager = LearningManager() + _ = manager.setRequestOptions(options) + let element = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10) + manager.update(data: [element]) + let differentCidElement = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10) + manager.update(data: [differentCidElement]) + manager.save() + + let dicdataStore = DicdataStore(requestOptions: options) + dicdataStore.sendToDicdataStore(.setRequestOptions(options)) + let charIDs = "テスト".map { dicdataStore.character2charId($0) } + let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs) + let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices) + XCTAssertFalse(dicdata.isEmpty) + XCTAssertEqual(dicdata.count { $0.word == element.word && $0.ruby == element.ruby }, 2) + + dicdataStore.sendToDicdataStore( + .forgetMemory( + Candidate( + text: element.word, + value: element.value(), + correspondingCount: 3, + lastMid: element.mid, + data: [element] + ) + ) + ) + + let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs) + let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2) + XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby }) + } + } diff --git a/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift b/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift index bf309a6..2b5a7dc 100644 --- a/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift @@ -34,6 +34,20 @@ final class TemporalLearningMemoryTrieTests: XCTestCase { XCTAssertEqual(Set(prefixResult.map { $0.word }), Set([element1.word, element2.word])) } + func testMemorizeTwice() throws { + var trie = TemporalLearningMemoryTrie() + let element1 = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10) + trie.memorize(dicdataElement: element1, chars: chars(for: element1.ruby)) + + let element2 = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10, adjust: 1.5) + trie.memorize(dicdataElement: element2, chars: chars(for: element2.ruby)) + + let result1 = trie.perfectMatch(chars: chars(for: element1.ruby)) + XCTAssertEqual(result1.count, 1) + XCTAssertEqual(result1.first?.word, element1.word) + XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false) + } + func testMemorizeUpdateCountAndForget() throws { var trie = TemporalLearningMemoryTrie() let element = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10) @@ -53,4 +67,21 @@ final class TemporalLearningMemoryTrieTests: XCTestCase { XCTAssertTrue(trie.forget(dicdataElement: stored, chars: charIDs)) XCTAssertTrue(trie.perfectMatch(chars: charIDs).isEmpty) } + + func testCoarseForget() throws { + var trie = TemporalLearningMemoryTrie() + let element1 = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10) + let element2 = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10) + let charIDs = chars(for: "テスター") + + trie.memorize(dicdataElement: element1, chars: charIDs) + trie.memorize(dicdataElement: element2, chars: charIDs) + + // 単語としては2種類存在 + XCTAssertEqual(trie.perfectMatch(chars: charIDs).count, 2) + + // forgetする場合、両方が同時に削除される(表層形の一致で判断=粗い一致) + XCTAssertTrue(trie.forget(dicdataElement: element1, chars: charIDs)) + XCTAssertTrue(trie.perfectMatch(chars: charIDs).isEmpty) + } }