fix: 動的ユーザ辞書が適切に読めていなかった問題を修正

This commit is contained in:
Miwa / Ensan
2025-07-06 16:57:27 +09:00
parent 5b6b7f71a5
commit 9d66ef0851
2 changed files with 106 additions and 12 deletions

View File

@ -256,8 +256,8 @@ public final class DicdataStore {
var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange, needTypoCorrection: needTypoCorrection)
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
var temporaryMemoryDicdata: [Int: [DicdataElement]] = [:]
//
var dynamicDicdata: [Int: [DicdataElement]] = [:]
//
while let (characters, info) = generator.next() {
guard let firstCharacter = characters.first else {
@ -291,7 +291,7 @@ public final class DicdataStore {
for (depth, dicdata) in result.dicdata {
for data in dicdata {
if info.penalty.isZero {
temporaryMemoryDicdata[depth, default: []].append(data)
dynamicDicdata[depth, default: []].append(data)
}
let ratio = Self.penaltyRatio[data.lcid]
let pUnit: PValue = Self.getPenalty(data: data) / 2 //
@ -299,7 +299,29 @@ public final class DicdataStore {
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
continue
}
temporaryMemoryDicdata[depth, default: []].append(data.adjustedData(adjust))
dynamicDicdata[depth, default: []].append(data.adjustedData(adjust))
}
}
if !self.dynamicUserDict.isEmpty {
//
let katakanaString = String(characters).toKatakana()
let dynamicUserDictResult = self.getMatchDynamicUserDict(katakanaString)
updated = updated || !dynamicUserDictResult.isEmpty
for data in dynamicUserDictResult {
let depth = characters.endIndex
if info.penalty.isZero {
dynamicDicdata[depth, default: []].append(data)
} else {
let ratio = Self.penaltyRatio[data.lcid]
let pUnit: PValue = Self.getPenalty(data: data) / 2 //
let adjust = pUnit * info.penalty * ratio
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: Array(data.ruby).count) {
continue
}
dynamicDicdata[depth, default: []].append(data.adjustedData(adjust))
}
// stringToInfogetLOUDSDataInRange
stringToInfo.append((Array(data.ruby), (depth - 1, info.penalty)))
}
}
if availableMaxIndex < characters.endIndex - 1 {
@ -314,7 +336,7 @@ public final class DicdataStore {
return (
Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )},
temporaryMemoryDicdata.flatMap {
dynamicDicdata.flatMap {
minCount < $0.key + 1 ? $0.value : []
}
)
@ -401,13 +423,6 @@ public final class DicdataStore {
}
dicdata.append(contentsOf: result)
}
do {
let result = self.getMatchDynamicUserDict(segments[i - fromIndex])
for item in result {
stringToInfo[Array(item.ruby)] = (i, 0)
}
dicdata.append(contentsOf: result)
}
}
if fromIndex == .zero {
let result: [LatticeNode] = dicdata.compactMap {

View File

@ -223,4 +223,83 @@ final class DicdataStoreTests: XCTestCase {
XCTAssertTrue(result.contains(where: {$0.word == "九千九百九十九億九千九百九十九万九千九百九十九"}))
}
}
/// Checks that entries imported as a dynamic user dictionary can be retrieved
/// through exact-match lookup, prefix-match lookup, and `getLOUDSDataInRange`,
/// and that a reading which was never registered yields no exact match.
func testDynamicUserDict() throws {
let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
// Three entries to register as the dynamic user dictionary.
// NOTE(review): `CIDData..cid` / `MIDData..mid` look like they lost the enum case
// name between the dots in this view — confirm against the repository.
let testDynamicUserDict = [
DicdataElement(word: "テスト単語", ruby: "テストタンゴ", lcid: CIDData..cid, rcid: CIDData..cid, mid: MIDData..mid, value: -10),
DicdataElement(word: "カスタム変換", ruby: "カスタムヘンカン", lcid: CIDData..cid, rcid: CIDData..cid, mid: MIDData..mid, value: -12),
DicdataElement(word: "動的辞書", ruby: "ドウテキジショ", lcid: CIDData..cid, rcid: CIDData..cid, mid: MIDData..mid, value: -11)
]
dicdataStore.sendToDicdataStore(.importDynamicUserDict(testDynamicUserDict))
// Exact match: the full ruby must return exactly the one registered entry.
do {
let result = dicdataStore.getMatchDynamicUserDict("テストタンゴ")
XCTAssertEqual(result.count, 1)
XCTAssertEqual(result.first?.word, "テスト単語")
XCTAssertEqual(result.first?.ruby, "テストタンゴ")
}
// Prefix match: only one registered ruby starts with "カスタム".
do {
let result = dicdataStore.getPrefixMatchDynamicUserDict("カスタム")
XCTAssertEqual(result.count, 1)
XCTAssertEqual(result.first?.word, "カスタム変換")
XCTAssertEqual(result.first?.ruby, "カスタムヘンカン")
}
// getLOUDSDataInRange over the whole direct input (from index 0, right edge
// restricted to the last input index) must include the dynamic entry.
do {
var c = ComposingText()
c.insertAtCursorPosition("テストタンゴ", inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
XCTAssertTrue(result.contains(where: {$0.data.word == "テスト単語"}))
}
// Same getLOUDSDataInRange check for a second registered entry.
do {
var c = ComposingText()
c.insertAtCursorPosition("ドウテキジショ", inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
XCTAssertTrue(result.contains(where: {$0.data.word == "動的辞書"}))
}
// Negative case: a ruby that was never imported must give an empty result.
do {
let result = dicdataStore.getMatchDynamicUserDict("ソンザイシナイ")
XCTAssertEqual(result.count, 0)
}
}
/// Checks that dynamic user dictionary entries are returned by
/// `getLOUDSDataInRange` for both roman-to-kana input and direct kana input,
/// and that a returned entry carries the `.isFromUserDictionary` metadata.
func testDynamicUserDictWithConversion() throws {
let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
// Two entries to register as the dynamic user dictionary.
// NOTE(review): `CIDData..cid` / `MIDData..mid` look like they lost the enum case
// name between the dots in this view — confirm against the repository.
let testDynamicUserDict = [
DicdataElement(word: "テストワード", ruby: "テストワード", lcid: CIDData..cid, rcid: CIDData..cid, mid: MIDData..mid, value: -8),
DicdataElement(word: "特殊読み", ruby: "トクシュヨミ", lcid: CIDData..cid, rcid: CIDData..cid, mid: MIDData..mid, value: -9)
]
dicdataStore.sendToDicdataStore(.importDynamicUserDict(testDynamicUserDict))
// Roman-to-kana input typed key by key; the lookup over the full input must
// include the registered word "テストワード".
do {
var c = ComposingText()
sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"}))
}
// Direct kana input: the entry must be found and must be marked as
// originating from the user dictionary.
do {
var c = ComposingText()
c.insertAtCursorPosition("トクシュヨミ", inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
let dynamicUserDictResult = result.first(where: {$0.data.word == "特殊読み"})
XCTAssertNotNil(dynamicUserDictResult)
XCTAssertEqual(dynamicUserDictResult?.data.metadata, .isFromUserDictionary)
}
}
}