refactor: clean up API

This commit is contained in:
ensan-hcl
2025-07-15 05:02:29 +09:00
parent 814a6b080b
commit b9bd88a247
5 changed files with 55 additions and 46 deletions

View File

@ -35,14 +35,19 @@ extension Kana2Kanji {
let indexMap = LatticeDualIndexMap(inputData)
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
let rawNodes = latticeIndices.map { index in
let inputRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let iIndex = index.inputIndex {
(iIndex, nil)
} else {
nil
}
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
(sIndex, nil)
} else {
nil
}
return dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: index.inputIndex,
return dicdataStore.lookupDicdata(
composingText: inputData,
inputRange: inputRange,
surfaceRange: surfaceRange,
needTypoCorrection: needTypoCorrection
)

View File

@ -27,14 +27,19 @@ extension Kana2Kanji {
let indexMap = LatticeDualIndexMap(inputData)
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
let rawNodes = latticeIndices.map { index in
let inputRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let iIndex = index.inputIndex {
(iIndex, nil)
} else {
nil
}
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
(sIndex, nil)
} else {
nil
}
return dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: index.inputIndex,
return dicdataStore.lookupDicdata(
composingText: inputData,
inputRange: inputRange,
surfaceRange: surfaceRange,
needTypoCorrection: false
)

View File

@ -68,10 +68,9 @@ extension Kana2Kanji {
} else {
nil
}
return self.dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: inputRange?.startIndex,
toIndexRange: inputRange?.endIndexRange,
return self.dicdataStore.lookupDicdata(
composingText: inputData,
inputRange: inputRange,
surfaceRange: surfaceRange,
needTypoCorrection: needTypoCorrection
)

View File

@ -465,33 +465,33 @@ public final class DicdataStore {
}
return data
}
/// kana2lattice
///
/// - Parameters:
/// - inputData:
/// - from:
/// - toIndexRange: `from ..< (toIndexRange)`
public func getLOUDSDataInRange(
inputData: ComposingText,
from fromInputIndex: Int?,
toIndexRange: Range<Int>? = nil,
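/// - composingText: The composing text to look up; both `input` and `convertTarget` are read from it.
/// - inputRange: Range over `composingText.input`, given as a start index plus an optional range of end indices. When the end range is `nil`, end indices start at `startIndex`; the upper bound is always capped at `startIndex + maxlength`. Pass `nil` to process no input range.
/// - surfaceRange: Range over `composingText.convertTarget`, in the same form and with the same capping as `inputRange`. Pass `nil` to process no surface range.
/// - needTypoCorrection: Whether typo correction should be enabled for this lookup.
/// - Returns: The `LatticeNode`s found; each node's range starts at the `startIndex` of the input or surface range it was matched against. Returns an empty array when a given range is inconsistent.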
public func lookupDicdata(
composingText: ComposingText,
inputRange:(startIndex: Int, endIndexRange: Range<Int>?)? = nil,
surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = nil,
needTypoCorrection: Bool = true
) -> [LatticeNode] {
let inputProcessRange: TypoCorrectionGenerator.ProcessRange?
// TODO: make `fromInputIndex` optional later.
if let fromInputIndex {
let toInputIndexLeft = toIndexRange?.startIndex ?? fromInputIndex
let inputProcessRange: TypoCorrectionGenerator.ProcessRange?
if let inputRange {
let toInputIndexLeft = inputRange.endIndexRange?.startIndex ?? inputRange.startIndex
let toInputIndexRight = min(
toIndexRange?.endIndex ?? inputData.input.count,
fromInputIndex + self.maxlength
inputRange.endIndexRange?.endIndex ?? composingText.input.count,
inputRange.startIndex + self.maxlength
)
if fromInputIndex > toInputIndexLeft || toInputIndexLeft >= toInputIndexRight {
if inputRange.startIndex > toInputIndexLeft || toInputIndexLeft >= toInputIndexRight {
debug(#function, "index is wrong")
return []
}
inputProcessRange = .init(leftIndex: fromInputIndex, rightIndexRange: toInputIndexLeft ..< toInputIndexRight)
inputProcessRange = .init(leftIndex: inputRange.startIndex, rightIndexRange: toInputIndexLeft ..< toInputIndexRight)
} else {
inputProcessRange = nil
}
@ -500,7 +500,7 @@ public final class DicdataStore {
if let surfaceRange {
let toSurfaceIndexLeft = surfaceRange.endIndexRange?.startIndex ?? surfaceRange.startIndex
let toSurfaceIndexRight = min(
surfaceRange.endIndexRange?.endIndex ?? inputData.convertTarget.count,
surfaceRange.endIndexRange?.endIndex ?? composingText.convertTarget.count,
surfaceRange.startIndex + self.maxlength
)
if surfaceRange.startIndex > toSurfaceIndexLeft || toSurfaceIndexLeft >= toSurfaceIndexRight {
@ -517,7 +517,7 @@ public final class DicdataStore {
}
// MARK:
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(
composingText: inputData,
composingText: composingText,
inputProcessRange: inputProcessRange,
surfaceProcessRange: surfaceProcessRange,
useMemory: self.learningManager.enabled,
@ -544,13 +544,13 @@ public final class DicdataStore {
if let inputProcessRange {
let segments = (inputProcessRange.leftIndex ..< inputProcessRange.rightIndexRange.endIndex).reduce(into: []) { (segments: inout [String], rightIndex: Int) in
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
segments.append((segments.last ?? "") + String(composingText.input[rightIndex].character.toKatakana()))
}
for i in inputProcessRange.rightIndexRange {
do {
let result = self.getWiseDicdata(
convertTarget: segments[i - inputProcessRange.leftIndex],
inputData: inputData,
inputData: composingText,
inputRange: inputProcessRange.leftIndex ..< i + 1
)
for item in result {
@ -560,13 +560,13 @@ public final class DicdataStore {
}
}
}
let needBOS = fromInputIndex == .zero
let needBOS = inputRange?.startIndex == .zero || surfaceRange?.startIndex == .zero
let result: [LatticeNode] = dicdata.compactMap {
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
return nil
}
let range: Lattice.LatticeRange = switch endIndex {
case .input(let endIndex): .input(from: fromInputIndex!, to: endIndex + 1)
case .input(let endIndex): .input(from: (inputRange?.startIndex)!, to: endIndex + 1)
case .surface(let endIndex): .surface(from: (surfaceRange?.startIndex)!, to: endIndex + 1)
}
let node = LatticeNode(data: $0, range: range)

View File

@ -129,7 +129,7 @@ final class DicdataStoreTests: XCTestCase {
for (key, word) in mustWords {
var c = ComposingText()
c.insertAtCursorPosition(key, inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: false)
//
XCTAssertEqual(result.first(where: {$0.data.word == word})?.data.word, word)
}
@ -150,7 +150,7 @@ final class DicdataStoreTests: XCTestCase {
for (key, word) in mustWords {
var c = ComposingText()
c.insertAtCursorPosition(key, inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: false)
XCTAssertNil(result.first(where: {$0.data.word == word && $0.data.ruby == key}))
}
}
@ -170,17 +170,17 @@ final class DicdataStoreTests: XCTestCase {
for (key, word) in mustWords {
var c = ComposingText()
c.insertAtCursorPosition(key, inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: true)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: true)
XCTAssertEqual(result.first(where: {$0.data.word == word})?.data.word, word)
}
}
func testGetLOUDSDataInRange() throws {
func testLookupDicdata() throws {
let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
do {
var c = ComposingText()
c.insertAtCursorPosition("ヘンカン", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: 2..<4)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, 2 ..< 4))
XCTAssertFalse(result.contains(where: {$0.data.word == ""}))
XCTAssertTrue(result.contains(where: {$0.data.word == "変化"}))
XCTAssertTrue(result.contains(where: {$0.data.word == "変換"}))
@ -188,7 +188,7 @@ final class DicdataStoreTests: XCTestCase {
do {
var c = ComposingText()
c.insertAtCursorPosition("ヘンカン", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: 0..<4)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, 0..<4))
XCTAssertTrue(result.contains(where: {$0.data.word == ""}))
XCTAssertTrue(result.contains(where: {$0.data.word == "変化"}))
XCTAssertTrue(result.contains(where: {$0.data.word == "変換"}))
@ -196,19 +196,19 @@ final class DicdataStoreTests: XCTestCase {
do {
var c = ComposingText()
c.insertAtCursorPosition("ツカッ", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: 2..<3)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, 2..<3))
XCTAssertTrue(result.contains(where: {$0.data.word == "使っ"}))
}
do {
var c = ComposingText()
c.insertAtCursorPosition("ツカッt", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: 2..<4)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, 2..<4))
XCTAssertTrue(result.contains(where: {$0.data.word == "使っ"}))
}
do {
var c = ComposingText()
sequentialInput(&c, sequence: "tukatt", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: 4..<6)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, 4..<6))
XCTAssertTrue(result.contains(where: {$0.data.word == "使っ"}))
}
}
@ -255,7 +255,7 @@ final class DicdataStoreTests: XCTestCase {
do {
var c = ComposingText()
c.insertAtCursorPosition("テストタンゴ", inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: false)
XCTAssertTrue(result.contains(where: {$0.data.word == "テスト単語"}))
}
@ -263,7 +263,7 @@ final class DicdataStoreTests: XCTestCase {
do {
var c = ComposingText()
c.insertAtCursorPosition("ドウテキジショ", inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: false)
XCTAssertTrue(result.contains(where: {$0.data.word == "動的辞書"}))
}
@ -288,7 +288,7 @@ final class DicdataStoreTests: XCTestCase {
do {
var c = ComposingText()
sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: false)
XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"}))
XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.range, .input(from: 0, to: 11))
}
@ -297,7 +297,7 @@ final class DicdataStoreTests: XCTestCase {
do {
var c = ComposingText()
c.insertAtCursorPosition("トクシュヨミ", inputStyle: .direct)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: false)
let dynamicUserDictResult = result.first(where: {$0.data.word == "特殊読み"})
XCTAssertNotNil(dynamicUserDictResult)
XCTAssertEqual(dynamicUserDictResult?.data.metadata, .isFromUserDictionary)