Merge pull request #224 from azooKey/refactor/remove_complicated_boundary_checker

refactor: 特定のローマ字かな変換テーブルを前提にした複雑な境界チェックを廃止する
This commit is contained in:
Miwa
2025-07-21 01:33:47 -07:00
committed by GitHub
parent 3f93209534
commit 59cde2a2ca
4 changed files with 93 additions and 212 deletions

View File

@ -18,18 +18,27 @@ struct TypoCorrectionGenerator: Sendable {
} }
} }
// //
var leftConvertTargetElements: [ComposingText.ConvertTargetElement] = []
for element in inputs[0 ..< range.leftIndex] {
ComposingText.updateConvertTargetElements(currentElements: &leftConvertTargetElements, newElement: element)
}
let actualLeftConvertTarget = leftConvertTargetElements.reduce(into: "") { $0 += $1.string}
self.stack = nodes[0].compactMap { typoCandidate in self.stack = nodes[0].compactMap { typoCandidate in
guard let firstElement = typoCandidate.inputElements.first else { var convertTargetElements = [ComposingText.ConvertTargetElement]()
var fullConvertTargetElements = leftConvertTargetElements
for element in typoCandidate.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
ComposingText.updateConvertTargetElements(currentElements: &fullConvertTargetElements, newElement: element)
}
let fullConvertTarget = fullConvertTargetElements.reduce(into: "") { $0 += $1.string}
let convertTarget = convertTargetElements.reduce(into: "") { $0 += $1.string}
if fullConvertTarget == actualLeftConvertTarget + convertTarget {
return (convertTargetElements, typoCandidate.inputElements.count, typoCandidate.weight)
} else {
return nil return nil
} }
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: range.leftIndex) {
var convertTargetElements = [ComposingText.ConvertTargetElement]()
for element in typoCandidate.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
return (convertTargetElements, typoCandidate.inputElements.last!, typoCandidate.inputElements.count, typoCandidate.weight)
}
return nil
} }
} }
@ -44,11 +53,45 @@ struct TypoCorrectionGenerator: Sendable {
var rightIndexRange: Range<Int> var rightIndexRange: Range<Int>
} }
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)] var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], count: Int, penalty: PValue)]
/// Returns whether `leftConvertTargetElements` is a prefix of `rightConvertTargetElements`.
///
/// * More elements on the left than on the right: never a prefix.
/// * Same number of elements: every element but the last must be equal, and the last
///   elements must share an `inputStyle` with the right `string` starting with the left `string`.
/// * Fewer elements on the left: the left elements must equal the leading right elements one by one.
///
/// - Parameters:
///   - leftConvertTargetElements: The candidate prefix.
///   - rightConvertTargetElements: The sequence tested against the candidate prefix.
/// - Returns: `true` when the left sequence is a prefix of the right sequence in the sense above.
private static func check(
    _ leftConvertTargetElements: [ComposingText.ConvertTargetElement],
    isPrefixOf rightConvertTargetElements: [ComposingText.ConvertTargetElement]
) -> Bool {
    let leftCount = leftConvertTargetElements.count
    let rightCount = rightConvertTargetElements.count
    // A longer sequence can never be a prefix of a shorter one.
    guard leftCount <= rightCount else {
        return false
    }
    if leftCount < rightCount {
        // Strictly shorter: every left element must match the right element at the same position.
        return leftConvertTargetElements.elementsEqual(rightConvertTargetElements.prefix(leftCount))
    }
    // Same length from here on. Two empty sequences trivially satisfy the prefix relation.
    guard let leftLast = leftConvertTargetElements.last, let rightLast = rightConvertTargetElements.last else {
        return true
    }
    // Everything before the last element has to match exactly.
    guard leftConvertTargetElements.dropLast().elementsEqual(rightConvertTargetElements.dropLast()) else {
        return false
    }
    // The last element only needs the same input style and a prefix relation on its string.
    guard leftLast.inputStyle == rightLast.inputStyle else {
        return false
    }
    return rightLast.string.hasPrefix(leftLast.string)
}
/// `target` /// `target`
mutating func setUnreachablePath(target: some Collection<Character>) { mutating func setUnreachablePath(target: some Collection<Character>) {
self.stack = self.stack.filter { (convertTargetElements, lastElement, count, penalty) in self.stack = self.stack.filter { (convertTargetElements, count, penalty) in
var stablePrefix: [Character] = [] var stablePrefix: [Character] = []
loop: for item in convertTargetElements { loop: for item in convertTargetElements {
switch item.inputStyle { switch item.inputStyle {
@ -79,11 +122,18 @@ struct TypoCorrectionGenerator: Sendable {
} }
mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? { mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() { while let (convertTargetElements, count, penalty) = self.stack.popLast() {
var result: ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? = nil var result: ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? = nil
if self.range.rightIndexRange.contains(count + self.range.leftIndex - 1) { if self.range.rightIndexRange.contains(count + self.range.leftIndex - 1) {
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + self.range.leftIndex, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) { let originalConvertTarget = convertTargetElements.reduce(into: []) { $0 += $1.string.map { $0.toKatakana() } }
result = (convertTarget, (.input(count + self.range.leftIndex - 1), penalty)) if self.range.leftIndex + count < self.inputs.endIndex {
var newConvertTargetElements = convertTargetElements
ComposingText.updateConvertTargetElements(currentElements: &newConvertTargetElements, newElement: inputs[self.range.leftIndex + count])
if Self.check(convertTargetElements, isPrefixOf: newConvertTargetElements) {
result = (originalConvertTarget, (.input(count + self.range.leftIndex - 1), penalty))
}
} else {
result = (originalConvertTarget, (.input(count + self.range.leftIndex - 1), penalty))
} }
} }
// //
@ -108,7 +158,7 @@ struct TypoCorrectionGenerator: Sendable {
for element in correct { for element in correct {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element) ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
} }
stack.append((convertTargetElements, correct.last!, count + correct.count, penalty)) stack.append((convertTargetElements, count + correct.count, penalty))
} else { } else {
stack.append(contentsOf: self.nodes[count].compactMap { stack.append(contentsOf: self.nodes[count].compactMap {
if count + $0.inputElements.count > self.nodes.endIndex { if count + $0.inputElements.count > self.nodes.endIndex {
@ -120,7 +170,6 @@ struct TypoCorrectionGenerator: Sendable {
} }
return ( return (
convertTargetElements: convertTargetElements, convertTargetElements: convertTargetElements,
lastElement: $0.inputElements.last!,
count: count + $0.inputElements.count, count: count + $0.inputElements.count,
penalty: penalty + $0.weight penalty: penalty + $0.weight
) )

View File

@ -412,154 +412,6 @@ extension ComposingText {
return convertTargetElements.reduce(into: "") {$0 += $1.string} return convertTargetElements.reduce(into: "") {$0 += $1.string}
} }
/// Decides whether the remaining boundary validation may be skipped for a convert target.
///
/// The element strings are concatenated; validation is skipped when the result contains no
/// roman alphabet, or when it equals one of the literal strings listed below.
/// - Parameters:
///   - convertTargetElement: The convert target elements whose strings are concatenated.
///   - originalElements: Not read by this function.
/// - Returns: `true` when further validation can be skipped.
static func shouldEscapeOtherValidation(convertTargetElement: [ConvertTargetElement], of originalElements: [InputElement]) -> Bool {
    var joined = ""
    for element in convertTargetElement {
        joined += element.string
    }
    // Nothing roman is left in the target, so there is nothing further to validate.
    guard joined.containsRomanAlphabet else {
        return true
    }
    return ["", "", "", ""].contains(joined)
}
/// Checks whether a candidate whose first element is `firstElement` may start at `leftIndex`
/// of `originalElements`.
///
/// The left boundary is accepted when any of the following holds:
/// * `leftIndex` is the start index, or `firstElement` is not `.roman2kana`
/// * the element just before `leftIndex` is not a `.roman2kana` roman letter
/// * that element is a vowel (`a|i|u|e|o`)
/// * that element is not `n` and equals `firstElement`'s character (doubled consonant)
/// * the two characters before `leftIndex` are one of `zl`, `zk`, `zj`, `zh`, `xn`
/// * the run of `.roman2kana` `n` ending at `leftIndex` has a non-zero even length
/// * that run has an odd length and `firstElement` is none of `a|i|u|e|o|y|n`
/// * that run has an odd length and is directly preceded by a `.roman2kana` `x`
///
/// - Parameters:
///   - firstElement: The first input element of the candidate.
///   - originalElements: The full input sequence.
///   - leftIndex: The index in `originalElements` where the candidate starts.
/// - Returns: `true` when the boundary is acceptable; `false` otherwise, including when
///   `leftIndex` is below the start index.
static func isLeftSideValid(first firstElement: InputElement, of originalElements: [InputElement], from leftIndex: Int) -> Bool {
    if leftIndex < originalElements.startIndex {
        return false
    }
    // A boundary at the very beginning, or before a non-roman2kana element, is always fine.
    guard leftIndex != originalElements.startIndex && firstElement.inputStyle == .roman2kana else {
        return true
    }
    let prevLastElement = originalElements[leftIndex - 1]
    if prevLastElement.inputStyle != .roman2kana || !CharacterUtils.isRomanLetter(prevLastElement.character) {
        return true
    }
    if ["a", "i", "u", "e", "o"].contains(prevLastElement.character) {
        return true
    }
    if prevLastElement.character != "n" && prevLastElement.character == firstElement.character {
        return true
    }
    // Everything left of the boundary; all remaining checks look only at this slice.
    let leftElements = originalElements[0 ..< leftIndex]
    let last_2 = leftElements.suffix(2)
    if ["zl", "zk", "zj", "zh", "xn"].contains(last_2.reduce(into: "") {$0.append($1.character)}) {
        return true
    }
    let n_suffix = leftElements.suffix(while: {$0.inputStyle == .roman2kana && $0.character == "n"})
    // A non-empty, even-length run of `n` right before the boundary is valid.
    if n_suffix.count % 2 == 0 && !n_suffix.isEmpty {
        return true
    }
    // An odd-length run is valid when the next character is none of a|i|u|e|o|y|n.
    if n_suffix.count % 2 == 1 && !["a", "i", "u", "e", "o", "y", "n"].contains(firstElement.character) {
        return true
    }
    // An odd-length run directly preceded by `x` is valid.
    // The run was measured on `leftElements`, so the preceding element must be taken from the
    // same slice; dropping from the whole input would inspect an element right of the boundary.
    if n_suffix.count % 2 == 1 && leftElements.dropLast(n_suffix.count).last == .init(character: "x", inputStyle: .roman2kana) {
        return true
    }
    return false
}
/// Checks whether a candidate ending with `lastElement` may end right before `rightIndex`
/// of `originalElements`.
///
/// The right boundary is accepted when any of the following holds:
/// * `rightIndex` is the end index, or `lastElement` is not `.roman2kana`
/// * the element at `rightIndex` is not a `.roman2kana` roman letter
/// * `lastElement` is a vowel (`a|i|u|e|o`)
/// * `lastElement` is not `n` and equals the element at `rightIndex` (doubled consonant)
/// * `lastElement` is `n` and the last convert target element does not end with `n`
/// * `lastElement` is `n`, the last convert target element is `.roman2kana` and ends with `n`,
///   and the element at `rightIndex` is none of `a|i|u|e|o|y|n`
///
/// - Parameters:
///   - lastElement: The last input element of the candidate.
///   - convertTargetElements: The convert target elements built from the candidate.
///   - originalElements: The full input sequence.
///   - rightIndex: The index in `originalElements` of the element following the candidate.
/// - Returns: `true` when the boundary is acceptable; `false` otherwise, including when
///   `convertTargetElements` is empty and none of the earlier rules applied.
static func isRightSideValid(lastElement: InputElement, convertTargetElements: [ConvertTargetElement], of originalElements: [InputElement], to rightIndex: Int) -> Bool {
    // A boundary at the very end, or after a non-roman2kana element, is always fine.
    // Past this guard `lastElement` is known to be `.roman2kana`, so no second style check is needed.
    guard rightIndex != originalElements.endIndex && lastElement.inputStyle == .roman2kana else {
        return true
    }
    let nextFirstElement = originalElements[rightIndex]
    if nextFirstElement.inputStyle != .roman2kana || !CharacterUtils.isRomanLetter(nextFirstElement.character) {
        return true
    }
    if ["a", "i", "u", "e", "o"].contains(lastElement.character) {
        return true
    }
    if lastElement.character != "n" && lastElement.character == nextFirstElement.character {
        return true
    }
    guard let lastConvertTargetElements = convertTargetElements.last else {
        return false
    }
    // The input ends with `n` but the convert target does not: valid.
    if lastElement.character == "n" && lastConvertTargetElements.string.last != "n" {
        return true
    }
    // A trailing `n` remains in the convert target; valid only when the next character is none of a|i|u|e|o|y|n.
    if lastElement.character == "n" && lastConvertTargetElements.inputStyle == .roman2kana && lastConvertTargetElements.string.last == "n" && !["a", "i", "u", "e", "o", "y", "n"].contains(nextFirstElement.character) {
        return true
    }
    return false
}
/// Returns the characters of the convert target when the candidate may end right before `rightIndex`.
/// - Parameters:
///   - lastElement: The last input element of the candidate.
///   - originalElements: The full input sequence.
///   - rightIndex: The index in `originalElements` of the element following the candidate.
///   - convertTargetElements: The convert target elements built from the candidate.
/// - Returns: The concatenated element strings (after the trailing-character fix-up below) when the
///   boundary is valid; `nil` when `rightIndex` is past the end index or the boundary is rejected.
/// - Note: NOTE(review): both fix-up branches append a `.direct` element whose literal reads `[""]`
///   in this view; the intended replacement characters appear to have been lost and should be confirmed.
static func getConvertTargetIfRightSideIsValid(lastElement: InputElement, of originalElements: [InputElement], to rightIndex: Int, convertTargetElements: [ConvertTargetElement]) -> [Character]? {
debug(#function, lastElement, rightIndex)
if originalElements.endIndex < rightIndex {
return nil
}
// Reject the boundary only when validation is not skipped for this convert target
// and the right side check fails.
let shouldEscapeValidation = Self.shouldEscapeOtherValidation(convertTargetElement: convertTargetElements, of: originalElements)
if !shouldEscapeValidation && !Self.isRightSideValid(lastElement: lastElement, convertTargetElements: convertTargetElements, of: originalElements, to: rightIndex) {
return nil
}
// The boundary is valid from here on; work on a mutable copy to fix up the trailing character.
var convertTargetElements = convertTargetElements
if let lastElement = convertTargetElements.last, lastElement.inputStyle == .roman2kana, rightIndex < originalElements.endIndex {
let nextFirstElement = originalElements[rightIndex]
if !lastElement.string.hasSuffix("n") && lastElement.string.last == nextFirstElement.character && CharacterUtils.isRomanLetter(nextFirstElement.character) {
// The trailing roman letter equals the next input character (doubled consonant):
// remove it and append a `.direct` replacement element.
convertTargetElements[convertTargetElements.endIndex - 1].string.removeLast()
convertTargetElements.append(ConvertTargetElement(string: [""], inputStyle: .direct))
}
if lastElement.string.hasSuffix("n") && !["a", "i", "u", "e", "o", "y", "n"].contains(nextFirstElement.character) {
// A trailing `n` followed by none of a|i|u|e|o|y|n:
// remove it and append a `.direct` replacement element.
convertTargetElements[convertTargetElements.endIndex - 1].string.removeLast()
convertTargetElements.append(ConvertTargetElement(string: [""], inputStyle: .direct))
}
}
return convertTargetElements.reduce(into: []) {$0 += $1.string}
}
// inputStyle // inputStyle
// k, o, r, e, h, ap, e, nd, e, s, u // k, o, r, e, h, ap, e, nd, e, s, u
// originalInput[ElementComposition(, roman2kana), ElementComposition(pen, direct), ElementComposition(, roman2kana)] // originalInput[ElementComposition(, roman2kana), ElementComposition(pen, direct), ElementComposition(, roman2kana)]

View File

@ -148,52 +148,6 @@ final class ComposingTextTests: XCTestCase {
} }
// Exercises `ComposingText.isRightSideValid` at every right boundary (`to: 1` ... `to: 8`)
// of the roman2kana input "akafatta".
func testIsRightSideValid() throws {
do {
var c = ComposingText()
c.insertAtCursorPosition("akafatta", inputStyle: .roman2kana) // 8 input elements, all roman2kana
// Boundaries after a vowel (1, 3, 5) are valid; boundaries after a lone consonant (2, 4) are not.
XCTAssertTrue(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "a", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: [""], inputStyle: .roman2kana)], of: c.input, to: 1))
XCTAssertFalse(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "k", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: ["", "k"], inputStyle: .roman2kana)], of: c.input, to: 2))
XCTAssertTrue(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "a", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: ["", ""], inputStyle: .roman2kana)], of: c.input, to: 3))
XCTAssertFalse(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "f", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: ["", "", "f"], inputStyle: .roman2kana)], of: c.input, to: 4))
XCTAssertTrue(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "a", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: ["", "", "", ""], inputStyle: .roman2kana)], of: c.input, to: 5))
// The first "t" of "tt" is followed by the same consonant, so a boundary after it is valid.
XCTAssertTrue(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "t", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: ["", "", "", "", "t"], inputStyle: .roman2kana)], of: c.input, to: 6))
// The second "t" is followed by "a", so a boundary after it is invalid.
XCTAssertFalse(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "t", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: ["", "", "", "", "t", "t"], inputStyle: .roman2kana)], of: c.input, to: 7))
// `to: 8` is the end index of the input, which is always a valid boundary.
XCTAssertTrue(ComposingText.isRightSideValid(lastElement: ComposingText.InputElement(character: "a", inputStyle: .roman2kana), convertTargetElements: [ComposingText.ConvertTargetElement(string: ["", "", "", "", "", ""], inputStyle: .roman2kana)], of: c.input, to: 8))
}
}
// Verifies the convert target returned by `ComposingText.getConvertTargetIfRightSideIsValid`
// when the boundary falls right after a trailing roman consonant.
func testGetConvertTargetIfRightSideIsValid() throws {
    // "akafat|ta": the trailing "t" is followed by another "t" and is expected to become "っ".
    do {
        var composingText = ComposingText()
        composingText.insertAtCursorPosition("akafatta", inputStyle: .roman2kana)
        let lastElement = ComposingText.InputElement(character: "t", inputStyle: .roman2kana)
        let elements = [ComposingText.ConvertTargetElement(string: Array("あかふぁt"), inputStyle: .roman2kana)]
        let actual = ComposingText.getConvertTargetIfRightSideIsValid(
            lastElement: lastElement,
            of: composingText.input,
            to: 6,
            convertTargetElements: elements
        )
        XCTAssertEqual(actual, Array("あかふぁっ"))
    }
    // "kin|tarou": the trailing "n" is followed by "t" and is expected to become "ん".
    do {
        var composingText = ComposingText()
        composingText.insertAtCursorPosition("kintarou", inputStyle: .roman2kana)
        let lastElement = ComposingText.InputElement(character: "n", inputStyle: .roman2kana)
        let elements = [ComposingText.ConvertTargetElement(string: Array("きn"), inputStyle: .roman2kana)]
        let actual = ComposingText.getConvertTargetIfRightSideIsValid(
            lastElement: lastElement,
            of: composingText.input,
            to: 3,
            convertTargetElements: elements
        )
        XCTAssertEqual(actual, Array("きん"))
    }
}
func testDifferenceSuffix() throws { func testDifferenceSuffix() throws {
do { do {
var c1 = ComposingText() var c1 = ComposingText()

View File

@ -175,6 +175,21 @@ final class DicdataStoreTests: XCTestCase {
} }
} }
/// Typo-correction cases on roman2kana input that the dictionary lookup must resolve.
func testMustCorrectTypoRoman2Kana() throws {
    let store = DicdataStore(convertRequestOptions: requestOptions())
    // (typed key, word that must be found). In both keys `ts` is typed where `ta` was presumably intended.
    let cases: [(String, String)] = [
        ("tskamatsu", "高松"),
        ("kitsmura", "北村")
    ]
    for (typed, expected) in cases {
        var composingText = ComposingText()
        composingText.insertAtCursorPosition(typed, inputStyle: .roman2kana)
        // Look up entries that start at input index 0 and end on the last input element.
        let endIndex = composingText.input.endIndex
        let candidates = store.lookupDicdata(
            composingText: composingText,
            inputRange: (0, endIndex - 1 ..< endIndex),
            needTypoCorrection: true
        )
        let matched = candidates.first(where: { $0.data.word == expected })
        XCTAssertEqual(matched?.data.word, expected)
    }
}
func testLookupDicdata() throws { func testLookupDicdata() throws {
let dicdataStore = DicdataStore(convertRequestOptions: requestOptions()) let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
do { do {
@ -209,6 +224,12 @@ final class DicdataStoreTests: XCTestCase {
var c = ComposingText() var c = ComposingText()
sequentialInput(&c, sequence: "tukatt", inputStyle: .roman2kana) sequentialInput(&c, sequence: "tukatt", inputStyle: .roman2kana)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, 4..<6)) let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, 4..<6))
XCTAssertFalse(result.contains(where: {$0.data.word == "使っ"}))
}
do {
var c = ComposingText()
sequentialInput(&c, sequence: "tukatt", inputStyle: .roman2kana)
let result = dicdataStore.lookupDicdata(composingText: c, surfaceRange: (0, nil))
XCTAssertTrue(result.contains(where: {$0.data.word == "使っ"})) XCTAssertTrue(result.contains(where: {$0.data.word == "使っ"}))
} }
} }
@ -288,9 +309,14 @@ final class DicdataStoreTests: XCTestCase {
do { do {
var c = ComposingText() var c = ComposingText()
sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana) sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana)
let result = dicdataStore.lookupDicdata(composingText: c, inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex), needTypoCorrection: false) let result = dicdataStore.lookupDicdata(
composingText: c,
inputRange: (0, c.input.endIndex - 1 ..< c.input.endIndex),
surfaceRange: (0, c.convertTarget.count - 1 ..< c.convertTarget.count),
needTypoCorrection: false
)
XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"})) XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"}))
XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.range, .input(from: 0, to: 11)) XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.range, .surface(from: 0, to: 6))
} }
// //