Merge pull request #205 from azooKey/fix/roman_triplet_input

fix: getWiseData関連の実装を修正し、bya→ビャなどの変換を適切に表示
This commit is contained in:
Miwa
2025-06-14 19:47:36 +09:00
committed by GitHub
5 changed files with 20 additions and 19 deletions

View File

@ -13,6 +13,8 @@ extension Subcommands {
var displayTopN: Int = 1
@Option(name: [.customLong("zenz")], help: "gguf format model weight for zenz.")
var zenzWeightPath: String = ""
@Flag(name: [.customLong("mix_english_candidate")], help: "Enable mixing English Candidates.")
var mixEnglishCandidate = false
@Flag(name: [.customLong("disable_prediction")], help: "Disable producing prediction candidates.")
var disablePrediction = false
@Flag(name: [.customLong("enable_memory")], help: "Enable memory.")
@ -290,7 +292,7 @@ extension Subcommands {
keyboardLanguage: .ja_JP,
typographyLetterCandidate: false,
unicodeCandidate: true,
englishCandidateInRoman2KanaInput: true,
englishCandidateInRoman2KanaInput: self.mixEnglishCandidate,
fullWidthRomanCandidate: false,
halfWidthKanaCandidate: false,
learningType: learningType,

View File

@ -501,7 +501,7 @@ extension ComposingText {
/// - Returns: `convertTarget` if the right side is valid, `nil` if it is invalid.
/// - Note: `elements = [r(k, a, n, s, h, a)]``k,a,n,s,h,a``k, a``a, n``s, h``k, a, n`
static func getConvertTargetIfRightSideIsValid(lastElement: InputElement, of originalElements: [InputElement], to rightIndex: Int, convertTargetElements: [ConvertTargetElement]) -> [Character]? {
debug("getConvertTargetIfRightSideIsValid", lastElement, rightIndex)
debug(#function, lastElement, rightIndex)
if originalElements.endIndex < rightIndex {
return nil
}

View File

@ -281,7 +281,7 @@ public final class DicdataStore {
let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
if fromIndex > toIndexLeft || toIndexLeft >= toIndexRight {
debug("getLOUDSDataInRange: index is wrong")
debug(#function, "index is wrong")
return []
}
@ -388,15 +388,12 @@ public final class DicdataStore {
private func getFrozenLOUDSDataInRange(inputData: ComposingText, from fromIndex: Int, toIndexRange: Range<Int>? = nil) -> [LatticeNode] {
let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
debug("getLOUDSDataInRange", fromIndex, toIndexRange?.description ?? "nil", toIndexLeft, toIndexRight)
debug(#function, fromIndex, toIndexRange?.description ?? "nil", toIndexLeft, toIndexRight)
if fromIndex > toIndexLeft || toIndexLeft >= toIndexRight {
debug("getLOUDSDataInRange: index is wrong")
debug(#function, "index is wrong")
return []
}
let segments = (fromIndex ..< toIndexRight).reduce(into: []) { (segments: inout [String], rightIndex: Int) in
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
}
let character = String(inputData.input[fromIndex].character.toKatakana())
let characterNode = LatticeNode(data: DicdataElement(word: character, ruby: character, cid: CIDData..cid, mid: MIDData..mid, value: -10), inputRange: fromIndex ..< fromIndex + 1)
if fromIndex == .zero {
@ -404,9 +401,10 @@ public final class DicdataStore {
}
// MARK:
let stringToEndIndex = inputData.getRanges(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
let stringToEndIndex = inputData.getRangesWithoutTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
// MARK:
guard let (minString, maxString) = stringToEndIndex.keys.minAndMax(by: {$0.count < $1.count}) else {
debug(#function, "minString/maxString is nil", stringToEndIndex)
return [characterNode]
}
let maxIDs = maxString.map(self.character2charId)
@ -424,11 +422,10 @@ public final class DicdataStore {
// temporalpenalty
dicdata.append(contentsOf: self.learningManager.temporaryThroughMatch(charIDs: consume maxIDs, depth: depth))
}
for i in toIndexLeft ..< toIndexRight {
dicdata.append(contentsOf: self.getWiseDicdata(convertTarget: segments[i - fromIndex], inputData: inputData, inputRange: fromIndex ..< i + 1))
}
for item in stringToEndIndex {
dicdata.append(contentsOf: self.getMatchDynamicUserDict(String(item.key)))
for (key, value) in stringToEndIndex {
let convertTarget = String(key)
dicdata.append(contentsOf: self.getWiseDicdata(convertTarget: convertTarget, inputData: inputData, inputRange: fromIndex ..< value + 1))
dicdata.append(contentsOf: self.getMatchDynamicUserDict(convertTarget))
}
if fromIndex == .zero {
return dicdata.compactMap {
@ -629,7 +626,8 @@ public final class DicdataStore {
//
if requestOptions.keyboardLanguage != .en_US && inputData.input[inputRange].allSatisfy({$0.inputStyle == .roman2kana}) {
if let katakana = Roman2Kana.katakanaChanges[convertTarget], let hiragana = Roman2Kana.hiraganaChanges[Array(convertTarget)] {
let roman = String(inputData.input[inputRange].map(\.character))
if let katakana = Roman2Kana.katakanaChanges[roman], let hiragana = Roman2Kana.hiraganaChanges[Array(roman)] {
result.append(DicdataElement(word: String(hiragana), ruby: katakana, cid: CIDData..cid, mid: MIDData..mid, value: -13))
result.append(DicdataElement(ruby: katakana, cid: CIDData..cid, mid: MIDData..mid, value: -14))
}

View File

@ -23,7 +23,7 @@ extension ComposingText {
/// Precondition: `left <= rightIndexRange.startIndex`.
func getRangesWithTypos(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: (endIndex: Int, penalty: PValue)] {
let count = rightIndexRange.endIndex - left
debug("getRangesWithTypos", left, rightIndexRange, count)
debug(#function, left, rightIndexRange, count)
let nodes = (0..<count).map {(i: Int) in
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
let j = i + k
@ -100,15 +100,16 @@ extension ComposingText {
/// The ranges are computed as closed ranges.
/// For example, `left=4, rightIndexRange=6..<10` covers the ranges `4...6, 4...7, 4...8, 4...9`.
/// Precondition: `left <= rightIndexRange.startIndex`.
func getRanges(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: Int] {
func getRangesWithoutTypos(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: Int] {
let count = rightIndexRange.endIndex - left
debug("getRangesWithTypos", left, rightIndexRange, count)
debug(#function, left, rightIndexRange, count)
let nodes = (0..<count).map {(i: Int) in
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
let j = i + k
if count <= j {
return []
}
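// NOTE(review): `frozen: true` presumably suppresses typo candidates (hence "WithoutTypos") — confirm against `getTypo`.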
return Self.getTypo(self.input[left + i ... left + j], frozen: true)
}
}

View File

@ -8,7 +8,7 @@
import Foundation
enum Roman2Kana {
static let katakanaChanges: [String: String] = Dictionary(uniqueKeysWithValues: hiraganaChanges.map { (String($0.key), String($0.value)) })
static let katakanaChanges: [String: String] = Dictionary(uniqueKeysWithValues: hiraganaChanges.map { (String($0.key), String($0.value).toKatakana()) })
static let hiraganaChanges: [[Character]: [Character]] = Dictionary(uniqueKeysWithValues: [
"a": "",
"xa": "",