mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 02:58:27 +00:00
Move to root
This commit is contained in:
39
Sources/KanaKanjiConverterModule/CIDData.swift
Normal file
39
Sources/KanaKanjiConverterModule/CIDData.swift
Normal file
@@ -0,0 +1,39 @@
|
||||
//
|
||||
// CIDData.swift
|
||||
// azooKey
|
||||
//
|
||||
// Created by ensan on 2022/05/05.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Named constants that map selected cases to their integer `cid` value.
public enum CIDData {
    case BOS, 記号, 係助詞ハ, 助動詞デス基本形, 一般名詞, 固有名詞, 人名一般, 地名一般, 数, EOS

    /// Always `1319`.
    /// NOTE(review): presumably the total number of CIDs in the dictionary — confirm.
    static var totalCount: Int {
        return 1319
    }

    /// The integer `cid` associated with the case.
    public var cid: Int {
        switch self {
        case .BOS:
            return 0
        case .記号:
            return 5
        case .係助詞ハ:
            return 261
        case .助動詞デス基本形:
            return 460
        case .一般名詞:
            return 1285
        case .固有名詞:
            return 1288
        case .人名一般:
            return 1289
        case .地名一般:
            return 1293
        case .数:
            return 1295
        case .EOS:
            return 1316
        }
    }
}
|
||||
163
Sources/KanaKanjiConverterModule/Candidate.swift
Normal file
163
Sources/KanaKanjiConverterModule/Candidate.swift
Normal file
@@ -0,0 +1,163 @@
|
||||
//
|
||||
// Candidate.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/10/26.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Mutable data describing a single clause.
final class ClauseDataUnit {
    /// The MID of the clause. Starts as `MIDData.EOS.mid`.
    var mid: Int = MIDData.EOS.mid
    /// The LCID of the next clause. Starts as `CIDData.EOS.cid`.
    var nextLcid = CIDData.EOS.cid
    /// The text of the unit.
    var text: String = ""
    /// The range of the unit within the input text.
    var inputRange: Range<Int> = 0 ..< 0

    /// Merges `unit` into this unit.
    ///
    /// The other unit's text is appended, the input range is extended to the
    /// other unit's upper bound, and `nextLcid` is taken from the other unit.
    /// `mid` is left untouched.
    /// - Parameter unit: The unit to merge into the receiver.
    func merge(with unit: ClauseDataUnit) {
        text += unit.text
        inputRange = inputRange.lowerBound ..< unit.inputRange.upperBound
        nextLcid = unit.nextLcid
    }
}
|
||||
|
||||
extension ClauseDataUnit: Equatable {
    /// Two units are equal when `mid`, `nextLcid`, `text` and `inputRange` all match
    /// (value equality, not reference identity).
    static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool {
        guard lhs.mid == rhs.mid, lhs.nextLcid == rhs.nextLcid else {
            return false
        }
        return lhs.text == rhs.text && lhs.inputRange == rhs.inputRange
    }
}
|
||||
|
||||
#if DEBUG
extension ClauseDataUnit: CustomDebugStringConvertible {
    /// Debug-only description listing every stored property of the unit.
    var debugDescription: String {
        let fields = [
            "mid: \(mid)",
            "nextLcid: \(nextLcid)",
            "text: \(text)",
            "inputRange: \(inputRange)"
        ]
        return "ClauseDataUnit(" + fields.joined(separator: ", ") + ")"
    }
}
#endif
|
||||
|
||||
/// A list of clauses (each paired with a `PValue`) together with the
/// `DicdataElement`s they were built from.
struct CandidateData {
    /// One clause together with its associated value.
    typealias ClausesUnit = (clause: ClauseDataUnit, value: PValue)

    var clauses: [ClausesUnit]
    var data: [DicdataElement]

    init(clauses: [ClausesUnit], data: [DicdataElement]) {
        self.data = data
        self.clauses = clauses
    }

    /// The clause of the last entry in `clauses`, or `nil` when there are none.
    var lastClause: ClauseDataUnit? {
        guard let last = clauses.last else {
            return nil
        }
        return last.clause
    }

    /// `true` when there are no clauses (`data` is not consulted).
    var isEmpty: Bool {
        return clauses.isEmpty
    }
}
|
||||
|
||||
/// An action attached to a `Candidate` (see `Candidate.actions`).
public enum CompleteAction {
    /// Adjust the cursor.
    /// NOTE(review): the associated `Int` is presumably the cursor offset — confirm with the consumer.
    case moveCursor(Int)
}
|
||||
|
||||
/// Data of a conversion candidate.
public struct Candidate {
    /// The text that is input for this candidate.
    public var text: String
    /// Evaluation score.
    public let value: PValue
    /// Number of characters of `composingText.input` this candidate corresponds to.
    public var correspondingCount: Int
    /// The last mid (used for predictive conversion).
    public let lastMid: Int
    /// The sequence of `DicdataElement`s.
    public let data: [DicdataElement]
    /// Actions to run when the candidate is selected.
    /// - note: Added so that the cursor can be moved after brackets are input.
    public var actions: [CompleteAction]
    /// Whether the candidate can be input.
    /// - note: Flag added for character-count display.
    public let inputable: Bool

    public init(text: String, value: PValue, correspondingCount: Int, lastMid: Int, data: [DicdataElement], actions: [CompleteAction] = [], inputable: Bool = true) {
        self.text = text
        self.value = value
        self.correspondingCount = correspondingCount
        self.lastMid = lastMid
        self.data = data
        self.actions = actions
        self.inputable = inputable
    }

    /// Replaces the candidate's actions after construction.
    /// - parameters:
    ///   - actions: The actions to run.
    @inlinable public mutating func withActions(_ actions: [CompleteAction]) {
        self.actions = actions
    }

    /// Regular expression matching a `<date ...>` template tag.
    private static let dateExpression = "<date format=\".*?\" type=\".*?\" language=\".*?\" delta=\".*?\" deltaunit=\".*?\">"
    /// Regular expression matching a `<random ...>` template tag.
    private static let randomExpression = "<random type=\".*?\" value=\".*?\">"

    /// Parses templates in `text` and produces the candidate text.
    ///
    /// Every `<date ...>` tag is replaced first, then every `<random ...>` tag, each by
    /// the `previewString()` of the corresponding template literal. The result is
    /// finally passed through `unescaped()`.
    public static func parseTemplate(_ text: String) -> String {
        var result = text
        // Date templates: keep replacing the first match until none is left.
        while let match = result.range(of: Self.dateExpression, options: .regularExpression) {
            let literal = DateTemplateLiteral.import(from: String(result[match]))
            result.replaceSubrange(match, with: literal.previewString())
        }
        // Random templates, same procedure.
        while let match = result.range(of: Self.randomExpression, options: .regularExpression) {
            let literal = RandomTemplateLiteral.import(from: String(result[match]))
            result.replaceSubrange(match, with: literal.previewString())
        }
        return result.unescaped()
    }

    /// Parses templates in `text` and stores the result back into `text`.
    @inlinable public mutating func parseTemplate() {
        // Note: after this, `text` no longer agrees with the words in `data` joined together.
        // Rewriting `data` instead is not the desired behavior, though.
        self.text = Self.parseTemplate(text)
    }

    /// Builds the `Candidate` for the clause that forms the prefix of the input
    /// when the input is regarded as a sentence.
    ///
    /// Elements are consumed from the front of `data` until
    /// `DicdataStore.isClause` reports a clause boundary between the previous
    /// element's `rcid` and the next element's `lcid`.
    public static func makePrefixClauseCandidate(data: some Collection<DicdataElement>) -> Candidate {
        var joinedWords = ""
        var rubyLength = 0
        var previousRcid = CIDData.BOS.cid
        var mid = 501
        var consumed: [DicdataElement] = []

        var iterator = data.makeIterator()
        // Stop at the first element that starts a new clause.
        while let element = iterator.next(), !DicdataStore.isClause(previousRcid, element.lcid) {
            joinedWords.append(element.word)
            rubyLength += element.ruby.count
            previousRcid = element.rcid
            // Written with the first element in mind.
            if element.mid != 500 && DicdataStore.includeMMValueCalculation(element) {
                mid = element.mid
            }
            consumed.append(element)
        }

        return Candidate(
            text: joinedWords,
            value: -5,
            correspondingCount: rubyLength,
            lastMid: mid,
            data: consumed
        )
    }
}
|
||||
616
Sources/KanaKanjiConverterModule/ComposingText.swift
Normal file
616
Sources/KanaKanjiConverterModule/ComposingText.swift
Normal file
@@ -0,0 +1,616 @@
|
||||
//
|
||||
// ComposingText.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2022/09/21.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import SwiftUtils
|
||||
|
||||
/// ユーザ入力、変換対象文字列、ディスプレイされる文字列、の3つを同時にハンドルするための構造体
|
||||
/// - `input`: `[k, y, o, u, h, a, a, m, e]`
|
||||
/// - `convertTarget`: `きょうはあめ`
|
||||
/// のようになる。
|
||||
/// カーソルのポジションもこの構造体が管理する。
|
||||
/// 設計方針として、inputStyleに関わる実装の違いは全てアップデート方法の違いとして吸収し、`input` / `delete` / `moveCursor` / `complete`時の違いとしては露出させないようにすることを目指した。
|
||||
public struct ComposingText {
|
||||
/// Creates a composing text from its three components.
///
/// The values are stored as given; no consistency check between them is performed here.
public init(convertTargetCursorPosition: Int = 0, input: [ComposingText.InputElement] = [], convertTarget: String = "") {
    self.input = input
    self.convertTarget = convertTarget
    self.convertTargetCursorPosition = convertTargetCursorPosition
}
|
||||
|
||||
/// Cursor position within `convertTarget`. `0` is the left edge (for left-to-right scripts).
public private(set) var convertTargetCursorPosition: Int = 0

/// The user's input sequence. Unlike a history, it holds only what corresponds to the
/// conversion target string; operations such as delete or cursor moves are not recorded.
public private(set) var input: [InputElement] = []

/// The conversion target string.
public private(set) var convertTarget: String = ""
|
||||
|
||||
/// One unit of user input.
public struct InputElement {
    /// The character that was input.
    public var character: Character

    /// The input style in effect at that time (romaji input / direct input).
    public var inputStyle: InputStyle
}
|
||||
|
||||
/// `true` when there is no conversion target string.
public var isEmpty: Bool {
    return convertTarget.isEmpty
}
|
||||
|
||||
/// `true` when the cursor is at the right edge of `convertTarget`.
public var isAtEndIndex: Bool {
    return convertTargetCursorPosition == convertTarget.count
}
|
||||
|
||||
/// `true` when the cursor is at the left edge of `convertTarget`.
public var isAtStartIndex: Bool {
    return convertTargetCursorPosition == 0
}
|
||||
|
||||
/// The part of `convertTarget` that lies before the cursor.
public var convertTargetBeforeCursor: some StringProtocol {
    return convertTarget.prefix(convertTargetCursorPosition)
}
|
||||
|
||||
/// Forces a cursor position inside `input` into existence.
///
/// Returns the index in `input` such that the elements to its left convert to `target`.
/// Example: with `input` = `[k, y, o, u]` and `target` = `き|`, no such index exists.
/// In that case `input` is rewritten to `[き, ょ, u]`, the cursor is regarded as `き|`,
/// and `1` is returned.
private mutating func forceGetInputCursorPosition(target: some StringProtocol) -> Int {
    debug("ComposingText forceGetInputCursorPosition", self, target)
    guard !target.isEmpty else {
        return 0
    }
    // Example 1
    //   input: [k, a, n, s, h, a] (all roman2kana), convertTarget: `かんし|ゃ`,
    //   convertTargetCursorPosition: 3, target: かんし
    //   1. "k" -> converted "k",        count 1
    //   2. "a" -> converted "か",       count 2; target has this prefix, so lastPrefixIndex = 2, lastPrefix = "か"
    //   3. "n" -> converted "かn",      count 3
    //   4. "s" -> converted "かんs",    count 4
    //   5. "h" -> converted "かんsh",   count 5
    //   6. "a" -> converted "かんしゃ", count 6
    //      converted has target as a prefix: converted too far, so adjust.
    //      replaceCount = 6 - 2 = 4, so [n, s, h, a] are removed: input = [k, a], count = 2
    //      converted.count == 4 and lastPrefix.count == 1, so the 3-character suffix [ん, し, ゃ] is inserted:
    //      input = [k, a, ん, し, ゃ], count = 5
    //      trim loop: converted = かんし, count = 4
    //   returns 4
    //
    // Example 2
    //   input: [k, a, n, s, h, a] (all roman2kana), convertTarget: `かん|しゃ`,
    //   convertTargetCursorPosition: 2, target: かん
    //   1. "k" -> "k", count 1
    //   2. "a" -> "か", count 2; lastPrefixIndex = 2, lastPrefix = "か"
    //   3. "n" -> "かn", count 3
    //   4. "s" -> "かんs", count 4
    //      converted has target as a prefix: adjust.
    //      replaceCount = 4 - 2 = 2, so [n, s] are removed: input = [k, a] ... [h, a], count = 2
    //      converted.count == 3 and lastPrefix.count == 1, so the 2-character suffix [ん, s] is inserted:
    //      input = [k, a, ん, s, h, a], count = 4
    //      trim loop: converted = かん, count = 3
    //   returns 3
    //
    // Example 3
    //   input: [i, t, t, a] (all roman2kana), convertTarget: `いっ|た`,
    //   convertTargetCursorPosition: 2, target: いっ
    //   1. "i" -> "い", count 1; lastPrefixIndex = 1, lastPrefix = "い"
    //   2. "t" -> "いt", count 2
    //   3. "t" -> "いっt", count 3
    //      converted has target as a prefix: adjust.
    //      replaceCount = 3 - 1 = 2, so [t, t] are removed: input = [i] ... [a], count = 1
    //      converted.count == 3 and lastPrefix.count == 1, so the 2-character suffix [っ, t] is inserted:
    //      input = [i, っ, t, a], count = 3
    //      trim loop: converted = いっ, count = 2
    //   returns 2

    var consumed = 0
    // Number of input elements consumed when `converted` was last a prefix of `target`,
    // and the converted text at that moment.
    var matchedCount = 0
    var matchedText = ""
    var elements: [ConvertTargetElement] = []

    for element in input {
        Self.updateConvertTargetElements(currentElements: &elements, newElement: element)
        var converted = elements.reduce(into: "") { $0.append(contentsOf: $1.string) }
        consumed += 1

        // Exact match: the current count is the answer.
        if converted == target {
            return consumed
        }

        // `converted` starts with `target` without being equal: converted too far, so `input`
        // has to be rewritten. E.g. when convertTarget is `あき|ょ` and reading `[a, k, y, o]`
        // produced `あきょ`, `あき` is the target prefix. Everything after the last matched
        // position is replaced by the converted characters (the suffix).
        if converted.hasPrefix(target) {
            let tail = converted.suffix(converted.count - matchedText.count)
            let replacement = tail.map { character in
                InputElement(
                    character: character,
                    inputStyle: CharacterUtils.isRomanLetter(character) ? .roman2kana : .direct
                )
            }
            self.input.replaceSubrange(matchedCount ..< consumed, with: replacement)
            consumed = matchedCount + tail.count

            // Step back one character at a time until only `target` remains.
            while converted != target {
                _ = converted.popLast()
                consumed -= 1
            }
            break
        }

        // `converted` is a prefix of `target`: remember this position.
        if target.hasPrefix(converted) {
            matchedCount = consumed
            matchedText = converted
        }
    }
    return consumed
}
|
||||
|
||||
/// Computes how to turn `oldString` into `newString` by editing only the tail.
/// - Returns: `delete` is the number of characters of `oldString` after the common prefix;
///   `input` is the part of `newString` after the common prefix.
private func diff(from oldString: some StringProtocol, to newString: String) -> (delete: Int, input: String) {
    let sharedLength = oldString.commonPrefix(with: newString).count
    let charactersToDelete = oldString.count - sharedLength
    let textToInput = String(newString.dropFirst(sharedLength))
    return (charactersToDelete, textToInput)
}
|
||||
|
||||
/// Inserts `string` into `input` at `inputCursorPosition`, applying special-case rewrites.
///
/// When both the new text and the element just before the cursor are `.roman2kana` and the
/// new text starts with a roman letter, the previous element may be replaced first:
/// - by a direct `っ` if it equals the first new character and that character is not one of
///   `a, i, u, e, o, n`;
/// - otherwise by a direct `ん` if an odd number of roman2kana `n`s end at the cursor and the
///   first new character is not one of `n, a, i, u, e, o, y`.
///
/// TODO: This is an ad-hoc treatment; it should be generalized somehow.
private mutating func updateInput(_ string: String, at inputCursorPosition: Int, inputStyle: InputStyle) {
    // Character that should replace the element just before the cursor, if any.
    var replacement: Character?

    if inputCursorPosition != 0 {
        let previous = self.input[inputCursorPosition - 1]
        if inputStyle == .roman2kana && previous.inputStyle == inputStyle,
           let head = string.first,
           String(head).onlyRomanAlphabet {
            if previous.character == head && !["a", "i", "u", "e", "o", "n"].contains(head) {
                replacement = "っ"
            } else {
                let trailingNs = self.input[0 ..< inputCursorPosition].suffix(while: {
                    $0.character == "n" && $0.inputStyle == .roman2kana
                })
                if trailingNs.count % 2 == 1 && !["n", "a", "i", "u", "e", "o", "y"].contains(head) {
                    replacement = "ん"
                }
            }
        }
    }

    if let replacement = replacement {
        self.input[inputCursorPosition - 1] = InputElement(character: replacement, inputStyle: .direct)
    }
    let newElements = string.map { InputElement(character: $0, inputStyle: inputStyle) }
    self.input.insert(contentsOf: newElements, at: inputCursorPosition)
}
|
||||
|
||||
/// Inserts `string` at the current cursor position.
///
/// Updates `input`, `convertTarget` and `convertTargetCursorPosition`. Does nothing for an
/// empty `string`.
public mutating func insertAtCursorPosition(_ string: String, inputStyle: InputStyle) {
    guard !string.isEmpty else {
        return
    }
    let textBeforeCursor = self.convertTarget.prefix(self.convertTargetCursorPosition)
    let inputCursorPosition = self.forceGetInputCursorPosition(target: textBeforeCursor)

    // 1. Update `input`.
    self.updateInput(string, at: inputCursorPosition, inputStyle: inputStyle)

    // Re-convert everything up to and including the inserted elements and compare it with
    // what used to be in front of the cursor.
    let previousHead = self.convertTarget.prefix(self.convertTargetCursorPosition)
    let updatedHead = Self.getConvertTarget(for: self.input.prefix(inputCursorPosition + string.count))
    let change = self.diff(from: previousHead, to: updatedHead)

    // 2. Update `convertTarget`: swap the old head for the new one.
    self.convertTarget.removeFirst(self.convertTargetCursorPosition)
    self.convertTarget = updatedHead + self.convertTarget

    // 3. Update `convertTargetCursorPosition`.
    self.convertTargetCursorPosition += change.input.count - change.delete
}
|
||||
|
||||
/// Deletes characters to the right of the cursor (for left-to-right scripts).
///
/// `count` is capped at the number of characters after the cursor. The deletion is done by
/// moving the cursor forward and then deleting backward.
public mutating func deleteForwardFromCursorPosition(count: Int) {
    let charactersAfterCursor = convertTarget.count - convertTargetCursorPosition
    let count = min(charactersAfterCursor, count)
    guard count != 0 else {
        return
    }
    convertTargetCursorPosition += count
    deleteBackwardFromCursorPosition(count: count)
}
|
||||
|
||||
/// Deletes characters to the left of the cursor (for left-to-right scripts).
///
/// `count` is capped at the cursor position. As an edge case, deleting one character in the
/// state `sha: しゃ|` first converts `[s, h, a]` into `[し, ゃ]` and then removes `ゃ`.
public mutating func deleteBackwardFromCursorPosition(count: Int) {
    let count = min(convertTargetCursorPosition, count)
    guard count != 0 else {
        return
    }
    // Example 1
    //   convertTarget: かんしゃ|, input: [k, a, n, s, h, a], count = 1
    //   currentPrefix = かんしゃ
    //   destination: targetCursorPosition = forceGetInputCursorPosition(かんし) = 4
    //       side effect: input becomes [k, a, ん, し, ゃ]
    //   current:     inputCursorPosition = forceGetInputCursorPosition(かんしゃ) = 5
    //       side effect: input stays [k, a, ん, し, ゃ]
    //   input = input.prefix(targetCursorPosition)               = [k, a, ん, し]
    //         + input.suffix(input.count - inputCursorPosition)  = []
    //         = [k, a, ん, し]
    //
    // Example 2
    //   convertTarget: かんしゃ|, input: [k, a, n, s, h, a], count = 2
    //   currentPrefix = かんしゃ
    //   destination: targetCursorPosition = forceGetInputCursorPosition(かん) = 3
    //       side effect: input becomes [k, a, ん, s, h, a]
    //   current:     inputCursorPosition = forceGetInputCursorPosition(かんしゃ) = 6
    //       side effect: input stays [k, a, ん, s, h, a]
    //   input = input.prefix(targetCursorPosition)               = [k, a, ん]
    //         + input.suffix(input.count - inputCursorPosition)  = []
    //         = [k, a, ん]

    // Text in front of the cursor right now.
    let textBeforeCursor = self.convertTargetBeforeCursor

    // These two values must be computed in this order.
    // Where the cursor is going.
    let destination = self.forceGetInputCursorPosition(target: textBeforeCursor.dropLast(count))
    // Where the cursor is now.
    let origin = self.forceGetInputCursorPosition(target: textBeforeCursor)

    // Update `input`.
    self.input.removeSubrange(destination ..< origin)
    // Update the cursor.
    self.convertTargetCursorPosition -= count
    // Update `convertTarget`.
    self.convertTarget = Self.getConvertTarget(for: self.input)
}
|
||||
|
||||
/// Moves the cursor relative to its current position.
/// - parameters:
///   - count: Number of characters to move, measured in `convertTarget`.
/// - returns: The number of characters the cursor actually moved.
/// - note: When the requested distance is not possible, the return value differs from `count`.
public mutating func moveCursorFromCursorPosition(count: Int) -> Int {
    let maxForward = self.convertTarget.count - self.convertTargetCursorPosition
    let maxBackward = -self.convertTargetCursorPosition
    // Clamp the requested distance to what is available on either side.
    let distance = max(min(maxForward, count), maxBackward)
    self.convertTargetCursorPosition += distance
    return distance
}
|
||||
|
||||
/// Commits (removes) the leading part of the composition.
///
/// The first `correspondingCount` elements of `input` are dropped, `convertTarget` is
/// rebuilt from what remains, and the cursor is shifted left by the number of characters
/// that disappeared from `convertTarget`.
/// - parameters:
///   - correspondingCount: Number of elements of `input` to commit. Values larger than
///     `input.count` are capped.
public mutating func prefixComplete(correspondingCount: Int) {
    let correspondingCount = min(correspondingCount, self.input.count)
    self.input.removeFirst(correspondingCount)
    // Rebuild `convertTarget`.
    let newConvertTarget = Self.getConvertTarget(for: self.input)
    // The cursor moves left by the number of removed characters.
    let cursorDelta = self.convertTarget.count - newConvertTarget.count
    self.convertTarget = newConvertTarget
    self.convertTargetCursorPosition -= cursorDelta
    // If the cursor ended up at the left edge, move it to the END of the remaining text.
    // `<=` rather than `==`: when the cursor was inside the committed prefix the subtraction
    // above makes it negative, which would later trap in `convertTarget.prefix(_:)`.
    if self.convertTargetCursorPosition <= 0 {
        self.convertTargetCursorPosition = self.convertTarget.count
    }
}
|
||||
|
||||
/// Returns a new `ComposingText` made of the characters up to the current cursor position.
///
/// The receiver is not modified; the copy's `input` and `convertTarget` are both cut at the
/// cursor, and its cursor position is unchanged.
public func prefixToCursorPosition() -> ComposingText {
    var result = self
    let textBeforeCursor = result.convertTarget.prefix(result.convertTargetCursorPosition)
    let inputCount = result.forceGetInputCursorPosition(target: textBeforeCursor)
    result.input = Array(result.input.prefix(inputCount))
    result.convertTarget = String(result.convertTarget.prefix(result.convertTargetCursorPosition))
    return result
}
|
||||
|
||||
/// Resets the composition: `input` becomes empty, `convertTarget` becomes `""` and the
/// cursor returns to `0`.
public mutating func stopComposition() {
    // The initializer's defaults are exactly the cleared state.
    self = ComposingText()
}
|
||||
}
|
||||
|
||||
// MARK: 部分領域の計算のためのAPI
|
||||
// 例えば、「akafa」という入力があるとき、「aka」はvalidな部分領域だが、「kaf」はinvalidである。
|
||||
// 難しいケースとして「itta」の「it」を「いっ」としてvalidな部分領域と見做したいというモチベーションがある。
|
||||
extension ComposingText {
|
||||
/// Builds the conversion target string for a sequence of input elements by feeding them,
/// in order, through `updateConvertTargetElements` and joining the resulting strings.
static func getConvertTarget(for elements: some Sequence<InputElement>) -> String {
    let converted = elements.reduce(into: [ConvertTargetElement]()) { partial, element in
        Self.updateConvertTargetElements(currentElements: &partial, newElement: element)
    }
    var joined = ""
    for part in converted {
        joined += part.string
    }
    return joined
}
|
||||
|
||||
/// Decides whether the remaining validity checks can be skipped for a region.
///
/// Returns `true` when the joined converted string contains no roman alphabet, or is exactly
/// one of the arrows `→ ↓ ↑ ←` (escape for punctuation marks and arrows).
/// `originalElements` is not used by this function.
static func shouldEscapeOtherValidation(convertTargetElement: [ConvertTargetElement], of originalElements: [InputElement]) -> Bool {
    var joined = ""
    for element in convertTargetElement {
        joined += element.string
    }
    return !joined.containsRomanAlphabet || ["→", "↓", "↑", "←"].contains(joined)
}
|
||||
|
||||
// MARK: 利用されていないAPI
|
||||
/// Checks whether a region may start at `leftIndex` of `originalElements`.
///
/// With `el` the element at `leftIndex`, the accepted patterns are:
/// * `leftIndex == startIndex`
/// * `el` is direct
/// * (any direct element) -> `el`
/// * (a|i|u|e|o as roman2kana) -> `el` — for `aka`, the `ka` part is valid
/// * the previous element is roman2kana, is not `n`, and equals `el` — for `tta`, the `ta`
///   part is valid, but not for `nna`
/// * (n as roman2kana) -> `el`, where `el` is not a|i|u|e|o|y|n — for `nka`, the `ka` part is
///   valid, but not for `nna`
static func isLeftSideValid(first firstElement: InputElement, of originalElements: [InputElement], from leftIndex: Int) -> Bool {
    guard leftIndex >= originalElements.startIndex else {
        return false
    }
    // At the left edge, or the first element is direct.
    if leftIndex == originalElements.startIndex || firstElement.inputStyle != .roman2kana {
        return true
    }

    let previous = originalElements[leftIndex - 1]
    guard previous.inputStyle == .roman2kana, CharacterUtils.isRomanLetter(previous.character) else {
        return true
    }
    if ["a", "i", "u", "e", "o"].contains(previous.character) {
        return true
    }
    if previous.character != "n" && previous.character == firstElement.character {
        return true
    }

    let leading = originalElements[0 ..< leftIndex]
    let lastTwo = String(leading.suffix(2).map(\.character))
    if ["zl", "zk", "zj", "zh"].contains(lastTwo) {
        return true
    }

    // Run of roman2kana `n`s that ends right before the region.
    let trailingNs = leading.suffix(while: { $0.inputStyle == .roman2kana && $0.character == "n" })
    if trailingNs.isEmpty {
        return false
    }
    if trailingNs.count % 2 == 0 {
        return true
    }
    // Odd number of `n`s: valid only if the first element cannot combine with the last `n`.
    return !["a", "i", "u", "e", "o", "y", "n"].contains(firstElement.character)
}
|
||||
|
||||
/// Checks whether the right side of a region is valid.
///
/// With `er` the last element of the region and `e+1` the element at `rightIndex`, the
/// accepted patterns are:
/// * `rightIndex == endIndex`
/// * `er` is direct
/// * `er` -> (any direct element)
/// * `er` is a|i|u|e|o — for `aka`, the `a` part is valid
/// * `er` is not `n` and `er == e+1` — for `kka`, the `k` part is valid
/// * `er` is `n` and `e+1` is roman2kana and not a|i|u|e|o|n|y — for `(nn)*nka`, the
///   `(nn)*n` part is valid
/// * `er` is `n` and `e+1` is roman2kana — for `(nn)*a`, the `nn` part is valid
/// - Parameters:
///   - lastElement: The last element of the region.
///   - convertTargetElements: The `convertTarget` built by reading up to the end of the region.
///   - originalElements: The `input` the region was taken from.
///   - rightIndex: Index of the element immediately to the right of the region.
/// - Returns: Whether the right side is valid.
static func isRightSideValid(lastElement: InputElement, convertTargetElements: [ConvertTargetElement], of originalElements: [InputElement], to rightIndex: Int) -> Bool {
    // At the right edge, or the last element is direct.
    // (A second `lastElement.inputStyle != .roman2kana` check used to follow this guard;
    // it could never be reached and has been removed.)
    guard rightIndex != originalElements.endIndex && lastElement.inputStyle == .roman2kana else {
        return true
    }
    let nextFirstElement = originalElements[rightIndex]
    if nextFirstElement.inputStyle != .roman2kana || !CharacterUtils.isRomanLetter(nextFirstElement.character) {
        return true
    }
    if ["a", "i", "u", "e", "o"].contains(lastElement.character) {
        return true
    }
    if lastElement.character != "n" && lastElement.character == nextFirstElement.character {
        return true
    }
    guard let lastConvertTargetElements = convertTargetElements.last else {
        return false
    }
    // Allowed when the `n`s were all consumed (no `n` left at the end of the converted text).
    if lastElement.character == "n" && lastConvertTargetElements.string.last != "n" {
        return true
    }
    // Allowed when exactly one `n` is left over at the end and the next character
    // cannot combine with it.
    if lastElement.character == "n" && lastConvertTargetElements.inputStyle == .roman2kana && lastConvertTargetElements.string.last == "n" && !["a", "i", "u", "e", "o", "y", "n"].contains(nextFirstElement.character) {
        return true
    }
    return false
}
|
||||
|
||||
/// Returns the converted characters of a region if the region is "valid".
/// - Parameters:
///   - lastElement: The last element of the region.
///   - originalElements: The `input` the region was taken from.
///   - rightIndex: Index of the element immediately to the right of the region.
///   - convertTargetElements: The `convertTarget` built by reading up to the end of the region.
/// - Returns: The `convertTarget` characters when the region is valid, `nil` otherwise.
/// - Note: For `elements = [r(k, a, n, s, h, a)]`, `k,a,n,s,h,a` and `k, a` are valid while
///   `a, n` and `s, h` are not. `k, a, n` is specially treated as valid.
static func getConvertTargetIfRightSideIsValid(lastElement: InputElement, of originalElements: [InputElement], to rightIndex: Int, convertTargetElements: [ConvertTargetElement]) -> [Character]? {
    debug("getConvertTargetIfRightSideIsValid", lastElement, rightIndex)
    guard rightIndex <= originalElements.endIndex else {
        return nil
    }
    // Validity check: basically only regions that map cleanly onto `convertTarget` are wanted.
    let skipsValidation = Self.shouldEscapeOtherValidation(convertTargetElement: convertTargetElements, of: originalElements)
    if !skipsValidation && !Self.isRightSideValid(lastElement: lastElement, convertTargetElements: convertTargetElements, of: originalElements, to: rightIndex) {
        return nil
    }

    // Valid from here on.
    var adjusted = convertTargetElements
    if let tail = adjusted.last, tail.inputStyle == .roman2kana, rightIndex < originalElements.endIndex {
        let following = originalElements[rightIndex]
        let endsWithN = tail.string.hasSuffix("n")
        if !endsWithN && tail.string.last == following.character {
            // Rewrite: the doubled consonant at the end becomes `っ`.
            adjusted[adjusted.endIndex - 1].string.removeLast()
            adjusted.append(ConvertTargetElement(string: ["っ"], inputStyle: .direct))
        } else if endsWithN && !["a", "i", "u", "e", "o", "y", "n"].contains(following.character) {
            // Rewrite: the trailing `n` becomes `ん`.
            adjusted[adjusted.endIndex - 1].string.removeLast()
            adjusted.append(ConvertTargetElement(string: ["ん"], inputStyle: .direct))
        }
    }

    var characters: [Character] = []
    for element in adjusted {
        characters += element.string
    }
    return characters
}
|
||||
|
||||
// A run of converted characters that share the same `inputStyle`.
// If `k, o, r, e, h, a` is typed as romaji input, `p, e, n` as direct input and
// `d, e, s, u` as romaji input again, the elements become
// [(これは, roman2kana), (pen, direct), (です, roman2kana)].
struct ConvertTargetElement {
    /// The converted characters of this run.
    var string: [Character]

    /// The input style shared by the run.
    var inputStyle: InputStyle
}
|
||||
|
||||
/// Feeds one input element into the list of converted runs.
///
/// A new `ConvertTargetElement` is appended when `currentElements` is empty or when the last
/// run has a different input style; otherwise the string of the last run is updated in place.
/// - Parameters:
///   - currentElements: The runs built so far; modified in place.
///   - newElement: The input element to add.
static func updateConvertTargetElements(currentElements: inout [ConvertTargetElement], newElement: InputElement) {
    // `last` is nil for an empty array, so this comparison also covers the empty case.
    if currentElements.last?.inputStyle != newElement.inputStyle {
        currentElements.append(ConvertTargetElement(string: updateConvertTarget(current: [], inputStyle: newElement.inputStyle, newCharacter: newElement.character), inputStyle: newElement.inputStyle))
        return
    }
    // Update the string of the last run.
    updateConvertTarget(&currentElements[currentElements.endIndex - 1].string, inputStyle: newElement.inputStyle, newCharacter: newElement.character)
}
|
||||
|
||||
/// Returns the converted characters after adding `newCharacter` to `current`.
///
/// Direct input appends the character as is; romaji input delegates to `Roman2Kana.toHiragana`.
static func updateConvertTarget(current: [Character], inputStyle: InputStyle, newCharacter: Character) -> [Character] {
    switch inputStyle {
    case .roman2kana:
        return Roman2Kana.toHiragana(currentText: current, added: newCharacter)
    case .direct:
        var updated = current
        updated.append(newCharacter)
        return updated
    }
}
|
||||
|
||||
/// In-place variant: adds `newCharacter` to `convertTarget`.
///
/// Direct input appends the character as is; romaji input replaces the buffer with the result
/// of `Roman2Kana.toHiragana`.
static func updateConvertTarget(_ convertTarget: inout [Character], inputStyle: InputStyle, newCharacter: Character) {
    switch inputStyle {
    case .roman2kana:
        convertTarget = Roman2Kana.toHiragana(currentText: convertTarget, added: newCharacter)
    case .direct:
        convertTarget += [newCharacter]
    }
}
|
||||
|
||||
}
|
||||
|
||||
// Equatable conformances (all synthesized by the compiler).
extension ComposingText: Equatable {}

extension ComposingText.InputElement: Equatable {}

extension ComposingText.ConvertTargetElement: Equatable {}
|
||||
|
||||
// MARK: 差分計算用のAPI
|
||||
extension ComposingText {
    /// Compares the `input` of two `ComposingText` values and computes their difference.
    ///
    /// - Returns: `deleted` is the number of elements of `previousData.input` after the common
    ///   prefix; `addedCount` is the number of elements of `self.input` after the common prefix.
    func differenceSuffix(to previousData: ComposingText) -> (deleted: Int, addedCount: Int) {
        // For cases like k→か or sh→しゃ all differences appear in the range x ... last,
        // so the difference computation works without problems.
        // For かn → かんs, things like 「かんs、んs、s」 appear but 「かん」 cannot be produced.
        // This is essentially a policy matter, with the same root as 「はし」 not appearing as a
        // partial conversion when converting 「は|しゃ」.
        // To solve it, 「ん」 should be treated as direct at the `input` stage.

        let sharedCount = self.input.commonPrefix(with: previousData.input).count
        let removedCount = previousData.input.count - sharedCount
        let appendedCount = self.input.dropFirst(sharedCount).count
        return (deleted: removedCount, addedCount: appendedCount)
    }

    /// Whether `self.input` ends with the `input` of `suffix`.
    func inputHasSuffix(inputOf suffix: ComposingText) -> Bool {
        let candidate = suffix.input
        return self.input.hasSuffix(candidate)
    }
}
|
||||
|
||||
#if DEBUG
|
||||
extension ComposingText.InputElement: CustomDebugStringConvertible {
    /// The character wrapped in the name of its input style, e.g. `direct(a)`.
    public var debugDescription: String {
        let description: String
        switch self.inputStyle {
        case .roman2kana:
            description = "roman2kana(\(character))"
        case .direct:
            description = "direct(\(character))"
        }
        return description
    }
}
|
||||
|
||||
extension ComposingText.ConvertTargetElement: CustomDebugStringConvertible {
    /// Debug description showing both stored properties.
    /// The closing parenthesis was missing from the original format string.
    var debugDescription: String {
        "ConvertTargetElement(string: \"\(string)\", inputStyle: \(inputStyle))"
    }
}
|
||||
extension InputStyle: CustomDebugStringConvertible {
    /// The raw value prefixed with a dot.
    public var debugDescription: String {
        return ".\(rawValue)"
    }
}
|
||||
#endif
|
||||
@@ -0,0 +1,203 @@
|
||||
//
|
||||
// CalendarCandidate.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2023/05/13.
|
||||
// Copyright © 2023 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension KanaKanjiConverter {
|
||||
/// Returns the candidate obtained by converting a Japanese-era year reading to a Gregorian year.
/// - parameters:
///   - inputData: The input. Its `convertTarget`, converted to katakana, is passed to `toSeireki(_:)`.
/// - Returns: A single-element array when the conversion succeeds, otherwise an empty array.
func toSeirekiCandidates(_ inputData: ComposingText) -> [Candidate] {
    let ruby = inputData.convertTarget.toKatakana()
    guard let seireki = self.toSeireki(ruby) else {
        return []
    }
    let element = DicdataElement(word: seireki, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)
    return [
        Candidate(
            text: seireki,
            value: -15,
            correspondingCount: inputData.input.count,
            lastMid: MIDData.一般.mid,
            data: [element]
        )
    ]
}
|
||||
|
||||
/// Converts a katakana reading of a Japanese-era year (e.g. `ショウワ50ネン`) to a Gregorian year string.
/// - parameters:
///   - string: The katakana input.
/// - Returns: A string such as `1975年`, or `nil` when the input is not a recognised era reading
///   followed by a one- or two-character number and `ネン`.
private func toSeireki(_ string: String) -> String? {
    // First years are read "ガンネン" and carry no number, so they are matched literally.
    switch string {
    case "メイジガンネン": return "1868年"
    case "タイショウガンネン": return "1912年"
    case "ショウワガンネン": return "1926年"
    case "ヘイセイガンネン": return "1989年"
    case "レイワガンネン": return "2019年"
    default: break
    }

    // Everything else must end with "ネン"; strip it.
    guard string.suffix(2) == "ネン" else {
        return nil
    }
    let body = string.dropLast(2)

    // Era reading and the offset that turns the era year into a Gregorian year.
    let eras: [(reading: String, offset: Int)] = [
        ("ショウワ", 1925),
        ("ヘイセイ", 1988),
        ("レイワ", 2018),
        ("メイジ", 1867),
        ("タイショウ", 1911)
    ]
    guard let era = eras.first(where: { body.hasPrefix($0.reading) }) else {
        return nil
    }

    // What remains after the era reading must be the number, one or two characters long.
    let digits = body.dropFirst(era.reading.count)
    guard digits.count == 1 || digits.count == 2, let year = Int(digits) else {
        return nil
    }
    return "\(year + era.offset)年"
}
|
||||
/// Converts input written as a Gregorian year (e.g. `2020ネン`) into Japanese-era (wareki) candidates.
/// - parameters:
///   - inputData: The input. Its `convertTarget`, converted to katakana, must start with four
///     characters that parse as an `Int` and must end with `ネン`.
/// - Returns: One candidate for ordinary years, two for years in which the era changed
///   (new era's first year and the old era's last year), or an empty array when nothing matches.
func toWarekiCandidates(_ inputData: ComposingText) -> [Candidate] {
    let string = inputData.convertTarget.toKatakana()

    // Main candidate (value -18).
    let makePrimary: (String) -> Candidate = {
        Candidate(
            text: $0,
            value: -18,
            correspondingCount: inputData.input.count,
            lastMid: MIDData.年.mid,
            data: [DicdataElement(word: $0, ruby: string, cid: CIDData.一般名詞.cid, mid: MIDData.年.mid, value: -18)]
        )
    }
    // Alternative candidate for era-change years (value -19, ranked just below the main one).
    let makeSecondary: (String) -> Candidate = {
        Candidate(
            text: $0,
            value: -19,
            correspondingCount: inputData.input.count,
            lastMid: MIDData.年.mid,
            data: [DicdataElement(word: $0, ruby: string, cid: CIDData.一般名詞.cid, mid: MIDData.年.mid, value: -19)]
        )
    }

    // NOTE(review): only the first four characters are parsed, so anything between them and the
    // trailing "ネン" is ignored — confirm whether a stricter match is wanted.
    guard let seireki = Int(string.prefix(4)), string.hasSuffix("ネン") else {
        return []
    }

    switch seireki {
    // Years in which the era changed: offer both spellings.
    case 1989:
        return [makePrimary("平成元年"), makeSecondary("昭和64年")]
    case 2019:
        return [makePrimary("令和元年"), makeSecondary("平成31年")]
    case 1926:
        return [makePrimary("昭和元年"), makeSecondary("大正15年")]
    case 1912:
        return [makePrimary("大正元年"), makeSecondary("明治45年")]
    case 1868:
        return [makePrimary("明治元年"), makeSecondary("慶應4年")]
    // Ordinary years.
    case 1990...2018:
        return [makePrimary("平成\(seireki - 1988)年")]
    case 1927...1988:
        return [makePrimary("昭和\(seireki - 1925)年")]
    case 1869...1911:
        // Meiji 1 is 1868, so the offset is 1867 (it was 1967, which produced negative era years).
        return [makePrimary("明治\(seireki - 1867)年")]
    case 1913...1925:
        return [makePrimary("大正\(seireki - 1911)年")]
    case 2020...:
        return [makePrimary("令和\(seireki - 2018)年")]
    default:
        return []
    }
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
//
|
||||
// ConvertRequestOptions.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2022/12/20.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Settings that accompany a conversion request.
public struct ConvertRequestOptions {
    public var N_best: Int
    public var requireJapanesePrediction: Bool
    public var requireEnglishPrediction: Bool
    public var keyboardLanguage: KeyboardLanguage

    // Values injected from KeyboardSetting
    public var typographyLetterCandidate: Bool
    public var unicodeCandidate: Bool
    public var englishCandidateInRoman2KanaInput: Bool
    public var fullWidthRomanCandidate: Bool
    public var halfWidthKanaCandidate: Bool
    public var learningType: LearningType
    public var maxMemoryCount: Int
    public var shouldResetMemory: Bool

    // Directories and similar locations
    public var memoryDirectoryURL: URL
    public var sharedContainerURL: URL
    public var dictionaryResourceURL: URL

    // Metadata
    public var metadata: Metadata

    /// Creates the settings required for a conversion request.
    ///
    /// - parameters:
    ///   - N_best: The number of conversion candidates. Validity under the language model is guaranteed for the top `N`. Larger values increase the amount of computation.
    ///   - requireJapanesePrediction: Whether Japanese predictive candidates are needed. When `false`, Japanese predictive candidates are not output.
    ///   - requireEnglishPrediction: Whether English predictive candidates are needed. When `false`, English predictive candidates are not output. `false` is recommended for Japanese input via romaji.
    ///   - keyboardLanguage: The language of the keyboard.
    ///   - typographyLetterCandidate: When `true`, alphanumeric candidates in "stylish fonts" are included in the output. See `KanaKanjiConverter.typographicalCandidates(_:)`.
    ///   - unicodeCandidate: When `true`, Unicode candidates are included for input such as `U+xxxx`. See `KanaKanjiConverter.unicodeCandidates(_:)`.
    ///   - englishCandidateInRoman2KanaInput: When `true`, English candidates are output during Japanese romaji input. When `false`, they are not.
    ///   - fullWidthRomanCandidate: When `true`, full-width alphanumeric candidates are included in the output.
    ///   - halfWidthKanaCandidate: When `true`, half-width kana candidates are included in the output.
    ///   - learningType: The learning mode. See `LearningType`.
    ///   - maxMemoryCount: The maximum number of entries kept when learning is enabled. For `0`, setting `learningType` to `nothing` is more appropriate.
    ///   - shouldResetMemory: When `true`, the learning data is reset before conversion starts.
    ///   - dictionaryResourceURL: Where the built-in dictionary data is read from.
    ///   - memoryDirectoryURL: Where the learning data is stored. Specify a writable directory.
    ///   - sharedContainerURL: The directory holding settings data written outside the keyboard, such as the user dictionary.
    ///   - metadata: Metadata. See `ConvertRequestOptions.Metadata`.
    public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, metadata: ConvertRequestOptions.Metadata) {
        self.N_best = N_best
        self.requireJapanesePrediction = requireJapanesePrediction
        self.requireEnglishPrediction = requireEnglishPrediction
        self.keyboardLanguage = keyboardLanguage
        self.typographyLetterCandidate = typographyLetterCandidate
        self.unicodeCandidate = unicodeCandidate
        self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
        self.fullWidthRomanCandidate = fullWidthRomanCandidate
        self.halfWidthKanaCandidate = halfWidthKanaCandidate
        self.learningType = learningType
        self.maxMemoryCount = maxMemoryCount
        self.shouldResetMemory = shouldResetMemory
        self.dictionaryResourceURL = dictionaryResourceURL
        self.memoryDirectoryURL = memoryDirectoryURL
        self.sharedContainerURL = sharedContainerURL
        self.metadata = metadata
    }

    /// A placeholder value. The URL arguments are dummy data (see the inline comments), so this
    /// value is not usable for real dictionary or learning-data access.
    static var `default`: Self {
        .init(
            N_best: 10,
            requireJapanesePrediction: true,
            requireEnglishPrediction: true,
            keyboardLanguage: .ja_JP,
            typographyLetterCandidate: false,
            unicodeCandidate: true,
            englishCandidateInRoman2KanaInput: true,
            fullWidthRomanCandidate: true,
            halfWidthKanaCandidate: false,
            learningType: .inputAndOutput,
            maxMemoryCount: 65536,
            shouldResetMemory: false,
            // dummy data, won't work
            dictionaryResourceURL: Bundle.main.bundleURL,
            // dummy data, won't work
            memoryDirectoryURL: (try? FileManager.default.url(for: .libraryDirectory, in: .userDomainMask, appropriateFor: nil, create: false)) ?? Bundle.main.bundleURL,
            // dummy data, won't work
            sharedContainerURL: Bundle.main.bundleURL,
            metadata: Metadata(appVersionString: "Unknown")
        )
    }

    /// Metadata attached to a conversion request.
    public struct Metadata {
        var appVersionString: String

        /// - parameters:
        ///   - appVersionString: The version of the app. This value is used by `KanaKanjiConverter.toVersionCandidate(_:)` and similar functions.
        public init(appVersionString: String) {
            self.appVersionString = appVersionString
        }
    }
}
|
||||
596
Sources/KanaKanjiConverterModule/Converter/Converter.swift
Normal file
596
Sources/KanaKanjiConverterModule/Converter/Converter.swift
Normal file
@@ -0,0 +1,596 @@
|
||||
//
|
||||
// Converter.swift
|
||||
// Kana2KajiProject
|
||||
//
|
||||
// Created by ensan on 2020/09/03.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
/// The class responsible for managing kana-kanji conversion.
|
||||
public final class KanaKanjiConverter {
|
||||
public init() {}
|
||||
public init(dicdataStore: DicdataStore) {
|
||||
self.converter = .init(dicdataStore: dicdataStore)
|
||||
}
|
||||
|
||||
private var converter = Kana2Kanji()
|
||||
private var checker = SpellChecker()
|
||||
private var checkerInitialized: [KeyboardLanguage: Bool] = [.none: true, .ja_JP: true]
|
||||
|
||||
// 前回の変換や確定の情報を取っておく部分。
|
||||
private var previousInputData: ComposingText?
|
||||
private var nodes: [[LatticeNode]] = []
|
||||
private var completedData: Candidate?
|
||||
private var lastData: DicdataElement?
|
||||
|
||||
/// Resets the state cached from the previous conversion and commit.
public func stopComposition() {
    nodes = []
    previousInputData = nil
    completedData = nil
    lastData = nil
}
|
||||
|
||||
/// Marks the spell checker as initialized for `language`, issuing one completion request first
/// for the languages that use it.
///
/// For `.en_US` and `.el_GR` a single-character completion request is made inside a `Task` and its
/// result is discarded (presumably to warm up the checker — confirm).
/// NOTE(review): `checkerInitialized` is written from inside the `Task`; confirm this is safe with
/// respect to concurrent access.
/// - Parameter language: The keyboard language being switched to.
public func setKeyboardLanguage(_ language: KeyboardLanguage) {
    // Nothing to do when the language has already been handled.
    guard checkerInitialized[language] != true else {
        return
    }
    let sample: (text: String, languageCode: String)
    switch language {
    case .en_US:
        sample = ("a", "en-US")
    case .el_GR:
        sample = ("α", "el-GR")
    case .none, .ja_JP:
        checkerInitialized[language] = true
        return
    }
    Task {
        _ = checker.completions(forPartialWordRange: NSRange(location: 0, length: 1), in: sample.text, language: sample.languageCode)
        checkerInitialized[language] = true
    }
}
|
||||
|
||||
/// Forwards an operation requested by upstream code to the `dicdataStore`.
/// - Parameters:
///   - data: The operation to perform.
public func sendToDicdataStore(_ data: DicdataStore.Notification) {
    converter.dicdataStore.sendToDicdataStore(data)
}
|
||||
/// Updates the cached internal state after a commit operation.
/// - Parameters:
///   - candidate: The candidate that was committed.
public func setCompletedData(_ candidate: Candidate) {
    completedData = candidate
}
|
||||
|
||||
/// Updates the learning memory after a commit operation.
///
/// The previously committed last element (`lastData`) is passed along with the candidate, and is
/// then replaced by the last element of `candidate.data`.
/// - Parameters:
///   - candidate: The candidate that was committed.
public func updateLearningData(_ candidate: Candidate) {
    // Runs after the learning update below, so that update still sees the old `lastData`.
    defer {
        lastData = candidate.data.last
    }
    converter.dicdataStore.updateLearningData(candidate, with: lastData)
}
|
||||
|
||||
/// Generates the "smart" conversion candidates (era/Gregorian year, e-mail address, typography,
/// Unicode and version candidates).
/// - Parameters:
///   - inputData: The input to convert.
///   - options: Request options; `typographyLetterCandidate` and `unicodeCandidate` switch the
///     corresponding candidates on or off.
/// - Returns:
///   The smart conversion candidates, in generation order.
private func getWiseCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
    // toWarekiCandidates/toSeirekiCandidates used to be configurable, but there seemed to be no
    // demand for turning them off, so they are always enabled.
    var candidates = self.toWarekiCandidates(inputData)
    candidates += self.toSeirekiCandidates(inputData)
    candidates += self.toEmailAddressCandidates(inputData)

    if options.typographyLetterCandidate {
        candidates += self.typographicalCandidates(inputData)
    }
    if options.unicodeCandidate {
        candidates += self.unicodeCandidates(inputData)
    }
    candidates += self.toVersionCandidate(inputData, options: options)
    return candidates
}
|
||||
|
||||
/// Removes duplicate conversion candidates.
///
/// Candidates are identified by `text`. Candidates with empty text, or whose text is in
/// `seenCandidates`, are dropped. When a text occurs more than once, the entry keeps the position
/// of its first occurrence and is replaced by a later one if that one has a larger `value` or a
/// larger `correspondingCount` than the entry currently held.
/// - Parameters:
///   - candidates: The candidates to make unique.
///   - seenCandidates: Texts that must not appear in the result.
/// - Returns:
///   `candidates` with duplicates removed.
private func getUniqueCandidate(_ candidates: some Sequence<Candidate>, seenCandidates: Set<String> = []) -> [Candidate] {
    var result = [Candidate]()
    // Position of each text inside `result`; avoids a linear search per candidate.
    var indexByText: [String: Int] = [:]
    for candidate in candidates where !candidate.text.isEmpty && !seenCandidates.contains(candidate.text) {
        if let index = indexByText[candidate.text] {
            if result[index].value < candidate.value || result[index].correspondingCount < candidate.correspondingCount {
                result[index] = candidate
            }
        } else {
            indexByText[candidate.text] = result.count
            result.append(candidate)
        }
    }
    return result
}
|
||||
/// Generates predictive candidates for a foreign language using the spell checker's completions.
/// - Parameters:
///   - inputData: The data to convert.
///   - language: The language code. Only `en-US` and `el` (Greek) are handled; any other value
///     yields an empty result.
///   - penalty: Value given to the candidate that echoes the input itself; completions start at
///     `-5 + penalty` and decrease step by step.
/// - Returns:
///   The predictive candidates.
private func getForeignPredictionCandidate(inputData: ComposingText, language: String, penalty: PValue = -5) -> [Candidate] {
    guard language == "en-US" || language == "el" else {
        return []
    }
    let ruby = String(inputData.input.map {$0.character})
    // For English, completion is attempted only when the input is purely roman alphabet.
    if language == "en-US" && !ruby.onlyRomanAlphabet {
        return []
    }
    let range = NSRange(location: 0, length: ruby.utf16.count)
    guard let completions = checker.completions(forPartialWordRange: range, in: ruby, language: language), !completions.isEmpty else {
        return []
    }

    // Builds a candidate whose text and ruby are both `text`.
    let makeCandidate: (String, PValue) -> Candidate = { text, value in
        Candidate(
            text: text,
            value: value,
            correspondingCount: inputData.input.count,
            lastMid: MIDData.一般.mid,
            data: [DicdataElement(ruby: text, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: value)]
        )
    }

    // The input itself comes first, followed by the completions with decreasing value.
    var result: [Candidate] = [makeCandidate(ruby, penalty)]
    var value: PValue = -5 + penalty
    let delta: PValue = -10 / PValue(completions.count)
    for word in completions {
        result.append(makeCandidate(word, value))
        value += delta
    }
    return result
}
|
||||
|
||||
/// Generates predictive conversion candidates.
///
/// Starting from the best-valued entry of `sums`, the last clause is split off and predictions are
/// requested for it; then the last clause is widened by merging in the preceding clause and
/// predictions are requested again. This repeats until two rounds have produced predictions or no
/// clause is left.
/// - Parameters:
///   - sums: The data to convert. Must not be empty (the best entry is force-unwrapped).
///   - composingText: The text being composed, passed through to the converter.
///   - options: Request options (not read in this function).
/// - Returns:
///   The predictive candidates.
private func getPredictionCandidate(_ sums: [(CandidateData, Candidate)], composingText: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
    // prepart: the leading clauses, lastpart: the final clause.
    // We start with lastpart being nil.
    var predictionResults: [Candidate] = []
    var prepart: CandidateData = sums.max {$0.1.value < $1.1.value}!.0
    var lastpart: CandidateData.ClausesUnit?
    var successfulRounds = 0

    while successfulRounds != 2 && !prepart.isEmpty {
        // Take the current last clause off prepart (this mutates prepart).
        let popped = prepart.clauses.popLast()!
        let widened: CandidateData.ClausesUnit
        if let previousLast = lastpart {
            // Merge the previous last clause into the popped one, widening the final clause.
            popped.clause.merge(with: previousLast.clause)
            widened = (clause: popped.clause, value: popped.value + previousLast.value)
        } else {
            widened = popped
        }
        let predictions = converter.getPredictionCandidates(composingText: composingText, prepart: prepart, lastClause: widened.clause, N_best: 5)
        lastpart = widened
        // Only rounds that produced something count towards the limit.
        if !predictions.isEmpty {
            predictionResults += predictions
            successfulRounds += 1
        }
    }
    return predictionResults
}
|
||||
|
||||
/// Generates the additional candidates that are added at the top level.
///
/// Currently this is the English prediction (with penalty `-10`), produced only when every input
/// element was entered in `roman2kana` style and `englishCandidateInRoman2KanaInput` is enabled.
/// - Parameters:
///   - inputData: The InputData to convert.
///   - options: Request options.
/// - Returns:
///   The additional candidates.
private func getTopLevelAdditionalCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
    guard options.englishCandidateInRoman2KanaInput,
          inputData.input.allSatisfy({$0.inputStyle == .roman2kana}) else {
        return []
    }
    return self.getForeignPredictionCandidate(inputData: inputData, language: "en-US", penalty: -10)
}
|
||||
/// Estimates how likely the given part is a katakana word.
/// Smaller is better: the score starts at 1 and is multiplied by a factor below 1 for every
/// character that belongs to one of the groups below.
private func getKatakanaScore<S: StringProtocol>(_ katakana: S) -> PValue {
    // Text analysis showed that words containing these katakana are likely to be katakana words.
    katakana.reduce(into: PValue(1)) { score, character in
        switch character {
        case "プ", "ヴ", "ペ", "ィ", "フ":
            score *= 0.5
        case "ュ", "ピ", "ポ":
            score *= 0.6
        case "パ", "ォ", "グ", "ー", "ム":
            score *= 0.7
        default:
            break
        }
    }
}
|
||||
|
||||
/// Generates additional candidates derived directly from the input: katakana, hiragana,
/// uppercase, and optionally full-width alphanumerics and half-width katakana.
/// - Parameters:
///   - inputData: The InputData to convert.
///   - options: Request options; `fullWidthRomanCandidate` and `halfWidthKanaCandidate` enable the
///     last two candidates.
/// - Returns:
///   The additional candidates, in the order listed above.
private func getAdditionalCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
    let katakanaString = inputData.convertTarget.toKatakana()
    let inputCount = inputData.input.count

    // Katakana: the value depends on how katakana-like the string looks.
    let katakanaValue = -14 * getKatakanaScore(katakanaString)
    var candidates: [Candidate] = [
        Candidate(
            text: katakanaString,
            value: katakanaValue,
            correspondingCount: inputCount,
            lastMid: MIDData.一般.mid,
            data: [DicdataElement(ruby: katakanaString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: katakanaValue)]
        )
    ]

    // Hiragana
    let hiraganaString = katakanaString.toHiragana()
    candidates.append(
        Candidate(
            text: hiraganaString,
            value: -14.5,
            correspondingCount: inputCount,
            lastMid: MIDData.一般.mid,
            data: [DicdataElement(word: hiraganaString, ruby: katakanaString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -14.5)]
        )
    )

    // Uppercase
    let uppercasedString = katakanaString.uppercased()
    candidates.append(
        Candidate(
            text: uppercasedString,
            value: -14.6,
            correspondingCount: inputCount,
            lastMid: MIDData.一般.mid,
            data: [DicdataElement(word: uppercasedString, ruby: katakanaString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)]
        )
    )

    // Full-width alphanumerics
    if options.fullWidthRomanCandidate {
        let fullWidthString = katakanaString.applyingTransform(.fullwidthToHalfwidth, reverse: true) ?? ""
        candidates.append(
            Candidate(
                text: fullWidthString,
                value: -14.7,
                correspondingCount: inputCount,
                lastMid: MIDData.一般.mid,
                data: [DicdataElement(word: fullWidthString, ruby: katakanaString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)]
            )
        )
    }

    // Half-width katakana
    if options.halfWidthKanaCandidate {
        let halfWidthString = katakanaString.applyingTransform(.fullwidthToHalfwidth, reverse: false) ?? ""
        candidates.append(
            Candidate(
                text: halfWidthString,
                value: -15,
                correspondingCount: inputCount,
                lastMid: MIDData.一般.mid,
                data: [DicdataElement(word: halfWidthString, ruby: katakanaString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)]
            )
        )
    }

    return candidates
}
|
||||
|
||||
/// Processes the lattice and assembles the conversion candidates.
/// - Parameters:
///   - inputData: The InputData to convert.
///   - result: The result obtained from `convertToLattice`.
///   - options: Options of the request.
/// - Returns:
///   `mainResults`: the candidates without duplicates; `firstClauseResults`: the candidates that
///   convert only the first clause.
/// - Note:
///   The current implementation decides the order of candidates in a very complicated way.
///   Statement order matters here: `getPredictionCandidate` merges clause objects in place, so the
///   clause-based values are computed before it is called.
private func processResult(inputData: ComposingText, result: (result: LatticeNode, nodes: [[LatticeNode]]), options: ConvertRequestOptions) -> (mainResults: [Candidate], firstClauseResults: [Candidate]) {
    // Remember this conversion so that the next one can reuse it.
    self.previousInputData = inputData
    self.nodes = result.nodes

    let clauseResult = result.result.getCandidateData()
    guard !clauseResult.isEmpty else {
        // Early return: only the additional candidates are available.
        let fallback = self.getUniqueCandidate(self.getAdditionalCandidate(inputData, options: options))
        return (fallback, fallback)
    }

    // Candidates consisting of the first clause only.
    let firstClauseCandidates: [Candidate] = clauseResult.map {(candidateData: CandidateData) -> Candidate in
        let firstClause = candidateData.clauses.first!
        // Find the last data index whose concatenated words spell the first clause's text.
        // NOTE(review): this indexes past the end if the words never add up to the clause text.
        var lastIndex = 0
        var joinedWords = candidateData.data[lastIndex].word
        while joinedWords != firstClause.clause.text {
            lastIndex += 1
            joinedWords += candidateData.data[lastIndex].word
        }
        return Candidate(
            text: firstClause.clause.text,
            value: firstClause.value,
            correspondingCount: firstClause.clause.inputRange.count,
            lastMid: firstClause.clause.mid,
            data: Array(candidateData.data[0...lastIndex])
        )
    }
    let sums: [(CandidateData, Candidate)] = clauseResult.map {($0, converter.processClauseCandidate($0))}

    // Top five candidates that convert the whole sentence.
    let wholeSentenceUniqueCandidates = self.getUniqueCandidate(sums.map {$0.1})
    let sentenceCandidates = wholeSentenceUniqueCandidates.min(count: 5, sortedBy: {$0.value > $1.value})

    // Predictive conversion.
    let predictionCandidates: [Candidate]
    if options.requireJapanesePrediction {
        let uniquePredictions = self.getUniqueCandidate(self.getPredictionCandidate(sums, composingText: inputData, options: options))
        predictionCandidates = Array(uniquePredictions.min(count: 4, sortedBy: {$0.value > $1.value}))
    } else {
        predictionCandidates = []
    }

    // Predictive conversion of English words. It uses Apple's API, so it is handled separately.
    var foreignCandidates: [Candidate] = []
    if options.requireEnglishPrediction {
        foreignCandidates.append(contentsOf: self.getForeignPredictionCandidate(inputData: inputData, language: "en-US"))
    }
    if options.keyboardLanguage == .el_GR {
        foreignCandidates.append(contentsOf: self.getForeignPredictionCandidate(inputData: inputData, language: "el"))
    }

    // Zero-hint predictive conversion.
    let best10 = getUniqueCandidate(sentenceCandidates.chained(predictionCandidates)).min(count: 10, sortedBy: {$0.value > $1.value})
    let zeroHintPredictionCandidates = converter.getZeroHintPredictionCandidates(preparts: best10, N_best: 3)
    let topLevelAdditionalCandidates = self.getTopLevelAdditionalCandidate(inputData, options: options)

    // Patterns that convert the whole sentence.
    let fullCandidates = getUniqueCandidate(
        best10
            .chained(foreignCandidates)
            .chained(zeroHintPredictionCandidates)
            .chained(topLevelAdditionalCandidates)
    ).min(count: 5, sortedBy: {$0.value > $1.value})

    // Set of texts used to keep the remaining candidates free of duplicates.
    var seenTexts: Set<String> = fullCandidates.mapSet {$0.text}

    // Patterns that convert only a clause.
    let clauseCandidates = self.getUniqueCandidate(firstClauseCandidates, seenCandidates: seenTexts).min(count: 5, sortedBy: {$0.value > $1.value})
    seenTexts.formUnion(clauseCandidates.map {$0.text})

    // "Smart" conversion patterns.
    let wiseCandidates: [Candidate] = self.getWiseCandidate(inputData, options: options)
    seenTexts.formUnion(wiseCandidates.map {$0.text})

    // Dictionary data starting at the first position.
    let dictionaryCandidates: [Candidate] = result.nodes[0].map { node in
        Candidate(
            text: node.data.word,
            value: node.data.value(),
            correspondingCount: node.inputRange.count,
            lastMid: node.data.mid,
            data: [node.data]
        )
    }
    // Additional part.
    let additionalCandidates: [Candidate] = self.getAdditionalCandidate(inputData, options: options)

    // Order by the length of the corresponding input, and within the same length by value, best first.
    let wordCandidates: [Candidate] = self.getUniqueCandidate(dictionaryCandidates.chained(additionalCandidates), seenCandidates: seenTexts)
        .sorted { lhs, rhs in
            if lhs.correspondingCount == rhs.correspondingCount {
                return lhs.value > rhs.value
            }
            return lhs.correspondingCount > rhs.correspondingCount
        }

    var mainResults = Array(fullCandidates)

    // Make sure at least one candidate matches the entire input.
    let targetRuby = inputData.convertTarget.toKatakana()
    let coversWholeInput: (Candidate) -> Bool = { candidate in
        candidate.data.map {$0.ruby}.joined() == targetRuby
    }
    if !mainResults.contains(where: coversWholeInput) {
        if let candidate = sentenceCandidates.first(where: coversWholeInput) {
            mainResults.append(candidate)
        } else if let candidate = wholeSentenceUniqueCandidates.first(where: coversWholeInput) {
            mainResults.append(candidate)
        }
    }

    mainResults.append(contentsOf: clauseCandidates)
    mainResults.append(contentsOf: wiseCandidates)
    mainResults.append(contentsOf: wordCandidates)

    mainResults.mutatingForeach { item in
        item.withActions(self.getApporopriateActions(item))
        item.parseTemplate()
    }
    return (mainResults, Array(clauseCandidates))
}
|
||||
|
||||
/// Builds a lattice from the input, choosing the converter function that fits the situation
/// (first conversion, unchanged input, after a clause commit, deletion, replacement, addition).
/// - Parameters:
///   - inputData: The InputData to convert.
///   - N_best: The number of candidates kept during the computation. This differs from the number
///     of candidates actually obtained.
/// - Returns:
///   The resulting lattice node and all computed nodes, or `nil` when the convert target is empty.
private func convertToLattice(_ inputData: ComposingText, N_best: Int) -> (result: LatticeNode, nodes: [[LatticeNode]])? {
    guard !inputData.convertTarget.isEmpty else {
        return nil
    }
    // Every path below records `inputData` as the previous input once its lattice has been built.
    defer {
        self.previousInputData = inputData
    }

    guard let previous = self.previousInputData else {
        debug("convertToLattice: 新規計算用の関数を呼びますA")
        return converter.kana2lattice_all(inputData, N_best: N_best)
    }

    debug("convertToLattice: before \(previous) after \(inputData)")

    // Exactly the same input as last time.
    if previous == inputData {
        return converter.kana2lattice_no_change(N_best: N_best, previousResult: (inputData: previous, nodes: nodes))
    }

    // Right after a clause was committed.
    if let completed = self.completedData, previous.inputHasSuffix(inputOf: inputData) {
        debug("convertToLattice: 文節確定用の関数を呼びます、確定された文節は\(completed)")
        let lattice = converter.kana2lattice_afterComplete(inputData, completedData: completed, N_best: N_best, previousResult: (inputData: previous, nodes: nodes))
        self.completedData = nil
        return lattice
    }

    // TODO: Take the committed clause into account also when the input is not a suffix but we are
    // still right after a clause commit.
    // With a pattern like へんかん|する → 変換 する|, previousInputData can be へんかん and inputData する.

    let diff = inputData.differenceSuffix(to: previous)

    if diff.deleted > 0 && diff.addedCount == 0 {
        // Characters were deleted from the end.
        debug("convertToLattice: 最後尾削除用の関数を呼びます, 消した文字数は\(diff.deleted)")
        return converter.kana2lattice_deletedLast(deletedCount: diff.deleted, N_best: N_best, previousResult: (inputData: previous, nodes: nodes))
    } else if diff.deleted > 0 {
        // Characters at the end were replaced.
        debug("convertToLattice: 最後尾文字置換用の関数を呼びます、差分は\(diff)")
        return converter.kana2lattice_changed(inputData, N_best: N_best, counts: (diff.deleted, diff.addedCount), previousResult: (inputData: previous, nodes: nodes))
    } else if diff.deleted == 0 && diff.addedCount != 0 {
        // Characters were added at the end.
        debug("convertToLattice: 最後尾追加用の関数を呼びます、追加文字数は\(diff.addedCount)")
        return converter.kana2lattice_added(inputData, N_best: N_best, addedCount: diff.addedCount, previousResult: (inputData: previous, nodes: nodes))
    } else {
        // None of the above: compute from scratch.
        debug("convertToLattice: 新規計算用の関数を呼びますB")
        return converter.kana2lattice_all(inputData, N_best: N_best)
    }
}
|
||||
|
||||
/// Returns the follow-up actions to run for a given candidate.
///
/// Bracket and quotation pairs request a cursor move backwards (presumably so that the
/// cursor lands inside the pair — confirm against the `CompleteAction` consumer).
/// - Parameter candidate: The candidate whose `text` is inspected.
/// - Returns: `[.moveCursor(-1)]` for the listed two-character pairs, `[.moveCursor(-2)]` for `{{}}`,
///   and an empty array for any other text.
public func getApporopriateActions(_ candidate: Candidate) -> [CompleteAction] {
    let singleStepPairs = ["[]", "()", "{}", "〈〉", "〔〕", "()", "「」", "『』", "【】", "{}", "<>", "《》", "\"\"", "\'\'", "””"]
    let doubleStepPairs = ["{{}}"]
    let committedText = candidate.text

    if singleStepPairs.contains(committedText) {
        return [.moveCursor(-1)]
    }
    return doubleStepPairs.contains(committedText) ? [.moveCursor(-2)] : []
}
|
||||
|
||||
/// Entry point called from outside to request conversion candidates.
/// - Parameters:
///   - inputData: The `ComposingText` to convert.
///   - options: Parameters that apply to this request.
/// - Returns: The conversion candidates (documented by the original author as free of duplicates),
///   or a pair of empty arrays when there is nothing to convert or no lattice could be built.
public func requestCandidates(_ inputData: ComposingText, options: ConvertRequestOptions) -> (mainResults: [Candidate], firstClauseResults: [Candidate]) {
    debug("requestCandidates 入力は", inputData)
    // Nothing to convert.
    guard !inputData.convertTarget.isEmpty else {
        return ([], [])
    }
    let latticeStart = Date()

    // Let the DicdataStore know about the request options before any lookup happens.
    self.sendToDicdataStore(.setRequestOptions(options))

    guard let lattice = self.convertToLattice(inputData, N_best: options.N_best) else {
        return ([], [])
    }
    debug("ラティス構築", -latticeStart.timeIntervalSinceNow)

    let processingStart = Date()
    let candidates = self.processResult(inputData: inputData, result: lattice, options: options)
    debug("ラティス処理", -processingStart.timeIntervalSinceNow)
    debug("全体", -latticeStart.timeIntervalSinceNow)

    return candidates
}
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
//
|
||||
// EmailAddress.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2022/10/01.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension KanaKanjiConverter {
    /// Mail domains offered as completions. Earlier entries receive a better score.
    private static let domains = [
        "@gmail.com",
        "@icloud.com",
        "@yahoo.co.jp",
        "@au.com",
        "@docomo.ne.jp",
        "@excite.co.jp",
        "@ezweb.ne.jp",
        "@googlemail.com",
        "@hotmail.co.jp",
        "@hotmail.com",
        "@i.softbank.jp",
        "@live.jp",
        "@me.com",
        "@mineo.jp",
        "@nifty.com",
        "@outlook.com",
        "@outlook.jp",
        "@softbank.ne.jp",
        "@yahoo.ne.jp",
        "@ybb.ne.jp",
        "@ymobile.ne.jp"
    ]

    /// Adds e-mail-address-like candidates when the input ends with "@".
    /// - Parameter inputData: The text being composed.
    /// - Returns: One candidate per entry of `domains`, or an empty array when the conversion
    ///   target does not end with "@" or the part before "@" is neither empty nor an English sentence.
    func toEmailAddressCandidates(_ inputData: ComposingText) -> [Candidate] {
        let target = inputData.convertTarget
        guard target.hasSuffix("@") else {
            return []
        }
        // Everything before the trailing "@".
        let id = target.dropLast(1)
        guard id.isEmpty || id.isEnglishSentence else {
            return []
        }
        // A bare "@" starts from a lower score than "name@".
        let baseValue: PValue = id.isEmpty ? -20 : -13
        let ruby = target.toKatakana()
        return Self.domains.enumerated().map { (rank, domain) -> Candidate in
            let address = id.appending(domain)
            let value = baseValue - PValue(rank)
            return Candidate(
                text: address,
                value: value,
                correspondingCount: inputData.input.count,
                lastMid: MIDData.一般.mid,
                data: [DicdataElement(word: address, ruby: ruby, cid: .zero, mid: MIDData.一般.mid, value: value)]
            )
        }
    }
}
|
||||
@@ -0,0 +1,351 @@
|
||||
//
|
||||
// RomanTypographys.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/11/04.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
private extension UnicodeScalar {
    /// Whether the scalar is an uppercase ASCII letter (`A`...`Z`).
    var isRomanUppercased: Bool {
        switch self {
        case "A"..."Z":
            return true
        default:
            return false
        }
    }

    /// Whether the scalar is a lowercase ASCII letter (`a`...`z`).
    var isRomanLowercased: Bool {
        switch self {
        case "a"..."z":
            return true
        default:
            return false
        }
    }

    /// Whether the scalar is an ASCII digit (`0`...`9`).
    var isRomanNumber: Bool {
        switch self {
        case "0"..."9":
            return true
        default:
            return false
        }
    }
}
|
||||
|
||||
extension KanaKanjiConverter {
    /// Returns candidates that render the input in decorated ("typographical") letter forms.
    /// - Parameter inputData: The text being composed; its conversion target is the text to decorate.
    /// - Returns: One candidate (score `-15`) per string produced by `typographicalLetters(from:)`.
    /// - Note: The original author's note says only Latin letters are covered and that Greek letters
    ///   and digits still need support. Digits are handled for the five styles that define digit forms.
    func typographicalCandidates(_ inputData: ComposingText) -> [Candidate] {
        let ruby = inputData.convertTarget.toKatakana()
        return self.typographicalLetters(from: ruby).map { decorated in
            Candidate(
                text: decorated,
                value: -15,
                correspondingCount: inputData.input.count,
                lastMid: MIDData.一般.mid,
                data: [DicdataElement(word: decorated, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)]
            )
        }
    }

    /// Builds the decorated variants of `text`.
    ///
    /// Every style is described by the code-point distance from the ASCII range to the styled
    /// range, plus the letters whose styled form lives outside that range and therefore has to be
    /// substituted explicitly.
    /// - Parameter text: The string to decorate.
    /// - Returns: An empty array unless `text.onlyRomanAlphabetOrNumber` holds. Otherwise the
    ///   decorated strings in table order; styles without digit forms are emitted only when
    ///   `text.onlyRomanAlphabet` holds.
    private func typographicalLetters(from text: String) -> [String] {
        guard text.onlyRomanAlphabetOrNumber else {
            return []
        }

        /// Description of one decorated alphabet.
        struct Style {
            /// Added to the code point of `A`...`Z`.
            let upperOffset: UInt32
            /// Added to the code point of `a`...`z`.
            let lowerOffset: UInt32
            /// Added to the code point of `0`...`9`; `nil` when the style has no digit forms.
            let digitOffset: UInt32?
            /// Letters that are replaced by a fixed string instead of being shifted.
            var exceptions: [UnicodeScalar: String] = [:]
        }

        let styles: [Style] = [
            // 𝐁𝐎𝐋𝐃
            Style(upperOffset: 119743, lowerOffset: 119737, digitOffset: 120734),
            // 𝐼𝑇𝐴𝐿𝐼𝐶
            Style(upperOffset: 119795, lowerOffset: 119789, digitOffset: nil, exceptions: ["h": "ℎ"]),
            // 𝑩𝑶𝑳𝑫𝑰𝑻𝑨𝑳𝑰𝑪
            Style(upperOffset: 119847, lowerOffset: 119841, digitOffset: nil),
            // 𝒮𝒸𝓇𝒾𝓅𝓉
            Style(
                upperOffset: 119899,
                lowerOffset: 119893,
                digitOffset: nil,
                exceptions: [
                    "B": "ℬ", "E": "ℰ", "F": "ℱ", "H": "ℋ", "I": "ℐ", "L": "ℒ", "M": "ℳ", "R": "ℛ",
                    "e": "ℯ", "g": "ℊ", "o": "ℴ"
                ]
            ),
            // 𝓑𝓸𝓵𝓭𝓢𝓬𝓻𝓲𝓹𝓽
            Style(upperOffset: 119951, lowerOffset: 119945, digitOffset: nil),
            // 𝔉𝔯𝔞𝔨𝔱𝔲𝔯
            Style(
                upperOffset: 120003,
                lowerOffset: 119997,
                digitOffset: nil,
                exceptions: ["C": "ℭ", "H": "ℌ", "I": "ℑ", "R": "ℜ", "Z": "ℨ"]
            ),
            // 𝕕𝕠𝕦𝕓𝕝𝕖𝕊𝕥𝕣𝕦𝕔𝕜
            Style(
                upperOffset: 120055,
                lowerOffset: 120049,
                digitOffset: 120744,
                exceptions: ["C": "ℂ", "H": "ℍ", "N": "ℕ", "P": "ℙ", "Q": "ℚ", "R": "ℝ", "Z": "ℤ"]
            ),
            // 𝕭𝖔𝖑𝖉𝕱𝖗𝖆𝖐𝖙𝖚𝖗
            Style(upperOffset: 120107, lowerOffset: 120101, digitOffset: nil),
            // 𝖲𝖺𝗇𝗌𝖲𝖾𝗋𝗂𝖿
            Style(upperOffset: 120159, lowerOffset: 120153, digitOffset: 120754),
            // 𝗦𝗮𝗻𝘀𝗦𝗲𝗿𝗶𝗳𝗕𝗼𝗹𝗱
            Style(upperOffset: 120211, lowerOffset: 120205, digitOffset: 120764),
            // 𝘚𝘢𝘯𝘴𝘚𝘦𝘳𝘪𝘧𝘐𝘵𝘢𝘭𝘪𝘤
            Style(upperOffset: 120263, lowerOffset: 120257, digitOffset: nil),
            // 𝙎𝙖𝙣𝙨𝙎𝙚𝙧𝙞𝙛𝘽𝙤𝙡𝙙𝙄𝙩𝙖𝙡𝙞𝙘
            Style(upperOffset: 120315, lowerOffset: 120309, digitOffset: nil),
            // 𝙼𝚘𝚗𝚘𝚜𝚙𝚊𝚌𝚎
            Style(upperOffset: 120367, lowerOffset: 120361, digitOffset: 120774)
        ]

        let lettersOnly = text.onlyRomanAlphabet
        return styles.compactMap { style -> String? in
            // Styles without digit forms are only offered for purely alphabetic input.
            if style.digitOffset == nil && !lettersOnly {
                return nil
            }
            return text.unicodeScalars.map { scalar -> String in
                if let replacement = style.exceptions[scalar] {
                    return replacement
                }
                let shift: UInt32?
                if scalar.isRomanUppercased {
                    shift = style.upperOffset
                } else if scalar.isRomanLowercased {
                    shift = style.lowerOffset
                } else if scalar.isRomanNumber {
                    shift = style.digitOffset
                } else {
                    shift = nil
                }
                guard let offset = shift else {
                    return String(scalar)
                }
                // ASCII letters/digits shifted by a table offset always land on an assigned
                // scalar inside the Mathematical Alphanumeric Symbols block, so this cannot be nil.
                return String(UnicodeScalar(scalar.value + offset)!)
            }.joined()
        }
    }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
//
|
||||
// SpellChecker.swift
|
||||
//
|
||||
//
|
||||
// Created by ensan on 2023/05/20.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
#if os(iOS) || os(tvOS)
|
||||
import UIKit
|
||||
#elseif os(macOS)
|
||||
import AppKit
|
||||
#endif
|
||||
|
||||
/// Thin wrapper that hides the platform difference between `UITextChecker` (iOS/tvOS)
/// and `NSSpellChecker` (macOS).
final class SpellChecker {
    #if os(iOS) || os(tvOS)
    private let checker = UITextChecker()
    #elseif os(macOS)
    private let checker = NSSpellChecker.shared
    #endif

    /// Returns completions for the partial word at `range` in `string`.
    /// - Parameters:
    ///   - range: The range of the partial word inside `string`.
    ///   - string: The string that contains the partial word.
    ///   - language: The language identifier passed to the system checker.
    /// - Returns: The completions reported by the system checker, or `nil` when it reports none.
    ///   On platforms without a supported system checker this always returns `nil`.
    func completions(forPartialWordRange range: NSRange, in string: String, language: String) -> [String]? {
        #if os(iOS) || os(tvOS)
        return checker.completions(forPartialWordRange: range, in: string, language: language)
        #elseif os(macOS)
        return checker.completions(forPartialWordRange: range, in: string, language: language, inSpellDocumentWithTag: 0)
        #else
        // No system spell checker is available here. Without this branch the function has no
        // return path and the type fails to compile on such platforms.
        return nil
        #endif
    }
}
|
||||
33
Sources/KanaKanjiConverterModule/Converter/Unicode.swift
Normal file
33
Sources/KanaKanjiConverterModule/Converter/Unicode.swift
Normal file
@@ -0,0 +1,33 @@
|
||||
//
|
||||
// Unicode.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/11/04.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension KanaKanjiConverter {
    /// Converts a code-point notation such as `uxxxx`, `Uxxxx`, `u+xxxx` or `U+xxxx`
    /// into the corresponding character.
    /// - Parameter inputData: The text being composed.
    /// - Returns: A single candidate holding the character, or an empty array when no prefix
    ///   yields a hexadecimal number that is a valid Unicode scalar value.
    func unicodeCandidates(_ inputData: ComposingText) -> [Candidate] {
        let score: PValue = -10
        let ruby = inputData.convertTarget.toKatakana()
        // Prefixes are tried in order; the first one whose remainder parses wins.
        for prefix in ["u", "U", "u+", "U+"] {
            guard ruby.hasPrefix(prefix),
                  let codePoint = Int(ruby.dropFirst(prefix.count), radix: 16),
                  let scalar = UnicodeScalar(codePoint) else {
                continue
            }
            let character = String(scalar)
            let candidate = Candidate(
                text: character,
                value: score,
                correspondingCount: inputData.input.count,
                lastMid: MIDData.一般.mid,
                data: [DicdataElement(word: character, ruby: ruby, cid: .zero, mid: MIDData.一般.mid, value: score)]
            )
            return [candidate]
        }
        return []
    }
}
|
||||
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// VersionCandidate.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by N-i-ke on 2023/05/13.
|
||||
// Copyright © 2023 ensan All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension KanaKanjiConverter {
    /// Produces a candidate that shows version information.
    ///
    /// The original author notes that Mozc reportedly shows its language-model version for the
    /// reading 「バージョン」, so azooKey offers its own version string for the same reading.
    /// - Parameters:
    ///   - inputData: The input information.
    ///   - options: Request options; `metadata.appVersionString` supplies the version text.
    /// - Returns: A single candidate `azooKey Version <appVersionString>` when the katakana form of
    ///   the conversion target equals 「バージョン」, otherwise an empty array.
    func toVersionCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
        let ruby = inputData.convertTarget.toKatakana()
        guard ruby == "バージョン" else {
            return []
        }
        let versionString = "azooKey Version \(options.metadata.appVersionString)"
        let element = DicdataElement(word: versionString, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -30)
        let candidate = Candidate(
            text: versionString,
            value: -30,
            correspondingCount: inputData.input.count,
            lastMid: MIDData.一般.mid,
            data: [element]
        )
        return [candidate]
    }
}
|
||||
@@ -0,0 +1,77 @@
|
||||
//
|
||||
// DicdataElement.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/10.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// One dictionary entry: a word, its reading, connection/semantic IDs and a score.
///
/// Equality looks at `word`, `ruby`, `lcid`, `rcid` and `mid`; the score (`baseValue`, `adjust`)
/// is ignored. Hashing combines `word`, `ruby`, `lcid` and `rcid`.
public struct DicdataElement: Equatable, Hashable {
    /// Entry built with `CIDData.BOS` / `MIDData.BOS` and empty text.
    static let BOSData = Self(word: "", ruby: "", cid: CIDData.BOS.cid, mid: MIDData.BOS.mid, value: 0, adjust: 0)
    /// Entry built with `CIDData.EOS` / `MIDData.EOS` and empty text.
    static let EOSData = Self(word: "", ruby: "", cid: CIDData.EOS.cid, mid: MIDData.EOS.mid, value: 0, adjust: 0)

    /// The written form.
    public var word: String
    /// The reading.
    public var ruby: String
    /// Left CID.
    public var lcid: Int
    /// Right CID.
    public var rcid: Int
    /// MID.
    public var mid: Int
    /// Score before `adjust` is applied.
    var baseValue: PValue
    /// Amount added to `baseValue` when computing `value()`.
    public var adjust: PValue

    /// Creates an entry with separate left and right CIDs.
    public init(word: String, ruby: String, lcid: Int, rcid: Int, mid: Int, value: PValue, adjust: PValue = .zero) {
        self.word = word
        self.ruby = ruby
        self.lcid = lcid
        self.rcid = rcid
        self.mid = mid
        self.baseValue = value
        self.adjust = adjust
    }

    /// Creates an entry whose left and right CIDs are both `cid`.
    public init(word: String, ruby: String, cid: Int, mid: Int, value: PValue, adjust: PValue = .zero) {
        self.init(word: word, ruby: ruby, lcid: cid, rcid: cid, mid: mid, value: value, adjust: adjust)
    }

    /// Creates an entry whose word equals its reading and whose left and right CIDs are both `cid`.
    public init(ruby: String, cid: Int, mid: Int, value: PValue, adjust: PValue = .zero) {
        self.init(word: ruby, ruby: ruby, lcid: cid, rcid: cid, mid: mid, value: value, adjust: adjust)
    }

    /// Returns a copy whose `adjust` is increased by `adjustValue`.
    public func adjustedData(_ adjustValue: PValue) -> Self {
        var copy = self
        copy.adjust = adjustValue + self.adjust
        return copy
    }

    /// The effective score: `baseValue + adjust`, capped at zero from above.
    public func value() -> PValue {
        let total = self.baseValue + self.adjust
        return min(.zero, total)
    }

    public static func == (lhs: Self, rhs: Self) -> Bool {
        guard lhs.word == rhs.word, lhs.ruby == rhs.ruby else {
            return false
        }
        return lhs.lcid == rhs.lcid && lhs.mid == rhs.mid && lhs.rcid == rhs.rcid
    }

    public func hash(into hasher: inout Hasher) {
        // `mid` is intentionally left out here exactly as before; equal values still hash equally.
        hasher.combine(word)
        hasher.combine(ruby)
        hasher.combine(lcid)
        hasher.combine(rcid)
    }
}
|
||||
|
||||
extension DicdataElement: CustomDebugStringConvertible {
    /// Debug text listing the reading, word, both CIDs, the MID and the score breakdown
    /// (`baseValue+adjust=value()`), wrapped in a balanced pair of parentheses.
    public var debugDescription: String {
        "(ruby: \(self.ruby), word: \(self.word), cid: (\(self.lcid), \(self.rcid)), mid: \(self.mid), value: \(self.baseValue)+\(self.adjust)=\(self.value()))"
    }
}
|
||||
843
Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
Normal file
843
Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
Normal file
@@ -0,0 +1,843 @@
|
||||
//
|
||||
// DicdataStore.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/17.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Algorithms
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
public final class DicdataStore {
|
||||
/// Creates a store that uses the given request options and immediately runs `setup()`.
/// - Parameter convertRequestOptions: The options this store starts with.
public init(convertRequestOptions: ConvertRequestOptions) {
    requestOptions = convertRequestOptions
    setup()
}
|
||||
|
||||
/// Module-internal initializer; logs the initialization and then runs `setup()`.
/// - Parameter requestOptions: The options this store starts with. Defaults to `.default`.
init(requestOptions: ConvertRequestOptions = .default) {
    self.requestOptions = requestOptions
    debug("DicdataStoreが初期化されました")
    setup()
}
|
||||
|
||||
/// One flag per CID. NOTE(review): presumably records which connection-cost lines have been
/// parsed; the reader of this flag is outside this section — confirm.
private var ccParsed: [Bool] = .init(repeating: false, count: CIDData.totalCount)
/// Connection-cost table; `setup()` resets it to one empty dictionary per CID.
private var ccLines: [[Int: PValue]] = []
/// Values read from `mm.binary` in `setup()`; falls back to `midCount * midCount` zeros.
private var mmValue: [PValue] = []
/// Score threshold used by `shouldBeRemoved`.
private let threshold: PValue = -17

/// Loaded LOUDS, keyed by identifier.
private var loudses: [String: LOUDS] = [:]
/// Identifiers for which a load has already been attempted (successful or not).
private var importedLoudses: Set<String> = []
/// Character → character ID, filled from `louds/charID.chid` in `setup()`.
private var charsID: [Character: UInt8] = [:]
private var learningManager = LearningManager()
/// Cache for `getZeroHintPredictionDicdata()`.
private var zeroHintPredictionDicdata: [DicdataElement]?

/// Entries supplied through `Notification.importOSUserDict`.
private var osUserDict: [DicdataElement] = []

/// Upper bound on the lookup length used by `getLOUDSDataInRange` and `getLOUDSData`.
internal let maxlength: Int = 20
private let midCount = 502
// Kept in sync with `CIDData.totalCount` (the same source `setup()` uses for `ccLines`)
// instead of repeating the literal.
private let cidCount = CIDData.totalCount

private var requestOptions: ConvertRequestOptions = .default

/// Configured in `setup()` (spell-out style, `ja-JP` locale).
private let numberFormatter = NumberFormatter()
|
||||
/// Setup routine run at initialization (and again when the dictionary resource URL changes).
///
/// Configures the number formatter, resets the connection-cost table, reads the character-ID
/// table and `mm.binary`, and triggers loading of the "user" and "memory" LOUDS.
private func setup() {
    numberFormatter.numberStyle = .spellOut
    numberFormatter.locale = .init(identifier: "ja-JP")
    self.ccLines = Array(repeating: [:], count: CIDData.totalCount)

    // Character-ID table: the position of a character in the file is its ID.
    do {
        let charIDURL = self.requestOptions.dictionaryResourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
        let string = try String(contentsOf: charIDURL, encoding: String.Encoding.utf8)
        charsID = Dictionary(uniqueKeysWithValues: string.enumerated().map { pair in
            (pair.element, UInt8(pair.offset))
        })
    } catch {
        debug("ファイルが存在しません: \(error)")
    }

    // `mm.binary`: a flat array of `Float`; fall back to zeros when it cannot be read.
    let mmURL = requestOptions.dictionaryResourceURL.appendingPathComponent("mm.binary", isDirectory: false)
    do {
        let binaryData = try Data(contentsOf: mmURL, options: [.uncached])
        self.mmValue = binaryData.toArray(of: Float.self).map { PValue($0) }
    } catch {
        debug("Failed to read the file.")
        self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount)
    }

    _ = self.loadLOUDS(identifier: "user")
    _ = self.loadLOUDS(identifier: "memory")
}
|
||||
|
||||
/// Messages that can be delivered to the store through `sendToDicdataStore(_:)`.
public enum Notification {
    /// Replaces the OS user dictionary entries held by the store.
    case importOSUserDict([DicdataElement])
    /// Updates the request options; triggers a new setup when the dictionary resource URL differs.
    case setRequestOptions(ConvertRequestOptions)
    /// Asks the learning manager to forget the data of the given candidate.
    case forgetMemory(Candidate)
    /// Saves learning data and drops the cached "memory" and "user" LOUDS.
    case closeKeyboard
}
|
||||
|
||||
/// Dispatches a notification to the store.
/// - Parameter data: The notification to handle.
func sendToDicdataStore(_ data: Notification) {
    switch data {
    case .closeKeyboard:
        closeKeyboard()
    case .importOSUserDict(let entries):
        osUserDict = entries
    case .forgetMemory(let candidate):
        learningManager.forgetMemory(data: candidate.data)
        // The memory LOUDS is affected, so drop the cached copy.
        reloadMemory()
    case .setRequestOptions(let newOptions):
        // Run setup again only when the dictionary location has changed.
        let resourceMoved = newOptions.dictionaryResourceURL != requestOptions.dictionaryResourceURL
        requestOptions = newOptions
        if resourceMoved {
            setup()
        }
        if learningManager.setRequestOptions(options: newOptions) {
            reloadMemory()
        }
    }
}
|
||||
|
||||
/// Drops the cached "memory" LOUDS and its load marker so the next lookup loads it again.
private func reloadMemory() {
    let identifier = "memory"
    loudses[identifier] = nil
    importedLoudses.remove(identifier)
}
|
||||
|
||||
/// Drops the cached "user" LOUDS and its load marker so the next lookup loads it again.
private func reloadUser() {
    let identifier = "user"
    loudses[identifier] = nil
    importedLoudses.remove(identifier)
}
|
||||
|
||||
/// Saves the learning data, then invalidates the cached "memory" and "user" LOUDS.
private func closeKeyboard() {
    learningManager.save()
    // After saving, make sure the previously cached memory LOUDS is not used any longer.
    reloadMemory()
    reloadUser()
}
|
||||
|
||||
/// Penalty function; determined by the number of characters.
/// - Parameter data: The entry whose `word` length is used.
/// - Returns: `-2.0` divided by the character count of `data.word`.
private static func getPenalty(data: DicdataElement) -> PValue {
    let length = PValue(data.word.count)
    return -2.0 / length
}
|
||||
|
||||
/// Used during computation: tells whether an entry should be ignored.
/// - Parameters:
///   - value: The score to test.
///   - wordCount: The character count used for the length penalty.
/// - Returns: `true` when `value` is below `threshold`, or when the margin above the threshold
///   is smaller than `2.0 / wordCount`.
private func shouldBeRemoved(value: PValue, wordCount: Int) -> Bool {
    let margin = value - self.threshold
    if margin < 0 {
        return true
    }
    // `margin` is non-negative from here on.
    let penalty: PValue = -2.0 / PValue(wordCount)
    return penalty < -margin
}
|
||||
|
||||
/// Used during computation: tells whether an entry should be ignored.
/// - Parameter data: The entry to test.
/// - Returns: `true` when `data.value()` is below `threshold`, or when the margin above the
///   threshold is smaller than the magnitude of `getPenalty(data:)`.
internal func shouldBeRemoved(data: DicdataElement) -> Bool {
    let margin = data.value() - self.threshold
    if margin < 0 {
        return true
    }
    let penalty = Self.getPenalty(data: data)
    return penalty < -margin
}
|
||||
|
||||
/// Returns the LOUDS for `identifier`, loading it on first use.
///
/// The identifier is marked as attempted before loading, so a failed load is not retried
/// until the marker is removed again.
/// - Parameter identifier: The LOUDS identifier.
/// - Returns: The cached or freshly loaded LOUDS, or `nil` when loading failed.
private func loadLOUDS(identifier: String) -> LOUDS? {
    guard importedLoudses.insert(identifier).inserted else {
        // Already attempted: answer from the cache (which is empty after a failed load).
        return loudses[identifier]
    }
    guard let louds = LOUDS.load(identifier, option: self.requestOptions) else {
        debug("loudsの読み込みに失敗、identifierは\(identifier)")
        return nil
    }
    loudses[identifier] = louds
    return louds
}
|
||||
|
||||
/// Looks up the node that matches `charIDs` exactly.
/// - Parameters:
///   - identifier: The LOUDS identifier.
///   - charIDs: The character IDs to search for.
/// - Returns: The matching node index as a one-element array, or an empty array when the LOUDS
///   is unavailable or there is no match.
private func perfectMatchLOUDS(identifier: String, charIDs: [UInt8]) -> [Int] {
    guard let louds = self.loadLOUDS(identifier: identifier),
          let nodeIndex = louds.searchNodeIndex(chars: charIDs) else {
        return []
    }
    return [nodeIndex]
}
|
||||
|
||||
/// Collects node indices along the path spelled by `charIDs`, restricted to `depth`.
/// - Parameters:
///   - identifier: The LOUDS identifier.
///   - charIDs: The character IDs to follow.
///   - depth: The range of depths to keep.
/// - Returns: The slice of `byfixNodeIndices(chars:)` for the requested depths (clamped to the
///   available results), or an empty array when the LOUDS is unavailable.
private func throughMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] {
    guard let louds = self.loadLOUDS(identifier: identifier) else {
        return []
    }
    let nodeIndices = louds.byfixNodeIndices(chars: charIDs)
    // Results start at index 1, so the depth range is shifted by one:
    // e.g. for 3..<5 (3 and 4 characters) the slice 4..<6 is taken.
    let limit = nodeIndices.endIndex
    let lower = min(depth.lowerBound + 1, limit)
    let upper = min(depth.upperBound + 1, limit)
    return Array(nodeIndices[lower ..< upper])
}
|
||||
|
||||
/// Collects node indices via `prefixNodeIndices(chars:maxDepth:)`.
/// - Parameters:
///   - identifier: The LOUDS identifier.
///   - charIDs: The character IDs of the prefix.
///   - depth: Passed through as `maxDepth`. Defaults to `.max`.
/// - Returns: The indices reported by the LOUDS, or an empty array when it is unavailable.
private func prefixMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Int = .max) -> [Int] {
    if let louds = self.loadLOUDS(identifier: identifier) {
        return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
    }
    return []
}
|
||||
|
||||
/// Fetches the dictionary entries for the given node indices.
///
/// Indices are grouped into shards of 2048: the shard number (`index >> 11`) is appended to
/// `identifier`, and the position inside the shard (`index & 2047`) is passed on.
/// - Parameters:
///   - identifier: The LOUDS identifier.
///   - indices: The node indices to resolve.
/// - Returns: The entries of all shards, concatenated.
private func getDicdataFromLoudstxt3(identifier: String, indices: Set<Int>) -> [DicdataElement] {
    debug("getDicdataFromLoudstxt3", identifier, indices)
    // split = 2048
    let shards = [Int: [Int]].init(grouping: indices, by: { $0 >> 11 })
    return shards.flatMap { shard -> [DicdataElement] in
        let localIndices = shard.value.map { $0 & 2047 }
        return LOUDS.getDataForLoudstxt3(identifier + "\(shard.key)", indices: localIndices, option: self.requestOptions)
    }
}
|
||||
|
||||
/// Referenced from kana2lattice.
/// - Parameters:
///   - inputData: The input data.
///   - fromIndex: The start position.
///   - toIndexRange: Dictionary lookup is performed for `from ..< (toIndexRange)`. When `nil`,
///     the range runs from `fromIndex` to the end of the input. In both cases the right end is
///     capped at `fromIndex + maxlength`.
/// - Returns: The lattice nodes found; nodes starting at index 0 get a BOS node as predecessor.
public func getLOUDSDataInRange(inputData: ComposingText, from fromIndex: Int, toIndexRange: Range<Int>? = nil) -> [LatticeNode] {
    let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
    let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
    debug("getLOUDSDataInRange", fromIndex, toIndexRange?.description ?? "nil", toIndexLeft, toIndexRight)
    guard fromIndex <= toIndexLeft, toIndexLeft < toIndexRight else {
        debug("getLOUDSDataInRange: index is wrong")
        return []
    }

    // segments[k] holds the katakana form of input[fromIndex ... fromIndex + k].
    var segments: [String] = []
    for rightIndex in fromIndex ..< toIndexRight {
        segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
    }

    // MARK: Enumerate the typo-correction targets. This is a very heavy step.
    var stringToInfo = inputData.getRangesWithTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)

    // MARK: Enumerate what has to be searched.
    let stringSet = stringToInfo.keys.map { key in
        (key, key.map { self.charsID[$0, default: .max] })
    }
    let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map { $0.1.count }.minAndMax() ?? (0, -1)
    // First character → the strings to be searched in the LOUDS named after that character.
    let group = [Character: [([Character], [UInt8])]].init(grouping: stringSet, by: { $0.0.first! })

    let depth = minCharIDsCount - 1 ..< maxCharIDsCount
    var indices: [(String, Set<Int>)] = group.map { entry in
        let head = String(entry.key)
        let found = entry.value.flatMapSet { (_, charIDs) in
            self.throughMatchLOUDS(identifier: head, charIDs: charIDs, depth: depth)
        }
        return (head, found)
    }
    indices.append(("user", stringSet.flatMapSet { self.throughMatchLOUDS(identifier: "user", charIDs: $0.1, depth: depth) }))
    if learningManager.enabled {
        indices.append(("memory", stringSet.flatMapSet { self.throughMatchLOUDS(identifier: "memory", charIDs: $0.1, depth: depth) }))
    }

    // MARK: Pull the actual dictionary data out for the indices found above.
    var dicdata: [DicdataElement] = []
    for (identifier, nodeIndices) in indices {
        for data in self.getDicdataFromLoudstxt3(identifier: identifier, indices: nodeIndices) {
            let rubyArray = Array(data.ruby)
            let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
            if penalty.isZero {
                dicdata.append(data)
                continue
            }
            let ratio = Self.penaltyRatio[data.lcid]
            let pUnit: PValue = Self.getPenalty(data: data) / 2 // negative value
            let adjust = pUnit * penalty * ratio
            if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
                continue
            }
            dicdata.append(data.adjustedData(adjust))
        }
    }
    dicdata.append(contentsOf: stringSet.flatMap { self.learningManager.temporaryThroughMatch(charIDs: $0.1, depth: depth) })

    for i in toIndexLeft ..< toIndexRight {
        let segment = segments[i - fromIndex]

        let wiseData = self.getWiseDicdata(convertTarget: segment, inputData: inputData, inputRange: fromIndex ..< i + 1)
        for item in wiseData {
            stringToInfo[Array(item.ruby)] = (i, 0)
        }
        dicdata.append(contentsOf: wiseData)

        let osUserData = self.getMatchOSUserDict(segment)
        for item in osUserData {
            stringToInfo[Array(item.ruby)] = (i, 0)
        }
        dicdata.append(contentsOf: osUserData)
    }

    // Entries whose reading has no recorded end index are dropped.
    let startsAtHead = fromIndex == .zero
    return dicdata.compactMap { element -> LatticeNode? in
        guard let endIndex = stringToInfo[Array(element.ruby)]?.endIndex else {
            return nil
        }
        let node = LatticeNode(data: element, inputRange: fromIndex ..< endIndex + 1)
        if startsAtHead {
            node.prevs.append(RegisteredNode.BOSNode())
        }
        return node
    }
}
|
||||
|
||||
/// Referenced from kana2lattice. LOUDS version.
/// - Parameters:
///   - inputData: The input data.
///   - fromIndex: The start position.
///   - toIndex: The end position (inclusive).
/// - Returns: The lattice nodes for `fromIndex ... toIndex`, or an empty array when the indices are
///   reversed or `toIndex - fromIndex` exceeds `maxlength`. Nodes starting at index 0 get a BOS
///   node as predecessor.
public func getLOUDSData(inputData: ComposingText, from fromIndex: Int, to toIndex: Int) -> [LatticeNode] {
    guard toIndex - fromIndex <= self.maxlength, fromIndex <= toIndex else {
        return []
    }
    let segment = inputData.input[fromIndex...toIndex].reduce(into: "") { $0.append($1.character) }.toKatakana()

    let string2penalty = inputData.getRangeWithTypos(fromIndex, toIndex)

    // MARK: Pull the actual dictionary data out for the indices found by the search.
    // First character → the strings to be searched in the LOUDS named after that character.
    let strings = string2penalty.keys.map { key in
        (key: key, charIDs: key.map { self.charsID[$0, default: .max] })
    }
    let group = [Character: [(key: [Character], charIDs: [UInt8])]].init(grouping: strings, by: { $0.key.first! })

    var indices: [(String, Set<Int>)] = group.map { entry in
        let head = String(entry.key)
        let found = entry.value.flatMapSet { (_, charIDs) in
            self.perfectMatchLOUDS(identifier: head, charIDs: charIDs)
        }
        return (head, found)
    }
    let userMatches = strings.flatMapSet { (_, charIDs) in
        self.perfectMatchLOUDS(identifier: "user", charIDs: charIDs)
    }
    indices.append(("user", userMatches))
    if learningManager.enabled {
        let memoryMatches = strings.flatMapSet { (_, charIDs) in
            self.perfectMatchLOUDS(identifier: "memory", charIDs: charIDs)
        }
        indices.append(("memory", memoryMatches))
    }

    var dicdata: [DicdataElement] = []
    for (identifier, nodeIndices) in indices {
        for data in self.getDicdataFromLoudstxt3(identifier: identifier, indices: nodeIndices) {
            let rubyArray = Array(data.ruby)
            let penalty = string2penalty[rubyArray, default: .zero]
            if penalty.isZero {
                dicdata.append(data)
                continue
            }
            let ratio = Self.penaltyRatio[data.lcid]
            let pUnit: PValue = Self.getPenalty(data: data) / 2 // negative value
            let adjust = pUnit * penalty * ratio
            if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
                continue
            }
            dicdata.append(data.adjustedData(adjust))
        }
    }
    let inputRange = fromIndex ..< toIndex + 1
    dicdata.append(contentsOf: strings.flatMap { self.learningManager.temporaryPerfectMatch(charIDs: $0.charIDs) })
    dicdata.append(contentsOf: self.getWiseDicdata(convertTarget: segment, inputData: inputData, inputRange: inputRange))
    dicdata.append(contentsOf: self.getMatchOSUserDict(segment))

    let startsAtHead = fromIndex == .zero
    return dicdata.map { element -> LatticeNode in
        let node = LatticeNode(data: element, inputRange: inputRange)
        if startsAtHead {
            node.prevs.append(RegisteredNode.BOSNode())
        }
        return node
    }
}
|
||||
|
||||
/// Returns the prediction candidates offered when nothing has been typed yet.
///
/// The entries are read from `p/p_null.csv` under the dictionary resource
/// directory on first use and cached in `zeroHintPredictionDicdata`. A read
/// failure is logged and cached as an empty list, so the file is not retried.
/// - Returns: The cached or freshly loaded entries; empty when loading failed.
internal func getZeroHintPredictionDicdata() -> [DicdataElement] {
    if let cached = self.zeroHintPredictionDicdata {
        return cached
    }
    let loaded: [DicdataElement]
    do {
        let fileURL = requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_null.csv", isDirectory: false)
        let contents = try String(contentsOf: fileURL, encoding: String.Encoding.utf8)
        // One entry per line; empty columns are kept so that the column positions stay stable.
        loaded = contents.split(separator: "\n").map { line in
            self.parseLoudstxt2FormattedEntry(from: line.split(separator: ",", omittingEmptySubsequences: false))
        }
    } catch {
        debug(error)
        loaded = []
    }
    self.zeroHintPredictionDicdata = loaded
    return loaded
}
|
||||
|
||||
/// Loads prediction candidates for `key` from the dictionaries.
/// - Parameters:
///   - key: The reading to look up.
/// - Returns:
///   The entries that were found. Empty for an empty key, or when the
///   single-character prediction file cannot be read.
internal func getPredictionLOUDSDicdata(key: some StringProtocol) -> [DicdataElement] {
    let length = key.count
    guard length > 0 else {
        return []
    }
    // Prefix search for a single character is hard, so a dedicated pre-built file is used instead.
    if length == 1 {
        do {
            let fileURL = requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_\(key).csv", isDirectory: false)
            let contents = try String(contentsOf: fileURL, encoding: String.Encoding.utf8)
            return contents.split(separator: "\n").map { line in
                self.parseLoudstxt2FormattedEntry(from: line.split(separator: ",", omittingEmptySubsequences: false))
            }
        } catch {
            debug("ファイルが存在しません: \(error)")
            return []
        }
    }

    // Two or more characters: prefix search in the main, user and (optionally) memory dictionaries.
    let mainIdentifier = String(key.prefix(1))
    let charIDs = key.map {self.charsID[$0, default: .max]}
    // Two-character keys pass an explicit search depth of 5; longer keys use the callee's default depth.
    let usesExplicitDepth = length == 2

    var identifiers = [mainIdentifier, "user"]
    let learningEnabled = learningManager.enabled
    if learningEnabled {
        identifiers.append("memory")
    }

    var found: [DicdataElement] = []
    for identifier in identifiers {
        let matched = usesExplicitDepth
            ? self.prefixMatchLOUDS(identifier: identifier, charIDs: charIDs, depth: 5)
            : self.prefixMatchLOUDS(identifier: identifier, charIDs: charIDs)
        // Keep at most 700 indices per dictionary to avoid slowing down.
        found.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: Set(matched.prefix(700))))
    }
    if learningEnabled {
        found.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
    }
    return found
}
|
||||
|
||||
/// Builds a dictionary entry from one comma-separated row.
///
/// Column layout: ruby, word, lcid, rcid, mid, value.
/// Fallbacks: an empty word becomes the ruby, an unparsable lcid or mid
/// becomes 0, an unparsable rcid becomes the lcid, and an unparsable value
/// becomes -30. A column that is missing altogether is treated like an empty
/// one, so a short row gets the same fallbacks instead of trapping on an
/// out-of-range index.
/// - Parameter dataString: The columns of one row.
/// - Returns: The parsed entry.
private func parseLoudstxt2FormattedEntry(from dataString: [some StringProtocol]) -> DicdataElement {
    /// Returns the column at `index`, or an empty string when the row is too short.
    func column(_ index: Int) -> String {
        dataString.indices.contains(index) ? String(dataString[index]) : ""
    }
    let ruby = column(0)
    let rawWord = column(1)
    let word = rawWord.isEmpty ? ruby : rawWord
    let lcid = Int(column(2)) ?? .zero
    let rcid = Int(column(3)) ?? lcid
    let mid = Int(column(4)) ?? .zero
    let value: PValue = PValue(column(5)) ?? -30.0
    return DicdataElement(word: word, ruby: ruby, lcid: lcid, rcid: rcid, mid: mid, value: value)
}
|
||||
|
||||
/// Builds supplementary candidates that are derived from the reading itself.
///
/// The following are appended, in this order, when their condition holds:
/// Japanese-numeral readings (`getJapaneseNumberDicdata`), the literal number
/// and its kanji-numeral spelling, the reading as an English word, the
/// hiragana/katakana form of a pure roman-input segment, the hiragana/katakana
/// form of a single character, and half-width/full-width and related symbols.
/// - Parameters:
///   - convertTarget: The reading of the segment, already converted to katakana.
///   - inputData: The composing text the segment was taken from.
///   - inputRange: The range in `inputData.input` that the segment covers.
/// - Returns: The generated candidates; may be empty.
/// - Note: Converting the whole input to katakana or hiragana is done on the
///   converter side, not here.
private func getWiseDicdata(convertTarget: String, inputData: ComposingText, inputRange: Range<Int>) -> [DicdataElement] {
    var result: [DicdataElement] = []
    result.append(contentsOf: self.getJapaneseNumberDicdata(head: convertTarget))

    // The literal number is offered only when the segment is not part of a longer run of digits.
    if inputData.input[..<inputRange.startIndex].last?.character.isNumber != true
        && inputData.input[inputRange.endIndex...].first?.character.isNumber != true,
       Float(convertTarget) != nil {
        result.append(DicdataElement(ruby: convertTarget, cid: CIDData.数.cid, mid: MIDData.小さい数字.mid, value: -14))

        // Parse the integer exactly. Going through `Float` rounds integers above 2^24 and
        // `Int(_:)` traps on floating-point values outside the `Int` range, so the value is
        // range-checked before any floating-point to integer conversion.
        let integer: Int?
        if let exact = Int(convertTarget) {
            integer = exact
        } else if let value = Double(convertTarget), value.truncatingRemainder(dividingBy: 1) == 0, abs(value) < 1E18 {
            // Integral values written in another form, such as "1.0" or "1e3".
            integer = Int(value)
        } else {
            integer = nil
        }
        if let integer, integer < Int(1E18) && -Int(1E18) < integer, let kansuji = self.numberFormatter.string(from: NSNumber(value: integer)) {
            result.append(DicdataElement(word: kansuji, ruby: convertTarget, cid: CIDData.数.cid, mid: MIDData.小さい数字.mid, value: -16))
        }
    }

    // Offer convertTarget itself as an English word.
    if requestOptions.keyboardLanguage == .en_US && convertTarget.onlyRomanAlphabet {
        result.append(DicdataElement(ruby: convertTarget, cid: CIDData.固有名詞.cid, mid: MIDData.英単語.mid, value: -14))
    }

    // For roman input, also offer the segment converted on its own to hiragana and katakana.
    if requestOptions.keyboardLanguage != .en_US && inputData.input[inputRange].allSatisfy({$0.inputStyle == .roman2kana}) {
        if let katakana = Roman2Kana.katakanaChanges[convertTarget], let hiragana = Roman2Kana.hiraganaChanges[Array(convertTarget)] {
            result.append(DicdataElement(word: String(hiragana), ruby: katakana, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -13))
            result.append(DicdataElement(ruby: katakana, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -14))
        }
    }

    // For a single character, offer its hiragana and katakana forms.
    if convertTarget.count == 1 {
        let katakana = convertTarget.toKatakana()
        let hiragana = convertTarget.toHiragana()
        if convertTarget == katakana {
            result.append(DicdataElement(ruby: katakana, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -14))
        } else {
            result.append(DicdataElement(word: hiragana, ruby: katakana, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -13))
            result.append(DicdataElement(ruby: katakana, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -14))
        }
    }

    // Symbol conversion. Each appended candidate is scored 5 lower than the one before it.
    if convertTarget.count == 1, let first = convertTarget.first {
        var value: PValue = -14
        let hs = Self.fullwidthToHalfwidth[first, default: first]

        // `first` is a full-width symbol: offer it as typed, then its half-width form.
        if hs != first {
            result.append(DicdataElement(word: convertTarget, ruby: convertTarget, cid: CIDData.記号.cid, mid: MIDData.一般.mid, value: value))
            value -= 5.0
            result.append(DicdataElement(word: String(hs), ruby: convertTarget, cid: CIDData.記号.cid, mid: MIDData.一般.mid, value: value))
            value -= 5.0
        }
        // `first` is a half-width symbol: offer it as typed, then its full-width form.
        if let fs = Self.halfwidthToFullwidth[first], fs != first {
            result.append(DicdataElement(word: convertTarget, ruby: convertTarget, cid: CIDData.記号.cid, mid: MIDData.一般.mid, value: value))
            value -= 5.0
            result.append(DicdataElement(word: String(fs), ruby: convertTarget, cid: CIDData.記号.cid, mid: MIDData.一般.mid, value: value))
            value -= 5.0
        }
        // Weakly related symbols, each followed by its full-width form when one exists.
        for group in Self.weakRelatingSymbolGroups where group.contains(hs) {
            for symbol in group where symbol != hs {
                result.append(DicdataElement(word: String(symbol), ruby: convertTarget, cid: CIDData.記号.cid, mid: MIDData.一般.mid, value: value))
                value -= 5.0
                if let fs = Self.halfwidthToFullwidth[symbol] {
                    result.append(DicdataElement(word: String(fs), ruby: convertTarget, cid: CIDData.記号.cid, mid: MIDData.一般.mid, value: value))
                    value -= 5.0
                }
            }
        }
    }
    return result
}
|
||||
|
||||
// Half-width / full-width conversion tables for symbols.
// The literal lists the FULL-WIDTH forms; their half-width counterparts are derived
// with `StringTransform.fullwidthToHalfwidth`, and the two strings are zipped
// character by character into a forward and a reverse lookup table.
// NOTE(review): the literal had been width-normalized to half-width characters, which
// left an unescaped `"` and an invalid `\/` escape inside it and made both zipped
// strings half-width. The full-width forms are restored here; confirm against the
// upstream file.
private static let (fullwidthToHalfwidth, halfwidthToFullwidth) = zip(
    "＋ー＊＝・！＃％＆＇＂〜｜￡＄￥＠｀；：＜＞，．＼／＿￣－",
    "＋ー＊＝・！＃％＆＇＂〜｜￡＄￥＠｀；：＜＞，．＼／＿￣－".applyingTransform(.fullwidthToHalfwidth, reverse: false)!
)
.reduce(into: ([Character: Character](), [Character: Character]())) { (results: inout ([Character: Character], [Character: Character]), values: (Character, Character)) in
    // values.0 is the full-width character, values.1 its half-width form.
    results.0[values.0] = values.1
    results.1[values.1] = values.0
}
|
||||
|
||||
// 弱い類似(矢印同士のような関係)にある記号をグループにしたもの
|
||||
// 例えば→に対して⇒のような記号はより類似度が強いため、上位に出したい。これを実現する必要が生じた場合はstrongRelatingSymbolGroupsを新設する。
|
||||
// 宣言順不同
|
||||
// 1つを入れると他が出る、というイメージ
|
||||
// 半角と全角がある場合は半角のみ
|
||||
private static let weakRelatingSymbolGroups: [[Character]] = [
|
||||
// 異体字セレクト用 (試験実装)
|
||||
["高", "髙"], // ハシゴダカ
|
||||
["斎", "斉", "齋", "齊"],
|
||||
["澤", "沢"],
|
||||
["気", "氣"],
|
||||
["澁", "渋"],
|
||||
["対", "對"],
|
||||
["辻", "辻󠄀"],
|
||||
["禰󠄀", "禰"],
|
||||
["煉󠄁", "煉"],
|
||||
["崎", "﨑"], // タツザキ
|
||||
["栄", "榮"],
|
||||
["吉", "𠮷"], // ツチヨシ
|
||||
["橋", "𣘺", "槗", "𫞎"],
|
||||
["浜", "濱", "濵"],
|
||||
["鴎", "鷗"],
|
||||
["学", "學"],
|
||||
["角", "⻆"],
|
||||
["亀", "龜"],
|
||||
["桜", "櫻"],
|
||||
["真", "眞"],
|
||||
|
||||
// 記号変換
|
||||
["☆", "★", "♡", "☾", "☽"], // 星
|
||||
["^", "^"], // ハット
|
||||
["¥", "$", "¢", "€", "£", "₿"], // 通貨
|
||||
["%", "‰"], // パーセント
|
||||
["°", "℃", "℉"],
|
||||
["◯"], // 図形
|
||||
["*", "※", "✳︎", "✴︎"], // こめ
|
||||
["・", "…", "‥", "•"],
|
||||
["+", "±", "⊕"],
|
||||
["×", "❌", "✖️"],
|
||||
["÷", "➗" ],
|
||||
["<", "≦", "≪", "〈", "《", "‹", "«"],
|
||||
[">", "≧", "≫", "〉", "》", "›", "»"],
|
||||
["=", "≒", "≠", "≡"],
|
||||
[":", ";"],
|
||||
["!", "❗️", "❣️", "‼︎", "⁉︎", "❕", "‼️", "⁉️", "¡"],
|
||||
["?", "❓", "⁉︎", "⁇", "❔", "⁉️", "¿"],
|
||||
["〒", "〠", "℡", "☎︎"],
|
||||
["々", "ヾ", "ヽ", "ゝ", "ゞ", "〃", "仝", "〻"],
|
||||
["〆", "〼", "ゟ", "ヿ"], // 特殊仮名
|
||||
["♂", "♀", "⚢", "⚣", "⚤", "⚥", "⚦", "⚧", "⚨", "⚩", "⚪︎", "⚲"], // ジェンダー記号
|
||||
["→", "↑", "←", "↓", "↙︎", "↖︎", "↘︎", "↗︎", "↔︎", "↕︎", "↪︎", "↩︎", "⇆"], // 矢印
|
||||
["♯", "♭", "♪", "♮", "♫", "♬", "♩", "𝄞", "𝄞"], // 音符
|
||||
["√", "∛", "∜"] // 根号
|
||||
]
|
||||
|
||||
/// Reads a binary file and reinterprets its contents as `(Int32, Float)` pairs.
/// - Parameter url: The file to read.
/// - Returns: The decoded pairs, or an empty array when the file cannot be read
///   (the failure is logged).
private func loadCCBinary(url: URL) -> [(Int32, Float)] {
    let contents: Data
    do {
        contents = try Data(contentsOf: url, options: [.uncached])
    } catch {
        debug("Failed to read the file.", error)
        return []
    }
    return contents.toArray(of: (Int32, Float).self)
}
|
||||
|
||||
/// Returns the entries of the OS user dictionary whose ruby equals `ruby`.
private func getMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
    self.osUserDict.filter { entry in
        entry.ruby == ruby
    }
}
|
||||
|
||||
/// Returns the entries of the OS user dictionary whose ruby starts with `ruby`.
internal func getPrefixMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
    self.osUserDict.filter { entry in
        entry.ruby.hasPrefix(ruby)
    }
}
|
||||
|
||||
// Feeds a committed candidate into the learning data.
// When `previous` is given it is placed in front of the candidate's own data.
// TODO: improve how `previous` is handled.
internal func updateLearningData(_ candidate: Candidate, with previous: DicdataElement?) {
    let leading: [DicdataElement] = previous.map { [$0] } ?? []
    self.learningManager.update(data: leading + candidate.data)
}
|
||||
/// Returns the connection cost between two class IDs.
///
/// The table for `former` is read from `cb/<former>.binary` the first time it
/// is requested and kept in `ccLines`; `ccParsed` records which tables are
/// already loaded. A pair without its own entry falls back to the table's
/// entry for key `-1`, and to -25 when that is missing too.
/// - Parameters:
///   - former: The class ID of the left-hand word.
///   - latter: The class ID of the right-hand word.
/// - Returns:
///   The logarithm of the connection probability.
/// - Note: The original comment records a measured time of 0.115224 for the
///   connection-cost calculation.
public func getCCValue(_ former: Int, _ latter: Int) -> PValue {
    if !ccParsed[former] {
        let fileURL = requestOptions.dictionaryResourceURL.appendingPathComponent("cb/\(former).binary", isDirectory: false)
        let pairs = loadCCBinary(url: fileURL).map { pair in
            (Int(pair.0), PValue(pair.1))
        }
        ccLines[former] = [Int: PValue](uniqueKeysWithValues: pairs)
        ccParsed[former] = true
    }
    let row = ccLines[former]
    return row[latter] ?? row[-1] ?? -25
}
|
||||
|
||||
/// Returns the semantic connection likelihood between two meaning IDs.
///
/// The value is read from the flat `mmValue` table at
/// `former * midCount + latter`.
/// - Parameters:
///   - former: The meaning ID of the left-hand word.
///   - latter: The meaning ID of the right-hand word.
/// - Returns:
///   The logarithm of the semantic connection probability, or 0 when either ID is 500.
/// - Note: NOTE(review): 500 looks like a sentinel meaning ID that opts out of
///   this score — confirm against `MIDData`.
public func getMMValue(_ former: Int, _ latter: Int) -> PValue {
    guard former != 500, latter != 500 else {
        return 0
    }
    let tableIndex = former * self.midCount + latter
    return self.mmValue[tableIndex]
}
|
||||
|
||||
private static let possibleLOUDS: Set<Character> = [
|
||||
" ", " ̄", "‐", "―", "〜", "・", "、", "…", "‥", "。", "‘", "’", "“", "”", "〈", "〉", "《", "》", "「", "」", "『", "』", "【", "】", "〔", "〕", "‖", "*", "′", "〃", "※", "´", "¨", "゛", "゜", "←", "→", "↑", "↓", "─", "■", "□", "▲", "△", "▼", "▽", "◆", "◇", "○", "◎", "●", "★", "☆", "々", "ゝ", "ヽ", "ゞ", "ヾ", "ー", "〇", "ァ", "ア", "ィ", "イ", "ゥ", "ウ", "ヴ", "ェ", "エ", "ォ", "オ", "ヵ", "カ", "ガ", "キ", "ギ", "ク", "グ", "ヶ", "ケ", "ゲ", "コ", "ゴ", "サ", "ザ", "シ", "ジ", "〆", "ス", "ズ", "セ", "ゼ", "ソ", "ゾ", "タ", "ダ", "チ", "ヂ", "ッ", "ツ", "ヅ", "テ", "デ", "ト", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "バ", "パ", "ヒ", "ビ", "ピ", "フ", "ブ", "プ", "ヘ", "ベ", "ペ", "ホ", "ボ", "ポ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ョ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ヮ", "ワ", "ヰ", "ヱ", "ヲ", "ン", "仝", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "!", "?", "(", ")", "#", "%", "&", "^", "_", "'", "\""
|
||||
]
|
||||
|
||||
// While building typo-correction candidates, this check is made up front so that
// further computation can be cut off for characters that have no dictionary file.
internal static func existLOUDS(for character: Character) -> Bool {
    let known = Self.possibleLOUDS
    return known.contains(character)
}
|
||||
|
||||
/*
 A clause boundary lies between

 * postpositional function word → prepositional function word
 * postpositional function word → content word
 * content word → prepositional function word
 * content word → content word

 and, conversely, there is no clause boundary between

 * prepositional function word → content word
 * content word → postpositional function word

 */
/// Decides from two class IDs whether a clause boundary lies between them.
///
/// Word types come from `wordTypes`: 0 preposition, 1 content word,
/// 2 postposition, 3 BOS/EOS.
/// - Parameters:
///   - former: The class ID of the left-hand word.
///   - latter: The class ID of the right-hand word.
/// - Returns:
///   Whether the position between the two words is a clause boundary.
internal static func isClause(_ former: Int, _ latter: Int) -> Bool {
    // EOS is the most common case, so the right-hand side is checked first.
    let latterType = Self.wordTypes[latter]
    guard latterType != 3 else {
        return false
    }
    let formerType = Self.wordTypes[former]
    guard formerType != 3 else {
        return false
    }
    switch latterType {
    case 0, 1:
        // Before a preposition or a content word, it is a boundary unless a preposition precedes.
        return formerType != 0
    default:
        // A postposition never starts a clause.
        return false
    }
}
|
||||
|
||||
/// Class IDs of BOS and EOS. Used only while initializing `wordTypes`.
private static let BOS_EOS_wordIDs: Set<Int> = Set([CIDData.BOS.cid, CIDData.EOS.cid])
/// Class IDs treated as prepositions. Used only while initializing `wordTypes`.
private static let PREPOSITION_wordIDs: Set<Int> = Set([6, 1315] + Array(557...560))
/// Class IDs treated as content words. Used only while initializing `wordTypes`.
private static let INPOSITION_wordIDs: Set<Int> = {
    var ids: Set<Int> = [1314, 3, 2, 4, 5, 1, 9]
    let ranges: [Range<Int>] = [561..<868, 1283..<1297, 1306..<1310, 11..<53, 555..<557, 1281..<1283]
    for range in ranges {
        ids.formUnion(range)
    }
    return ids
}()
|
||||
|
||||
/*
|
||||
private static let POSTPOSITION_wordIDs: Set<Int> = Set<Int>((7...8).map{$0}
|
||||
+ (54..<555).map{$0}
|
||||
+ (868..<1281).map{$0}
|
||||
+ (1297..<1306).map{$0}
|
||||
+ (1310..<1314).map{$0}
|
||||
).union([10])
|
||||
*/
|
||||
|
||||
/// The word type of every class ID from 0 through 1319, indexed by class ID.
/// - 3 for BOS/EOS
/// - 0 for a preposition
/// - 1 for a content (core) word
/// - 2 for a postposition
///
/// Each entry takes 1 byte, so the table uses about 1.3 KB of memory.
static let wordTypes = (0...1319).map { cid in
    _judgeWordType(cid: cid)
}
|
||||
|
||||
/// Classifies one class ID. Used only while initializing `wordTypes`.
/// - Returns: 3 for BOS/EOS, 0 for a preposition, 1 for a content word,
///   and 2 (postposition) for everything else.
private static func _judgeWordType(cid: Int) -> UInt8 {
    // Checked in this order; the first set that contains the ID decides the type.
    let categories: [(members: Set<Int>, type: UInt8)] = [
        (Self.BOS_EOS_wordIDs, 3),     // BOS/EOS
        (Self.PREPOSITION_wordIDs, 0), // preposition
        (Self.INPOSITION_wordIDs, 1)   // content word
    ]
    let matched = categories.first { $0.members.contains(cid) }
    return matched?.type ?? 2 // postposition
}
|
||||
|
||||
/// Tells whether `data` takes part in the semantic (MM value) calculation.
///
/// True when either its left or right class ID is a dependent verb
/// (895...1280) or a dependent noun (1297...1305), or when either ID is a
/// content word according to `wordTypes`.
internal static func includeMMValueCalculation(_ data: DicdataElement) -> Bool {
    /// Dependent verbs and dependent nouns.
    func isDependentWord(_ cid: Int) -> Bool {
        (895...1280).contains(cid) || (1297...1305).contains(cid)
    }
    if isDependentWord(data.lcid) || isDependentWord(data.rcid) {
        return true
    }
    // Otherwise it depends on whether either side is a content word.
    return wordTypes[data.lcid] == 1 || wordTypes[data.rcid] == 1
}
|
||||
|
||||
/// The typo penalty ratio of every class ID from 0 through 1319, indexed by class ID.
/// Each entry takes 2 bytes, so the table uses about 2.6 KB of memory.
static let penaltyRatio = (0...1319).map { lcid in
    _getTypoPenaltyRatio(lcid)
}
|
||||
|
||||
/// Computes the typo penalty ratio of one class ID. Used only while initializing `penaltyRatio`.
/// - Returns: 2.5 for particles (147...368) and auxiliary verbs (369...554), otherwise 1.
internal static func _getTypoPenaltyRatio(_ lcid: Int) -> PValue {
    let particlesAndAuxiliaryVerbs = 147...554
    return particlesAndAuxiliaryVerbs.contains(lcid) ? 2.5 : 1
}
|
||||
|
||||
// Decides for which vocabulary learning is enabled.
// Learning is disabled for the class IDs listed below and enabled for everything else.
internal static func needWValueMemory(_ data: DicdataElement) -> Bool {
    switch data.lcid {
    case 147...554:   // particles and auxiliary verbs
        return false
    case 557...560:   // prefixes
        return false
    case 1297...1305: // suffix nouns
        return false
    case 6...9:       // symbols
        return false
    case 0, 1316:     // BOS and EOS
        return false
    default:
        return true
    }
}
|
||||
|
||||
/// Maps an unfinished roman-input sequence to the katakana strings it can still become.
/// NOTE(review): the usage is outside this block; presumably it enumerates the
/// completions of a trailing, not yet converted romaji prefix — confirm at the call site.
internal static let possibleNexts: [String: [String]] = {
    // Value lists shared by several keys.
    let smallKana = ["ァ", "ィ", "ゥ", "ェ", "ォ", "ッ", "ャ", "ュ", "ョ", "ヮ"]
    let smallTsu = ["ッ"]
    let smallYaRow = ["ャ", "ュ", "ョ"]
    let smallWa = ["ヮ"]
    let shaRow = ["シャ", "シィ", "シュ", "シェ", "ショ"]
    let chaRow = ["チャ", "チィ", "チュ", "チェ", "チョ"]

    return [
        // Small kana
        "x": smallKana,
        "l": smallKana,
        "xt": smallTsu,
        "lt": smallTsu,
        "xts": smallTsu,
        "lts": smallTsu,
        "xy": smallYaRow,
        "ly": smallYaRow,
        "xw": smallWa,
        "lw": smallWa,
        "v": ["ヴ"],
        // k / q / g
        "k": ["カ", "キ", "ク", "ケ", "コ"],
        "q": ["クァ", "クィ", "クゥ", "クェ", "クォ"],
        "qy": ["クャ", "クィ", "クュ", "クェ", "クョ"],
        "qw": ["クヮ", "クィ", "クゥ", "クェ", "クォ"],
        "ky": ["キャ", "キィ", "キュ", "キェ", "キョ"],
        "g": ["ガ", "ギ", "グ", "ゲ", "ゴ"],
        "gy": ["ギャ", "ギィ", "ギュ", "ギェ", "ギョ"],
        // s / z / j
        "s": ["サ", "シ", "ス", "セ", "ソ"],
        "sy": shaRow,
        "sh": shaRow,
        "z": ["ザ", "ジ", "ズ", "ゼ", "ゾ"],
        "zy": ["ジャ", "ジィ", "ジュ", "ジェ", "ジョ"],
        "j": ["ジ"],
        // t / c / d
        "t": ["タ", "チ", "ツ", "テ", "ト"],
        "ty": chaRow,
        "ts": ["ツ"],
        "th": ["テャ", "ティ", "テュ", "テェ", "テョ"],
        "tw": ["トァ", "トィ", "トゥ", "トェ", "トォ"],
        "cy": chaRow,
        "ch": ["チ"],
        "d": ["ダ", "ヂ", "ヅ", "デ", "ド"],
        "dy": ["ヂャ", "ヂィ", "ヂュ", "ヂェ", "ヂョ"],
        "dh": ["デャ", "ディ", "デュ", "デェ", "デョ"],
        "dw": ["ドァ", "ドィ", "ドゥ", "ドェ", "ドォ"],
        // n
        "n": ["ナ", "ニ", "ヌ", "ネ", "ノ", "ン"],
        "ny": ["ニャ", "ニィ", "ニュ", "ニェ", "ニョ"],
        // h / f / b / p
        "h": ["ハ", "ヒ", "フ", "ヘ", "ホ"],
        "hy": ["ヒャ", "ヒィ", "ヒュ", "ヒェ", "ヒョ"],
        "hw": ["ファ", "フィ", "フェ", "フォ"],
        "f": ["フ"],
        "b": ["バ", "ビ", "ブ", "ベ", "ボ"],
        "by": ["ビャ", "ビィ", "ビュ", "ビェ", "ビョ"],
        "p": ["パ", "ピ", "プ", "ペ", "ポ"],
        "py": ["ピャ", "ピィ", "ピュ", "ピェ", "ピョ"],
        // m / y / r / w
        "m": ["マ", "ミ", "ム", "メ", "モ"],
        "my": ["ミャ", "ミィ", "ミュ", "ミェ", "ミョ"],
        "y": ["ヤ", "ユ", "イェ", "ヨ"],
        "r": ["ラ", "リ", "ル", "レ", "ロ"],
        "ry": ["リャ", "リィ", "リュ", "リェ", "リョ"],
        "w": ["ワ", "ウィ", "ウェ", "ヲ"],
        "wy": ["ヰ", "ヱ"]
    ]
}()
|
||||
}
|
||||
@@ -0,0 +1,447 @@
|
||||
//
|
||||
// extension JapaneseNumber.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/17.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
/// A token of a Japanese number reading: a digit, a unit, the end marker, or an error.
private enum JapaneseNumber {
    case いち, に, さん, よん, ご, ろく, なな, はち, きゅう, れい
    case じゅう, ひゃく, せん, まん, おく, ちょう
    case おわり
    case エラー

    /// True for the digits 0–9 and for the end marker.
    var isNumber: Bool {
        switch self {
        case .いち, .に, .さん, .よん, .ご, .ろく, .なな, .はち, .きゅう, .れい, .おわり:
            return true
        case .じゅう, .ひゃく, .せん, .まん, .おく, .ちょう, .エラー:
            return false
        }
    }

    /// True for the units, the error token and the end marker.
    /// The end marker counts as both "number" and "not number".
    var isNotNumber: Bool {
        switch self {
        case .じゅう, .ひゃく, .せん, .まん, .おく, .ちょう, .エラー, .おわり:
            return true
        case .いち, .に, .さん, .よん, .ご, .ろく, .なな, .はち, .きゅう, .れい:
            return false
        }
    }

    /// The ASCII digit of a digit token; an empty string for every other token.
    var toRoman: String {
        switch self {
        case .いち: return "1"
        case .に: return "2"
        case .さん: return "3"
        case .よん: return "4"
        case .ご: return "5"
        case .ろく: return "6"
        case .なな: return "7"
        case .はち: return "8"
        case .きゅう: return "9"
        case .れい: return "0"
        case .じゅう, .ひゃく, .せん, .まん, .おく, .ちょう, .おわり, .エラー: return ""
        }
    }

    /// The rank of a token that closes a four-digit group
    /// (end marker 1, 万 2, 億 3, 兆 4); `nil` for every other token.
    var maxDigit: Int? {
        switch self {
        case .おわり: return 1
        case .まん: return 2
        case .おく: return 3
        case .ちょう: return 4
        case .いち, .に, .さん, .よん, .ご, .ろく, .なな, .はち, .きゅう, .れい,
             .じゅう, .ひゃく, .せん, .エラー:
            return nil
        }
    }

    /// The kanji numeral of the token; an empty string for the end marker and the error token.
    var toKanji: String {
        switch self {
        case .いち: return "一"
        case .に: return "二"
        case .さん: return "三"
        case .よん: return "四"
        case .ご: return "五"
        case .ろく: return "六"
        case .なな: return "七"
        case .はち: return "八"
        case .きゅう: return "九"
        case .れい: return "〇"
        case .じゅう: return "十"
        case .ひゃく: return "百"
        case .せん: return "千"
        case .まん: return "万"
        case .おく: return "億"
        case .ちょう: return "兆"
        case .おわり, .エラー: return ""
        }
    }
}
|
||||
|
||||
/// A single decimal digit.
private enum Number {
    case Zero, One, Two, Three, Four, Five, Six, Seven, Eight, Nine

    /// The ASCII character of the digit.
    var character: Character {
        switch self {
        case .Zero: return "0"
        case .One: return "1"
        case .Two: return "2"
        case .Three: return "3"
        case .Four: return "4"
        case .Five: return "5"
        case .Six: return "6"
        case .Seven: return "7"
        case .Eight: return "8"
        case .Nine: return "9"
        }
    }
}
|
||||
|
||||
extension DicdataStore {
|
||||
/// Splits a katakana number reading into tokens.
///
/// The reading is consumed character by character. A character that cannot
/// start a known reading, or a reading that is cut short, yields `.エラー`;
/// parsing then continues with the next unread character. The result always
/// ends with `.おわり`.
/// - Parameter input: The katakana reading.
/// - Returns: The tokens in reading order, followed by `.おわり`.
private func parseLiteral(input: some StringProtocol) -> [JapaneseNumber] {
    var remaining = input.makeIterator()
    var parsed: [JapaneseNumber] = []

    /// Reads one character per step and requires it to be one of that step's characters.
    /// Appends `token` when every step matched; otherwise appends `.エラー` and stops
    /// reading at the step that failed.
    func expect(_ steps: [Character]..., emit token: JapaneseNumber) {
        for allowed in steps {
            guard let next = remaining.next(), allowed.contains(next) else {
                parsed.append(.エラー)
                return
            }
        }
        parsed.append(token)
    }

    /// Parses the reading that starts with `head`.
    func read(_ head: Character) {
        switch head {
        case "イ":
            expect(["チ", "ッ"], emit: .いち)
        case "オ":
            expect(["ク"], emit: .おく)
        case "キ":
            expect(["ュ"], ["ウ"], emit: .きゅう)
        case "ク":
            parsed.append(.きゅう)
        case "ゴ":
            parsed.append(.ご)
        case "サ":
            expect(["ン"], emit: .さん)
        case "シ":
            // "シチ" is 7. A lone "シ" is 4, and the character read ahead starts the next reading.
            guard let next = remaining.next() else {
                parsed.append(.よん)
                return
            }
            if next == "チ" {
                parsed.append(.なな)
            } else {
                parsed.append(.よん)
                read(next)
            }
        case "ジ":
            expect(["ュ"], ["ウ", "ッ"], emit: .じゅう)
        case "セ":
            expect(["ン"], emit: .せん)
        case "ゼ":
            // "ゼロ" is 0; "ゼン" is the voiced form of 千.
            guard let next = remaining.next() else {
                parsed.append(.エラー)
                return
            }
            switch next {
            case "ロ":
                parsed.append(.れい)
            case "ン":
                parsed.append(.せん)
            default:
                parsed.append(.エラー)
            }
        case "チ":
            expect(["ョ"], ["ウ"], emit: .ちょう)
        case "ナ":
            expect(["ナ"], emit: .なな)
        case "ニ":
            parsed.append(.に)
        case "ハ":
            expect(["チ", "ッ"], emit: .はち)
        case "ヒ", "ビ", "ピ":
            expect(["ャ"], ["ク"], emit: .ひゃく)
        case "マ":
            // "マン" is 万; "マル" is 0.
            guard let next = remaining.next() else {
                parsed.append(.エラー)
                return
            }
            switch next {
            case "ン":
                parsed.append(.まん)
            case "ル":
                parsed.append(.れい)
            default:
                parsed.append(.エラー)
            }
        case "ヨ":
            expect(["ン"], emit: .よん)
        case "レ":
            expect(["イ"], emit: .れい)
        case "ロ":
            expect(["ク", "ッ"], emit: .ろく)
        default:
            parsed.append(.エラー)
        }
    }

    while let head = remaining.next() {
        read(head)
    }
    parsed.append(.おわり)
    return parsed
}
|
||||
|
||||
/// Converts number tokens into four-digit groups, most significant group first.
///
/// Each group is (thousands, hundreds, tens, ones). A group is closed by 万,
/// 億, 兆 or the end marker. The first closing token fixes the number of groups,
/// and every later closing token must have a lower rank.
/// - Parameter tokens: The tokens produced by `parseLiteral(input:)`.
/// - Returns: The groups, or an empty array when two digits follow each other
///   or the units are not in descending order.
private func parseTokens(tokens: [JapaneseNumber]) -> [(Number, Number, Number, Number)] {
    typealias Group = (Number, Number, Number, Number)
    let zeroGroup: Group = (.Zero, .Zero, .Zero, .Zero)
    let digitValues: [JapaneseNumber: Number] = [
        .いち: .One, .に: .Two, .さん: .Three, .よん: .Four, .ご: .Five,
        .ろく: .Six, .なな: .Seven, .はち: .Eight, .きゅう: .Nine, .れい: .Zero
    ]

    var highestRank: Int?
    var groups: [Group] = []
    var current = zeroGroup
    var pendingDigit: Number?

    for token in tokens {
        switch token {
        case .いち, .に, .さん, .よん, .ご, .ろく, .なな, .はち, .きゅう, .れい:
            // Two digits in a row cannot be read as one number.
            guard pendingDigit == nil else {
                return []
            }
            pendingDigit = digitValues[token]
        case .じゅう:
            // A unit without a preceding digit counts as one of that unit.
            current.2 = pendingDigit ?? .One
            pendingDigit = nil
        case .ひゃく:
            current.1 = pendingDigit ?? .One
            pendingDigit = nil
        case .せん:
            current.0 = pendingDigit ?? .One
            pendingDigit = nil
        case .おわり, .まん, .おく, .ちょう:
            current.3 = pendingDigit ?? .Zero
            // These four tokens always have a rank.
            guard let rank = token.maxDigit else {
                return []
            }
            if let highest = highestRank {
                guard rank < highest else {
                    return []
                }
                groups[highest - rank] = current
            } else {
                // The first closing token decides how many groups the number has.
                highestRank = rank
                groups = [Group](repeating: zeroGroup, count: rank)
                groups[0] = current
            }
            pendingDigit = nil
            current = zeroGroup
        case .エラー:
            break
        }
    }
    return groups
}
|
||||
|
||||
/// Builds number candidates for a katakana number reading.
///
/// When `head` can be read as a number, two candidates are returned: the
/// kanji-numeral spelling and the Arabic-digit spelling.
/// - Parameter head: The katakana reading.
/// - Returns: The two candidates, or an empty array when `head` is empty,
///   cannot be tokenized, consists of units only, or is not a valid number.
func getJapaneseNumberDicdata(head: String) -> [DicdataElement] {
    // An empty reading tokenizes to the end marker alone. It would otherwise produce two
    // candidates with an empty word, one of them scored `-16 + 4 / 0`.
    guard !head.isEmpty else {
        return []
    }
    let tokens = parseLiteral(input: head)
    guard !tokens.contains(.エラー) else {
        return []
    }
    let kanji = tokens.map(\.toKanji).joined()

    let roman: String
    if tokens.allSatisfy(\.isNumber) {
        // Digits only: read them off one by one.
        roman = tokens.map(\.toRoman).joined()
    } else if tokens.allSatisfy(\.isNotNumber) {
        // Units only.
        return []
    } else {
        let groups = parseTokens(tokens: tokens)
        if groups.isEmpty {
            return []
        }
        var digits: [Character] = []
        for group in groups {
            let fourDigits = [group.0, group.1, group.2, group.3]
            if digits.isEmpty {
                // Leading group: drop its leading zeros; a leading group that is all zeros is invalid.
                guard let firstNonZero = fourDigits.firstIndex(where: { $0 != .Zero }) else {
                    return []
                }
                digits.append(contentsOf: fourDigits[firstNonZero...].map(\.character))
            } else {
                digits.append(contentsOf: fourDigits.map(\.character))
            }
        }
        roman = String(digits)
    }
    return [
        DicdataElement(word: kanji, ruby: head, cid: CIDData.数.cid, mid: MIDData.数.mid, value: -17 + PValue(head.count) / 3),
        DicdataElement(word: roman, ruby: head, cid: CIDData.数.cid, mid: MIDData.数.mid, value: -16 + 4 / PValue(roman.count))
    ]
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,848 @@
|
||||
//
|
||||
// LearningMemory.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2021/02/01.
|
||||
// Copyright © 2021 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
/// Usage metadata of one learned entry.
private struct MetadataElement: CustomDebugStringConvertible {
    var lastUsedDay: UInt16
    var lastUpdatedDay: UInt16
    var count: UInt8

    /// Creates metadata whose last-used and last-updated days are both `day`.
    init(day: UInt16, count: UInt8) {
        self.lastUsedDay = day
        self.lastUpdatedDay = day
        self.count = count
    }

    var debugDescription: String {
        let fields = [
            "lastUsedDay: \(lastUsedDay)",
            "lastUpdatedDay: \(lastUpdatedDay)",
            "count: \(count)"
        ]
        return "(" + fields.joined(separator: ", ") + ")"
    }
}
|
||||
|
||||
/// 長期記憶用の構造体
|
||||
struct LongTermLearningMemory {
|
||||
/// The URL of the `.pause` file inside `directoryURL`.
private static func pauseFileURL(directoryURL: URL) -> URL {
    let fileName = ".pause"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
|
||||
/// Returns the location of the LOUDS file, or of its `.2` temporary counterpart.
private static func loudsFileURL(asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory.louds.2" : "memory.louds"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
|
||||
/// Returns the location of the metadata file, or of its `.2` temporary counterpart.
private static func metadataFileURL(asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory.memorymetadata.2" : "memory.memorymetadata"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
|
||||
/// Returns the location of the LOUDS characters file, or of its `.2` temporary counterpart.
private static func loudsCharsFileURL(asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory.loudschars2.2" : "memory.loudschars2"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
|
||||
/// Returns the location of the `loudstxt3` file identified by `value`, or of its `.2` temporary counterpart.
private static func loudsTxt3FileURL(_ value: String, asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory\(value).loudstxt3.2" : "memory\(value).loudstxt3"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
|
||||
/// Returns whether something exists on disk at the path of `url`.
private static func fileExist(_ url: URL) -> Bool {
    let manager = FileManager.default
    return manager.fileExists(atPath: url.path)
}
|
||||
/// Replaces the file at `toURL` with a copy of the file at `fromURL`.
/// - Parameters:
///   - fromURL: The file whose contents are copied. It is not removed.
///   - toURL: The file to be replaced. The existing file is removed and then recreated with the contents of `fromURL`.
/// - Throws: Any error thrown while copying `fromURL` to `toURL`.
private static func overwrite(from fromURL: URL, to toURL: URL) throws {
    let manager = FileManager.default
    // The removal may succeed or fail; both cases are fine:
    // - If the file exists but cannot be removed, the copy below fails, so the error still surfaces.
    // - If the file does not exist, the removal fails but the copy below succeeds.
    _ = try? manager.removeItem(at: toURL)
    // The source (`.2`) file is intentionally kept in place.
    try manager.copyItem(at: fromURL, to: toURL)
}
|
||||
|
||||
/// Returns whether the learning data is in a broken state, i.e. whether the `.pause` file exists in `directoryURL`.
/// - note: While the data is broken, it is advisable to turn learning off temporarily.
static func memoryCollapsed(directoryURL: URL) -> Bool {
    let pauseMarker = pauseFileURL(directoryURL: directoryURL)
    return fileExist(pauseMarker)
}
|
||||
|
||||
static var txtFileSplit: Int { 2048 }
|
||||
static var maxMemoryCount: Int = 8192
|
||||
|
||||
/// Packs a bit sequence into 64-bit words, most significant bit first.
///
/// When `bools.count` is not a multiple of 64, the last word is padded with `true` bits.
/// - Parameter bools: The bits to pack.
/// - Returns: `ceil(bools.count / 64)` words. An empty input yields an empty array.
private static func BoolToUInt64(_ bools: [Bool]) -> [UInt64] {
    let unit = 64
    // Round up so that an exact multiple of 64 does not produce an extra (out-of-bounds) word.
    let wordCount = (bools.count + unit - 1) / unit
    var result: [UInt64] = []
    result.reserveCapacity(wordCount)
    for wordIndex in 0 ..< wordCount {
        var word: UInt64 = 0
        for bitIndex in 0 ..< unit {
            let position = wordIndex * unit + bitIndex
            // Positions past the end of `bools` are padding and count as `true`.
            let bit = position < bools.count ? bools[position] : true
            if bit {
                word |= UInt64(1) << (unit - bitIndex - 1)
            }
        }
        result.append(word)
    }
    return result
}
|
||||
|
||||
/// Computes the value of a learned entry from its usage count and the length of its ruby.
/// - note:
///   The value is determined by the usage count (`metadata.count`) and the word length (`dicdata.ruby.count`).
///   A larger count gives a larger value, and a longer word gives a larger value.
///   For a word of length 1 the value ranges over `[-5, -8]`; for length 2 it ranges over `[-3, -6]`, and for length 4 over `[-2, -5]`.
fileprivate static func valueForData(metadata: MetadataElement, dicdata: DicdataElement) -> PValue {
    // 0 when the count is saturated (255), 1 when the count is 0.
    let unusedRatio = 1 - Double(metadata.count) / 255
    let lengthTerm = 4 / Double(dicdata.ruby.count)
    let countTerm = 3 * pow(unusedRatio, 3)
    return PValue(-1 - lengthTerm - countTerm)
}
|
||||
|
||||
/// The metadata of all entries that belong to one trie node.
fileprivate struct MetadataBlock {
    var metadata: [MetadataElement]

    /// Serializes the block as a 1-byte entry count followed by the raw bytes of the entries.
    ///
    /// Each entry is rebuilt through `MetadataElement(day:count:)`, so the serialized
    /// `lastUpdatedDay` is always equal to `lastUsedDay`.
    func makeBinary() -> Data {
        // The entry count is encoded in 1 byte.
        var entryCount = UInt8(self.metadata.count)
        var binary = Data(bytes: &entryCount, count: MemoryLayout<UInt8>.size)

        var elements = self.metadata.map { MetadataElement(day: $0.lastUsedDay, count: $0.count) }
        // NOTE(review): the byte count uses `MemoryLayout.size`, not `.stride`, while array elements are
        // laid out by stride — confirm this matches how the reader decodes the entries.
        let byteCount = MemoryLayout<MetadataElement>.size * elements.count
        binary.append(contentsOf: Data(bytes: &elements, count: byteCount))
        return binary
    }
}
|
||||
|
||||
/// The dictionary entries that belong to one trie node, in the form needed to write a `loudstxt3` entry.
fileprivate struct DataBlock {
    /// The ruby shared by the entries; empty when there is no entry with a non-empty ruby.
    var ruby: String
    var data: [(word: String, lcid: Int, rcid: Int, mid: Int, score: PValue)]

    /// The number of entries in this block.
    var count: Int {
        data.count
    }

    /// Creates a block from `dicdata`. The ruby is taken from the first element whose ruby is not empty.
    init(dicdata: [DicdataElement]) {
        self.ruby = dicdata.first(where: { !$0.ruby.isEmpty })?.ruby ?? ""
        self.data = dicdata.map {
            (word: $0.word, lcid: $0.lcid, rcid: $0.rcid, mid: $0.mid, score: $0.baseValue)
        }
    }

    /// Serializes the block: a 2-byte entry count, 10 bytes of numeric data per entry, then the tab-separated text.
    func makeLoudstxt3Entry() -> Data {
        // The entry count is encoded in 2 bytes.
        var entryCount = UInt16(self.count)
        var binary = Data(bytes: &entryCount, count: MemoryLayout<UInt16>.size)

        // Numeric part: 10 bytes per entry (three UInt16 values and one Float32), i.e. 10 * count bytes.
        for item in self.data {
            assert(0 <= item.lcid && item.lcid <= UInt16.max)
            assert(0 <= item.rcid && item.rcid <= UInt16.max)
            assert(0 <= item.mid && item.mid <= UInt16.max)
            for identifier in [item.lcid, item.rcid, item.mid] {
                var encoded = UInt16(identifier)
                binary.append(contentsOf: Data(bytes: &encoded, count: MemoryLayout<UInt16>.size))
            }
            var score = Float32(item.score)
            binary.append(contentsOf: Data(bytes: &score, count: MemoryLayout<Float32>.size))
        }

        // Text part: the very first field is the ruby; a word equal to the ruby is written as an empty field.
        let words = self.data.map { $0.word == self.ruby ? "" : $0.word }
        let text = ([self.ruby] + words).joined(separator: "\t")
        binary.append(contentsOf: text.data(using: .utf8, allowLossyConversion: false)!)
        return binary
    }
}
|
||||
|
||||
/// Removes every file in `directoryURL` that is related to the learning data.
/// - Throws: An error when the directory cannot be listed or a matching file cannot be removed.
static func reset(directoryURL: URL) throws {
    let removableSuffixes = [
        // Learning data files
        ".loudstxt3",
        ".loudschars2",
        ".memorymetadata",
        ".louds",
        // Temporary files
        ".loudstxt3.2",
        ".loudschars2.2",
        ".memorymetadata.2",
        ".louds.2",
        // The `.pause` file
        ".pause",
        // Data file of the old learning feature
        "learningMemory.txt"
    ]
    let manager = FileManager.default
    let contents = try manager.contentsOfDirectory(at: directoryURL, includingPropertiesForKeys: nil)
    for file in contents where file.isFileURL {
        guard removableSuffixes.contains(where: { file.path.hasSuffix($0) }) else {
            continue
        }
        try manager.removeItem(at: file)
    }
}
|
||||
|
||||
/// Merges the temporary learning data with the long-term learning data on disk and writes the result back.
///
/// Entries read from disk are decayed (the count is halved for every 32 days since the last update),
/// dropped when the count reaches zero or when they have not been used for 128 days or more,
/// and dropped when they are contained in `forgetTargets`.
/// - Parameters:
///   - tempTrie: The temporary memory that the long-term entries are appended to.
///   - forgetTargets: Entries that must not be carried over from the long-term memory.
///   - directoryURL: The directory that holds the learning data files.
/// - Throws: An error when the recovery from a `.pause` state fails or when `update(trie:directoryURL:)` fails.
static func merge(tempTrie: TemporalLearningMemoryTrie, forgetTargets: [DicdataElement] = [], directoryURL: URL) throws {
    // MARK: If `.pause` exists, first try to finish the interrupted overwrite from the `.2` files; give up merging if that fails.
    if fileExist(pauseFileURL(directoryURL: directoryURL)) {
        debug("LongTermLearningMemory merge collapsion detected, trying recovery...")
        try overwriteTempFiles(
            directoryURL: directoryURL,
            loudsFileTemp: nil,
            loudsCharsFileTemp: nil,
            metadataFileTemp: nil,
            loudsTxt3FileCount: nil,
            removingRead2File: true
        )
    }

    // MARK: From here on, the previous file update is known to have completed.
    let startTime = Date()
    let today = LearningManager.today
    var newTrie = tempTrie
    // Layout of the metadata file:
    // dataCount(UInt32), count, data*count, count, data*count, ...
    // MARK: Reading fails when the metadata file does not exist (nothing learned yet); a zero entry count is used then.
    let ltMetadata = (try? Data(contentsOf: metadataFileURL(asTemporaryFile: false, directoryURL: directoryURL))) ?? Data([.zero, .zero, .zero, .zero])
    // The first 4 bytes hold the entry count.
    var metadataOffset = 0
    let entryCount = ltMetadata[metadataOffset ..< metadataOffset + 4].toArray(of: UInt32.self)[0]
    metadataOffset += 4

    debug("LongTermLearningMemory merge entryCount", entryCount, ltMetadata.count)

    // Process each loudstxt3 file.
    for loudstxtIndex in 0 ..< Int(entryCount) / txtFileSplit + 1 {
        let loudstxtData: Data
        do {
            loudstxtData = try Data(contentsOf: loudsTxt3FileURL("\(loudstxtIndex)", asTemporaryFile: false, directoryURL: directoryURL))
        } catch {
            debug("LongTermLearningMemory merge failed to read \(loudstxtIndex)", error)
            continue
        }
        // File header: a 2-byte entry count followed by one 4-byte start offset per entry.
        let count = Int(loudstxtData[0 ..< 2].toArray(of: UInt16.self)[0])
        let indices = loudstxtData[2 ..< 2 + 4 * count].toArray(of: UInt32.self)
        for i in 0 ..< count {
            // Read the metadata: 1 byte for the number of items, then 5 bytes per item.
            let itemCount = Int(ltMetadata[metadataOffset ..< metadataOffset + 1].toArray(of: UInt8.self)[0])
            metadataOffset += 1
            let metadata = ltMetadata[metadataOffset ..< metadataOffset + itemCount * 5].toArray(of: MetadataElement.self)
            metadataOffset += itemCount * 5

            // Byte range of this entry inside the binary.
            let startIndex = Int(indices[i])
            let endIndex = i == (indices.endIndex - 1) ? loudstxtData.endIndex : Int(indices[i + 1])
            let elements = LOUDS.parseBinary(binary: loudstxtData[startIndex ..< endIndex])
            // Filter the parsed elements according to the metadata and add the survivors to the trie.
            guard let ruby = elements.first?.ruby else {
                continue
            }
            var newDicdata: [DicdataElement] = []
            var newMetadata: [MetadataElement] = []
            for (dicdataElement, metadataElement) in zip(elements, metadata) {
                // Skip entries that are to be forgotten.
                if forgetTargets.contains(dicdataElement) {
                    continue
                }
                if ruby != dicdataElement.ruby {
                    continue
                }
                var dicdataElement = dicdataElement
                var metadataElement = metadataElement
                // Fail-safe for anomalous data: the subtractions below operate on `UInt16` and would
                // overflow if either stored day were later than `today`, so such entries are skipped.
                guard today >= metadataElement.lastUpdatedDay, today >= metadataElement.lastUsedDay else {
                    continue
                }
                // Halve the count for every 32 days.
                while today - metadataElement.lastUpdatedDay > 32 {
                    metadataElement.count >>= 1
                    metadataElement.lastUpdatedDay += 32
                }
                // Drop words whose count became zero or that have not been used for 128 days or more.
                if metadataElement.count == 0 || today - metadataElement.lastUsedDay >= 128 {
                    continue
                }
                dicdataElement.baseValue = valueForData(metadata: metadataElement, dicdata: dicdataElement)
                newDicdata.append(dicdataElement)
                newMetadata.append(metadataElement)
            }
            guard let chars = LearningManager.keyToChars(ruby) else {
                continue
            }
            newTrie.append(dicdata: newDicdata, chars: chars, metadata: newMetadata)
        }
        // When the memory limit is exceeded, the remaining files are not read
        // (original note: the longer entries are the ones discarded).
        if newTrie.dicdata.count > Self.maxMemoryCount {
            break
        }
    }
    // Build the LOUDS from the data in `newTrie` and write it out.
    try self.update(trie: newTrie, directoryURL: directoryURL)
    debug("LongTermLearningMemory merge ⏰", Date().timeIntervalSince(startTime), newTrie.dicdata.count)
}
|
||||
|
||||
/// Builds the binary of one `loudstxt3` file: a 2-byte entry count, a table of 4-byte start offsets, then the entries.
fileprivate static func make_loudstxt3(lines: [DataBlock]) -> Data {
    let lineCount = lines.count
    // The number of entries, mapped to UInt16.
    let countField = Data(bytes: [UInt16(lineCount)], count: 2)

    let entries = lines.map { $0.makeLoudstxt3Entry() }

    // The first entry starts right after the count field and the offset table.
    let firstOffset: UInt32 = 2 + UInt32(lineCount) * UInt32(MemoryLayout<UInt32>.size)
    var offsets: [UInt32] = [firstOffset]
    for entry in entries.dropLast() {
        offsets.append(offsets[offsets.count - 1] + UInt32(entry.count))
    }
    let header = Data(bytes: offsets, count: MemoryLayout<UInt32>.size * offsets.count)

    var binary = countField
    binary.append(header)
    for entry in entries {
        binary.append(entry)
    }
    return binary
}
|
||||
|
||||
/// Errors thrown by `update(trie:directoryURL:)`.
enum UpdateError: Error {
    /// The update is aborted because the `.pause` file exists.
    case pauseFileExist
}
|
||||
|
||||
/// Writes the learning data of `trie` to disk. To write the files safely, the following steps are taken:
///
/// 1. Write each file under a temporary name such as `memory.louds.2`.
/// 2. Write the `.pause` file.
/// 3. Copy each `.2` file over its original file.
/// 4. Remove the `.pause` file.
///
/// On the reading side:
/// * When `.pause` does not exist, the files without `.2` are read.
/// * When `.pause` exists, step 3 and later are re-run at a suitable time, and learning is suspended in the meantime.
///
/// With this procedure the files without `.2` are consistent while `.pause` is absent, and the `.2` files are
/// consistent while `.pause` is present, so a consistent set of files is always available.
///
/// For example, if an error occurs during step 1, the files from before the update are simply read the next time the keyboard is opened.
///
/// If an error occurs during step 3, learning is suspended the next time the keyboard is opened. Step 3 is then
/// run again when it is closed, which updates all files safely.
/// - Throws: `UpdateError.pauseFileExist` when `.pause` already exists, or any error raised while writing or copying files.
static func update(trie: TemporalLearningMemoryTrie, directoryURL: URL) throws {
    // MARK: Fail if `.pause` exists; the recovery has to be carried out first in that case.
    if fileExist(pauseFileURL(directoryURL: directoryURL)) {
        throw UpdateError.pauseFileExist
    }

    // MARK: Write each file with the `.2` suffix.
    var nodes2Characters: [UInt8] = [0x0, 0x0]
    var dicdata: [DataBlock] = [.init(dicdata: []), .init(dicdata: [])]
    var metadata: [MetadataBlock] = [.init(metadata: []), .init(metadata: [])]
    var bits: [Bool] = [true, false]
    var currentNodes: [(UInt8, Int)] = trie.nodes[0].children.sorted(by: {$0.key < $1.key})
    bits += [Bool](repeating: true, count: currentNodes.count) + [false]
    // Walk the trie level by level; children are visited in ascending key order.
    while !currentNodes.isEmpty {
        var nextNodes: [(UInt8, Int)] = []
        for (char, nodeIndex) in currentNodes {
            let node = trie.nodes[nodeIndex]
            nodes2Characters.append(char)
            dicdata.append(DataBlock(dicdata: node.dataIndices.map { trie.dicdata[$0] }))
            metadata.append(MetadataBlock(metadata: node.dataIndices.map { trie.metadata[$0] }))
            bits += [Bool](repeating: true, count: node.children.count) + [false]
            nextNodes += node.children.sorted(by: {$0.key < $1.key}).map { ($0.key, $0.value) }
        }
        currentNodes = nextNodes
    }

    let bytes = Self.BoolToUInt64(bits)
    let loudsFileTemp = loudsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try Data(bytes: bytes, count: bytes.count * 8).write(to: loudsFileTemp)

    let loudsCharsFileTemp = loudsCharsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try Data(bytes: nodes2Characters, count: nodes2Characters.count).write(to: loudsCharsFileTemp)

    let metadataFileTemp = metadataFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    // The number of entries, mapped to UInt32, followed by every metadata block.
    var metadataBinary = Data(bytes: [UInt32(metadata.count)], count: 4)
    for block in metadata {
        metadataBinary.append(block.makeBinary())
    }
    try metadataBinary.write(to: metadataFileTemp)

    // The entries are split into files of `txtFileSplit` blocks each.
    let loudsTxt3FileCount = dicdata.count / txtFileSplit + 1
    let blockRanges: [Range<Int>] = (0 ..< loudsTxt3FileCount).map { fileIndex in
        let lowerBound = fileIndex * txtFileSplit
        let upperBound = min(dicdata.count, (fileIndex + 1) * txtFileSplit)
        return lowerBound ..< upperBound
    }
    for (fileIndex, blockRange) in blockRanges.enumerated() {
        let binary = make_loudstxt3(lines: Array(dicdata[blockRange]))
        try binary.write(to: loudsTxt3FileURL("\(fileIndex)", asTemporaryFile: true, directoryURL: directoryURL), options: .atomic)
    }

    // MARK: Write the `.pause` file.
    try Data().write(to: pauseFileURL(directoryURL: directoryURL))

    // MARK: Overwrite the original files with the `.2` files.
    try overwriteTempFiles(
        directoryURL: directoryURL,
        loudsFileTemp: loudsFileTemp,
        loudsCharsFileTemp: loudsCharsFileTemp,
        metadataFileTemp: metadataFileTemp,
        loudsTxt3FileCount: loudsTxt3FileCount,
        // MARK: On success the `.pause` file is removed as well.
        removingRead2File: true
    )
}
|
||||
|
||||
/// Copies every temporary (`.2`) file over its original file and optionally removes the `.pause` file.
/// - Parameters:
///   - directoryURL: The directory that holds the learning data files.
///   - loudsFileTemp: The temporary LOUDS file; when `nil`, the default temporary location is used.
///   - loudsCharsFileTemp: The temporary LOUDS characters file; when `nil`, the default temporary location is used.
///   - metadataFileTemp: The temporary metadata file; when `nil`, the default temporary location is used.
///   - loudsTxt3FileCount: The number of `loudstxt3` files; when `nil`, every `.loudstxt3.2` file found in the directory is used.
///   - removingRead2File: Whether the `.pause` file is removed after all files have been overwritten.
/// - note: Temporary files must not be deleted until every overwrite has succeeded. For safety, no temporary file other than `.pause` is ever deleted.
private static func overwriteTempFiles(directoryURL: URL, loudsFileTemp: URL?, loudsCharsFileTemp: URL?, metadataFileTemp: URL?, loudsTxt3FileCount: Int?, removingRead2File: Bool) throws {
    let loudsCharsSource = loudsCharsFileTemp ?? loudsCharsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try overwrite(from: loudsCharsSource, to: loudsCharsFileURL(asTemporaryFile: false, directoryURL: directoryURL))

    let metadataSource = metadataFileTemp ?? metadataFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try overwrite(from: metadataSource, to: metadataFileURL(asTemporaryFile: false, directoryURL: directoryURL))

    switch loudsTxt3FileCount {
    case .some(let fileCount):
        for i in 0 ..< fileCount {
            try overwrite(
                from: loudsTxt3FileURL("\(i)", asTemporaryFile: true, directoryURL: directoryURL),
                to: loudsTxt3FileURL("\(i)", asTemporaryFile: false, directoryURL: directoryURL)
            )
        }
    case .none:
        let contents = try FileManager.default.contentsOfDirectory(at: directoryURL, includingPropertiesForKeys: nil)
        for file in contents where file.isFileURL && file.path.hasSuffix(".loudstxt3.2") {
            // Dropping the trailing `.2` gives the path of the original file.
            try overwrite(from: file, to: URL(fileURLWithPath: String(file.path.dropLast(2))))
        }
    }

    // The reader loads `.louds` first, so it is safer to write it last.
    let loudsSource = loudsFileTemp ?? loudsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try overwrite(from: loudsSource, to: loudsFileURL(asTemporaryFile: false, directoryURL: directoryURL))

    guard removingRead2File else {
        return
    }
    try FileManager.default.removeItem(at: pauseFileURL(directoryURL: directoryURL))
}
|
||||
}
|
||||
|
||||
/// 一時記憶用のデータなので、複雑な形状にしない。
|
||||
struct TemporalLearningMemoryTrie {
|
||||
/// One node of the trie.
struct Node {
    /// Indices of the entries stored at this node.
    var dataIndices = [Int]()
    /// Mapping from a character ID to the index of the child node.
    var children = [UInt8: Int]()
}
|
||||
|
||||
fileprivate var nodes = [Node()]
|
||||
fileprivate var dicdata: [DicdataElement] = []
|
||||
fileprivate var metadata: [MetadataElement] = []
|
||||
|
||||
/// Adds, in one go, entries that are known to belong to the same node.
/// Intended mainly for use while merging.
/// - Parameters:
///   - dicdata: The entries to add.
///   - chars: The character IDs that identify the node.
///   - metadata: The metadata of the entries; must have the same count as `dicdata`, otherwise nothing is added.
fileprivate mutating func append(dicdata: [DicdataElement], chars: [UInt8], metadata: [MetadataElement]) {
    guard dicdata.count == metadata.count else {
        debug("TemporalLearningMemoryTrie append: count of dicdata and metadata do not match")
        return
    }
    // Walk down to the node for `chars`, creating missing nodes on the way.
    var index = 0
    for char in chars {
        if let existing = nodes[index].children[char] {
            index = existing
            continue
        }
        let created = nodes.endIndex
        nodes[index].children[char] = created
        nodes.append(Node())
        index = created
    }
    for (entry, entryMetadata) in zip(dicdata, metadata) {
        let known = nodes[index].dataIndices.first(where: {Self.sameDicdataIfRubyIsEqual(left: self.dicdata[$0], right: entry)})
        guard let dataIndex = known else {
            // The node does not hold the same entry yet: append it to the storage and register its index at the node.
            let newIndex = self.dicdata.endIndex
            self.dicdata.append(entry)
            self.metadata.append(entryMetadata)
            nodes[index].dataIndices.append(newIndex)
            continue
        }
        // The node already holds the same entry: add up the counts (saturating) and keep the later days.
        withMutableValue(&self.metadata[dataIndex]) { current in
            current.lastUsedDay = max(current.lastUsedDay, entryMetadata.lastUsedDay)
            current.lastUpdatedDay = max(current.lastUpdatedDay, entryMetadata.lastUpdatedDay)
            current.count += min(.max - current.count, entryMetadata.count)
        }
        self.dicdata[dataIndex] = entry
    }
}
|
||||
|
||||
/// Decides whether two `DicdataElement`s are the same entry, given that their rubies are already known to be equal.
private static func sameDicdataIfRubyIsEqual(left: DicdataElement, right: DicdataElement) -> Bool {
    guard left.lcid == right.lcid else {
        return false
    }
    guard left.rcid == right.rcid else {
        return false
    }
    return left.word == right.word
}
|
||||
|
||||
/// Records one use of `dicdataElement` at the node identified by `chars`.
///
/// A known entry gets its count incremented (saturating) and its last-used day set to today;
/// an unknown entry is stored with a count of 1 and a value computed from that metadata.
mutating func memorize(dicdataElement: DicdataElement, chars: [UInt8]) {
    // Walk down to the node for `chars`, creating missing nodes on the way.
    var index = 0
    for char in chars {
        if let existing = nodes[index].children[char] {
            index = existing
            continue
        }
        let created = nodes.endIndex
        nodes[index].children[char] = created
        nodes.append(Node())
        index = created
    }
    // Original note: a coarse day counter, but it lasts for roughly 200 years, which is good enough.
    let day = LearningManager.today
    let known = nodes[index].dataIndices.first(where: {Self.sameDicdataIfRubyIsEqual(left: self.dicdata[$0], right: dicdataElement)})
    guard let dataIndex = known else {
        let newIndex = self.dicdata.endIndex
        let metadataElement = MetadataElement(day: day, count: 1)
        var newElement = dicdataElement
        newElement.baseValue = LongTermLearningMemory.valueForData(metadata: metadataElement, dicdata: newElement)
        self.dicdata.append(newElement)
        self.metadata.append(metadataElement)
        nodes[index].dataIndices.append(newIndex)
        return
    }
    withMutableValue(&self.metadata[dataIndex]) { current in
        current.count += min(.max - current.count, 1)
        current.lastUsedDay = day
    }
}
|
||||
|
||||
/// Detaches `dicdataElement` from the node identified by `chars`.
/// - Returns: `false` when no node exists for `chars`; `true` otherwise.
@discardableResult
mutating func forget(dicdataElement: DicdataElement, chars: [UInt8]) -> Bool {
    var index = 0
    for char in chars {
        guard let nextIndex = nodes[index].children[char] else {
            // The node does not exist.
            return false
        }
        index = nextIndex
    }
    // The node exists: remove the matching indices from `dataIndices` (the entry storage itself is left untouched).
    nodes[index].dataIndices.removeAll(where: {self.dicdata[$0] == dicdataElement})
    return true
}
|
||||
|
||||
/// Returns the entries stored at exactly the node identified by `chars`, or an empty array when that node does not exist.
func perfectMatch(chars: [UInt8]) -> [DicdataElement] {
    var index = 0
    for char in chars {
        guard let nextIndex = nodes[index].children[char] else {
            return []
        }
        index = nextIndex
    }
    return nodes[index].dataIndices.map { self.dicdata[$0] }
}
|
||||
|
||||
/// Returns the entries of every node passed while walking along `chars`, limited to the offsets contained in `depth`.
/// The walk stops at the first character that has no matching child.
func throughMatch(chars: [UInt8], depth: Range<Int>) -> [DicdataElement] {
    var index = 0
    var collected: [Int] = []
    for (offset, char) in chars.enumerated() {
        guard let nextIndex = nodes[index].children[char] else {
            break
        }
        index = nextIndex
        if depth.contains(offset) {
            collected.append(contentsOf: nodes[index].dataIndices)
        }
    }
    return collected.map { self.dicdata[$0] }
}
|
||||
|
||||
/// Returns the entries of the node identified by `chars` and of all of its descendants,
/// or an empty array when that node does not exist.
func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
    var index = 0
    for char in chars {
        guard let nextIndex = nodes[index].children[char] else {
            return []
        }
        index = nextIndex
    }
    // Visit the descendants using an explicit stack.
    var pendingNodes: [Int] = Array(nodes[index].children.values)
    var collected: [Int] = nodes[index].dataIndices
    while let nodeIndex = pendingNodes.popLast() {
        pendingNodes.append(contentsOf: nodes[nodeIndex].children.values)
        collected.append(contentsOf: nodes[nodeIndex].dataIndices)
    }
    return collected.map { self.dicdata[$0] }
}
|
||||
}
|
||||
|
||||
final class LearningManager {
|
||||
/// Reloads the character-to-ID table from `louds/charID.chid` under `bundleURL`.
/// The ID of a character is its offset in the file. When the file cannot be read, the table is left unchanged.
private static func updateChar2Int8(bundleURL: URL) {
    let chidURL = bundleURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
    let string: String
    do {
        string = try String(contentsOf: chidURL, encoding: .utf8)
    } catch {
        debug("ファイルが存在しません: \(error)")
        return
    }
    let pairs = string.enumerated().map { ($0.element, UInt8($0.offset)) }
    Self.char2UInt8 = [Character: UInt8].init(uniqueKeysWithValues: pairs)
}
|
||||
private static var char2UInt8: [Character: UInt8] = [:]
|
||||
|
||||
/// The current day, expressed as the number of days since the Unix epoch minus 19000.
static var today: UInt16 {
    let secondsPerDay = 86400
    let daysSinceEpoch = Int(Date().timeIntervalSince1970) / secondsPerDay
    return UInt16(daysSinceEpoch) - 19000
}
|
||||
|
||||
/// Converts `key` to character IDs using the loaded table.
/// - Returns: The IDs, or `nil` when `key` contains a character that is not in the table.
static func keyToChars(_ key: some StringProtocol) -> [UInt8]? {
    var chars: [UInt8] = []
    chars.reserveCapacity(key.count)
    for character in key {
        guard let char = char2UInt8[character] else {
            return nil
        }
        chars.append(char)
    }
    return chars
}
|
||||
|
||||
private var temporaryMemory: TemporalLearningMemoryTrie = .init()
|
||||
private var options: ConvertRequestOptions = .default
|
||||
private var memoryCollapsed: Bool = false
|
||||
|
||||
/// Whether learning is usable: the memory is not collapsed and the learning type uses the memory.
var enabled: Bool {
    guard !self.memoryCollapsed else {
        return false
    }
    return self.options.learningType.needUsingMemory
}
|
||||
|
||||
/// Creates a manager with the current `options`, checks whether the stored memory is collapsed,
/// and loads the character table when the learning type uses the memory.
init() {
    self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: self.options.memoryDirectoryURL)
    if memoryCollapsed {
        // Warn that the learning data is in a broken state.
        debug("LearningManager init: Memory Collapsed")
    }
    if options.learningType.needUsingMemory {
        Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL)
    }
}
|
||||
|
||||
/// Applies new request options to the manager.
/// - Parameter options: The options to apply.
/// - Returns: Whether the cache should be reset or not.
func setRequestOptions(options: ConvertRequestOptions) -> Bool {
    // Reload the character table when the dictionary resource has changed.
    let dictionaryChanged = options.dictionaryResourceURL != self.options.dictionaryResourceURL
    if dictionaryChanged {
        Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL)
    }
    self.options = options
    LongTermLearningMemory.maxMemoryCount = options.maxMemoryCount

    switch options.learningType {
    case .nothing:
        self.temporaryMemory = TemporalLearningMemoryTrie()
    case .inputAndOutput, .onlyOutput:
        break
    }

    // Handle a pending reset request as well.
    guard options.shouldResetMemory else {
        return false
    }
    self.reset()
    self.options.shouldResetMemory = false
    return true
}
|
||||
|
||||
/// Looks up the temporary memory for entries stored exactly at `charIDs`.
/// Returns an empty array when the learning type does not use the memory.
func temporaryPerfectMatch(charIDs: [UInt8]) -> [DicdataElement] {
    guard options.learningType.needUsingMemory else {
        return []
    }
    return self.temporaryMemory.perfectMatch(chars: charIDs)
}
|
||||
|
||||
/// Looks up the temporary memory for entries along the path of `charIDs`, limited to `depth`.
/// Returns an empty array when the learning type does not use the memory.
func temporaryThroughMatch(charIDs: [UInt8], depth: Range<Int>) -> [DicdataElement] {
    guard options.learningType.needUsingMemory else {
        return []
    }
    return self.temporaryMemory.throughMatch(chars: charIDs, depth: depth)
}
|
||||
|
||||
/// Looks up the temporary memory for entries whose key starts with `charIDs`.
/// Returns an empty array when the learning type does not use the memory.
func temporaryPrefixMatch(charIDs: [UInt8]) -> [DicdataElement] {
    guard options.learningType.needUsingMemory else {
        return []
    }
    return self.temporaryMemory.prefixMatch(chars: charIDs)
}
|
||||
|
||||
/// Memorizes a confirmed conversion result in the temporary memory at three granularities:
/// single words, pairs of adjacent clauses (bigrams), and the whole sequence.
/// Does nothing when the learning type does not update the memory.
func update(data: [DicdataElement]) {
    guard options.learningType.needUpdateMemory else {
        return
    }

    /// Appends `datum` to the end of `clause` when there is no clause boundary between them.
    func extend(_ clause: inout DicdataElement, with datum: DicdataElement) {
        clause.word.append(contentsOf: datum.word)
        clause.ruby.append(contentsOf: datum.ruby)
        clause.rcid = datum.rcid
        if DicdataStore.includeMMValueCalculation(datum) {
            clause.mid = datum.mid
        }
        clause.baseValue += datum.baseValue
    }

    /// Builds the element that represents `first` followed by `second`.
    /// NOTE(review): `rcid` is taken from the first clause rather than the second — confirm this is intended.
    func makeBigram(_ first: DicdataElement, _ second: DicdataElement) -> DicdataElement {
        DicdataElement(
            word: first.word + second.word,
            ruby: first.ruby + second.ruby,
            lcid: first.lcid,
            rcid: first.rcid,
            mid: second.mid,
            value: first.baseValue + second.baseValue
        )
    }

    // Word level
    for datum in data where DicdataStore.needWValueMemory(datum) {
        if let chars = Self.keyToChars(datum.ruby) {
            self.temporaryMemory.memorize(dicdataElement: datum, chars: chars)
        }
    }

    if data.count == 1 {
        return
    }

    // Clause-level bigrams
    var firstClause: DicdataElement?
    var secondClause: DicdataElement?
    for datum in data {
        guard var first = firstClause else {
            firstClause = datum
            continue
        }
        // NOTE(review): the boundary is tested against the first clause's `rcid` even when a second clause exists — confirm.
        let isBoundary = DicdataStore.isClause(first.rcid, datum.lcid)
        guard var second = secondClause else {
            if isBoundary {
                // Only the first clause exists and this is a clause boundary: start the second clause.
                secondClause = datum
            } else {
                // Only the first clause exists and this is not a clause boundary: extend the first clause.
                extend(&first, with: datum)
                firstClause = first
            }
            continue
        }
        guard isBoundary else {
            // Both clauses exist and this is not a clause boundary: extend the second clause.
            extend(&second, with: datum)
            secondClause = second
            continue
        }
        // Both clauses exist and this is a clause boundary: build the bigram and memorize it.
        let element = makeBigram(first, second)
        // Push the first clause out.
        firstClause = second
        secondClause = datum
        if let chars = Self.keyToChars(element.ruby) {
            debug("LearningManager update first/second", element)
            self.temporaryMemory.memorize(dicdataElement: element, chars: chars)
        }
    }
    // Memorize the bigram made of the two clauses that remain after the loop.
    if let firstClause, let secondClause {
        let element = makeBigram(firstClause, secondClause)
        if let chars = Self.keyToChars(element.ruby) {
            debug("LearningManager update first/second rest", element)
            self.temporaryMemory.memorize(dicdataElement: element, chars: chars)
        }
    }

    // The whole sequence
    let element = DicdataElement(
        word: data.reduce(into: "") {$0.append(contentsOf: $1.word)},
        ruby: data.reduce(into: "") {$0.append(contentsOf: $1.ruby)},
        lcid: data.first?.lcid ?? CIDData.一般名詞.cid,
        rcid: data.last?.rcid ?? CIDData.一般名詞.cid,
        mid: data.last?.mid ?? MIDData.一般.mid,
        value: data.reduce(into: 0) {$0 += $1.baseValue}
    )
    if let chars = Self.keyToChars(element.ruby) {
        debug("LearningManager update all", element)
        self.temporaryMemory.memorize(dicdataElement: element, chars: chars)
    }
}
|
||||
|
||||
/// Resets what has been learned about the vocabulary contained in `data`.
func forgetMemory(data: [DicdataElement]) {
    // 1. Remove the entries from the temporary memory.
    for element in data {
        if let chars = Self.keyToChars(element.ruby) {
            self.temporaryMemory.forget(dicdataElement: element, chars: chars)
        }
    }
    // 2. Remove the entries from the long-term memory.
    let directoryURL = self.options.memoryDirectoryURL
    do {
        try LongTermLearningMemory.merge(tempTrie: self.temporaryMemory, forgetTargets: data, directoryURL: directoryURL)
        // The merge is done, so the temporary memory is emptied.
        self.temporaryMemory = TemporalLearningMemoryTrie()
    } catch {
        // When the update fails, simply give up.
        debug("LearningManager resetLearning: Failed to save LongTermLearningMemory", error)
    }
    // Refresh the state.
    self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: directoryURL)
}
|
||||
|
||||
/// Merges the temporary memory into the long-term memory on disk.
/// Does nothing when the learning type does not update the memory.
func save() {
    guard options.learningType.needUpdateMemory else {
        return
    }
    let directoryURL = self.options.memoryDirectoryURL
    do {
        try LongTermLearningMemory.merge(tempTrie: self.temporaryMemory, directoryURL: directoryURL)
        // The merge is done, so the temporary memory is emptied.
        self.temporaryMemory = TemporalLearningMemoryTrie()
    } catch {
        // When the update fails, simply give up.
        debug("LearningManager save: Failed to save LongTermLearningMemory", error)
    }
    // Refresh the state.
    self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: directoryURL)
}
|
||||
|
||||
/// Clears the temporary memory and removes the long-term memory files. A failure to remove files is only logged.
func reset() {
    self.temporaryMemory = TemporalLearningMemoryTrie()
    let directoryURL = self.options.memoryDirectoryURL
    do {
        try LongTermLearningMemory.reset(directoryURL: directoryURL)
    } catch {
        debug("LearningManager reset failed", error)
    }
}
|
||||
}
|
||||
@@ -0,0 +1,290 @@
|
||||
//
|
||||
// TypoCorrection.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2022/12/18.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
import SwiftUtils
|
||||
|
||||
// MARK: 誤り訂正用のAPI
|
||||
extension ComposingText {
|
||||
/// Tells whether a partially built convert target can be dropped early.
///
/// Only the first character of the first element is examined (converted to katakana).
/// - Parameter components: The convert-target elements built so far.
/// - Returns: `true` when that character is not a roman letter and `DicdataStore.existLOUDS(for:)`
///   reports nothing for it; `false` otherwise, including when there is no first character.
private func shouldBeRemovedForDicdataStore(components: [ConvertTargetElement]) -> Bool {
    guard let head = components.first?.string.first?.toKatakana() else {
        return false
    }
    if CharacterUtils.isRomanLetter(head) {
        return false
    }
    return !DicdataStore.existLOUDS(for: head)
}
|
||||
|
||||
/// closedRangeでもらう
|
||||
/// getRangeWithTyposの複数版にあたる。`result`の計算が一回で済む分、高速になる。
|
||||
/// 例えば`left=4, rightIndexRange=6..<10`の場合、`4...6, 4...7, 4...8, 4...9`の範囲で計算する
|
||||
/// `left <= rightIndexRange.startIndex`が常に成り立つ
|
||||
/// Enumerates typo-corrected readings for every input span that starts at `left` and whose last
/// index lies in `rightIndexRange`.
///
/// This is the multi-range counterpart of `getRangeWithTypos`: the candidate table is built once
/// for all right ends. For example `left = 4, rightIndexRange = 6..<10` covers the closed spans
/// `4...6`, `4...7`, `4...8` and `4...9`.
/// - Parameters:
///   - left: Index in `input` where every span starts; `left <= rightIndexRange.startIndex` is expected.
///   - rightIndexRange: The indices at which a span may end (inclusive last index of the span).
/// - Returns: A map from katakana reading to the inclusive end index it was read up to and its
///   accumulated typo penalty. Empty when the widest span contains no element.
func getRangesWithTypos(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: (endIndex: Int, penalty: PValue)] {
    let count = rightIndexRange.endIndex - left
    debug("getRangesWithTypos", left, rightIndexRange, count)
    // An empty span has no candidates; bailing out here also avoids indexing `nodes[0]` on an empty table.
    guard count > 0 else {
        return [:]
    }
    // nodes[i] lists the typo candidates that start at input[left + i] and span 1 or 2 elements.
    let nodes = (0..<count).map {(i: Int) in
        Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
            let j = i + k
            if count <= j {
                return []
            }
            return Self.getTypo(self.input[left + i ... left + j])
        }
    }

    let maxPenalty: PValue = 3.5 * 3
    // Performance tuning note (from the original author): collecting into an Array and converting to a
    // Dictionary at the end is faster than filling a Dictionary directly.
    var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []

    // Depth-first enumeration. Each entry is a partial reading together with the number of input
    // elements it has consumed and the penalty accumulated so far.
    var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
        guard let firstElement = typoCandidate.inputElements.first,
              let lastElement = typoCandidate.inputElements.last,
              Self.isLeftSideValid(first: firstElement, of: self.input, from: left) else {
            return nil
        }
        var convertTargetElements = [ConvertTargetElement]()
        for element in typoCandidate.inputElements {
            ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
        }
        return (convertTargetElements, lastElement, typoCandidate.inputElements.count, typoCandidate.weight)
    }

    while let (convertTargetElements, lastElement, consumed, penalty) = stack.popLast() {
        // Record the reading whenever it ends on one of the requested right ends; unlike the
        // single-range variant the search continues afterwards.
        if rightIndexRange.contains(consumed + left - 1) {
            if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: consumed + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
                stringToInfo.append((convertTarget, (consumed + left - 1, penalty)))
            }
        }
        // Nothing left to consume.
        if nodes.endIndex <= consumed {
            continue
        }
        if penalty >= maxPenalty {
            // Correction budget used up (three corrections of weight 3.5): take the next input
            // element as typed, without adding penalty.
            var extended = convertTargetElements
            let source = self.input[left + consumed]
            let asTyped = InputElement(character: source.character.toKatakana(), inputStyle: source.inputStyle)
            ComposingText.updateConvertTargetElements(currentElements: &extended, newElement: asTyped)
            stack.append((extended, asTyped, consumed + 1, penalty))
        } else {
            stack.append(contentsOf: nodes[consumed].compactMap { candidate in
                // Skip candidates that would run past the widest span.
                if consumed + candidate.inputElements.count > nodes.endIndex {
                    return nil
                }
                var extended = convertTargetElements
                for element in candidate.inputElements {
                    ComposingText.updateConvertTargetElements(currentElements: &extended, newElement: element)
                }
                if shouldBeRemovedForDicdataStore(components: extended) {
                    return nil
                }
                guard let last = candidate.inputElements.last else {
                    return nil
                }
                return (
                    convertTargetElements: extended,
                    lastElement: last,
                    count: consumed + candidate.inputElements.count,
                    penalty: penalty + candidate.weight
                )
            })
        }
    }
    // When the same reading was reached several times, the entry with the larger penalty is kept.
    return Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0})
}
|
||||
|
||||
/// Enumerates typo-corrected readings of the closed input span `left...right`.
/// - Parameters:
///   - left: Index of the first input element of the span.
///   - right: Index of the last input element of the span (inclusive).
/// - Returns: A map from katakana reading to its accumulated typo penalty. Empty when `right < left`.
func getRangeWithTypos(_ left: Int, _ right: Int) -> [[Character]: PValue] {
    // Enumerate the candidates starting at each i.
    // For example, with input = [d(あ), r(s), r(i), r(t), r(s), d(は), d(は), d(れ)]
    // nodes = [[d(あ)], [r(s)], [r(i)], [r(t), [r(t), r(a)]], [r(s)], [d(は), d(ば), d(ぱ)], [d(れ)]]
    let count = right - left + 1
    // An empty or inverted span has no candidates; this also avoids forming `0..<count` with a
    // negative bound and indexing `nodes[0]` on an empty table.
    guard count > 0 else {
        return [:]
    }
    let nodes = (0..<count).map {(i: Int) in
        Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
            let j = i + k
            if count <= j {
                return []
            }
            return Self.getTypo(self.input[left + i ... left + j])
        }
    }

    let maxPenalty: PValue = 3.5 * 3

    // Depth-first enumeration. Each entry is a partial reading together with the number of input
    // elements it has consumed and the penalty accumulated so far.
    var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
        guard let firstElement = typoCandidate.inputElements.first,
              let lastElement = typoCandidate.inputElements.last,
              Self.isLeftSideValid(first: firstElement, of: self.input, from: left) else {
            return nil
        }
        var convertTargetElements = [ConvertTargetElement]()
        for element in typoCandidate.inputElements {
            ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
        }
        return (convertTargetElements, lastElement, typoCandidate.inputElements.count, typoCandidate.weight)
    }

    var stringToPenalty: [([Character], PValue)] = []

    while let (convertTargetElements, lastElement, consumed, penalty) = stack.popLast() {
        // The whole span has been consumed: record the reading if its right side is valid, then stop
        // extending this path.
        if consumed + left - 1 == right {
            if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: consumed + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
                stringToPenalty.append((convertTarget, penalty))
            }
            continue
        }
        // Nothing left to consume.
        if nodes.endIndex <= consumed {
            continue
        }
        if penalty >= maxPenalty {
            // Correction budget used up (three corrections of weight 3.5): take the next input
            // element as typed, without adding penalty.
            var extended = convertTargetElements
            let source = self.input[left + consumed]
            let asTyped = InputElement(character: source.character.toKatakana(), inputStyle: source.inputStyle)
            ComposingText.updateConvertTargetElements(currentElements: &extended, newElement: asTyped)
            stack.append((extended, asTyped, consumed + 1, penalty))
        } else {
            stack.append(contentsOf: nodes[consumed].compactMap { candidate in
                // Skip candidates that would run past the span.
                if consumed + candidate.inputElements.count > nodes.endIndex {
                    return nil
                }
                var extended = convertTargetElements
                for element in candidate.inputElements {
                    ComposingText.updateConvertTargetElements(currentElements: &extended, newElement: element)
                }
                if shouldBeRemovedForDicdataStore(components: extended) {
                    return nil
                }
                guard let last = candidate.inputElements.last else {
                    return nil
                }
                return (
                    convertTargetElements: extended,
                    lastElement: last,
                    count: consumed + candidate.inputElements.count,
                    penalty: penalty + candidate.weight
                )
            })
        }
    }
    // When the same reading was reached several times, the larger penalty is kept.
    return Dictionary(stringToPenalty, uniquingKeysWith: max)
}
|
||||
|
||||
/// Looks up the typo candidates for a short run of input elements.
///
/// The characters of `elements` are concatenated and converted to katakana to form the lookup key.
/// When every element is direct input the key is looked up in `directPossibleTypo`; when every
/// element is roman-to-kana input it is looked up in `roman2KanaPossibleTypo` (weight 3.5 each).
/// For a single-character key the unchanged input is appended as a candidate with weight 0.
/// - Parameter elements: The input elements to examine.
/// - Returns: The candidates, or an empty array for an empty key or mixed input styles.
private static func getTypo(_ elements: some Collection<InputElement>) -> [TypoCandidate] {
    let key = elements.reduce(into: "") {$0.append($1.character)}.toKatakana()
    let keyLength = key.count

    if keyLength >= 1, elements.allSatisfy({$0.inputStyle == .direct}) {
        var candidates = Self.directPossibleTypo[key, default: []].map { unit in
            TypoCandidate(
                inputElements: unit.value.map {InputElement(character: $0, inputStyle: .direct)},
                weight: unit.weight
            )
        }
        if keyLength == 1 {
            // The input exactly as typed.
            candidates.append(TypoCandidate(inputElements: key.map {InputElement(character: $0, inputStyle: .direct)}, weight: 0))
        }
        return candidates
    }

    if keyLength >= 1, elements.allSatisfy({$0.inputStyle == .roman2kana}) {
        var candidates = Self.roman2KanaPossibleTypo[key, default: []].map { replacement in
            TypoCandidate(
                inputElements: replacement.map {InputElement(character: $0, inputStyle: .roman2kana)},
                weight: 3.5
            )
        }
        if keyLength == 1 {
            // The input exactly as typed.
            candidates.append(
                TypoCandidate(inputElements: key.map {InputElement(character: $0, inputStyle: .roman2kana)}, weight: 0)
            )
        }
        return candidates
    }

    return []
}
|
||||
|
||||
/// Offsets `k` used when building typo candidates: a lookup starting at index `i` covers
/// `input[i ... i + k]`, i.e. runs of one or two input elements.
private static let lengths = [0, 1]
|
||||
|
||||
/// One possible correction for directly typed input, together with its penalty weight.
private struct TypoUnit: Equatable {
    /// The corrected string.
    var value: String
    /// The penalty added when this correction is applied.
    var weight: PValue

    /// - Parameters:
    ///   - value: The corrected string.
    ///   - weight: The penalty of the correction; 3.5 unless stated otherwise.
    init(_ value: String, weight: PValue = 3.5) {
        self.weight = weight
        self.value = value
    }
}
|
||||
|
||||
/// A candidate replacement for a run of input elements, produced by `getTypo`.
struct TypoCandidate: Equatable {
    /// The input elements to use in place of what was typed.
    var inputElements: [InputElement]
    /// The penalty of choosing this candidate (0 for the input as typed).
    var weight: PValue
}
|
||||
|
||||
/// ダイレクト入力用
|
||||
/// Correction table for direct input: maps a katakana key to the strings it may have been meant
/// as. Entries without an explicit weight use the `TypoUnit` default.
private static let directPossibleTypo: [String: [TypoUnit]] = [
    // カ row
    "カ": [TypoUnit("ガ", weight: 7.0)],
    "キ": [TypoUnit("ギ")],
    "ク": [TypoUnit("グ")],
    "ケ": [TypoUnit("ゲ")],
    "コ": [TypoUnit("ゴ")],
    // サ row
    "サ": [TypoUnit("ザ")],
    "シ": [TypoUnit("ジ")],
    "ス": [TypoUnit("ズ")],
    "セ": [TypoUnit("ゼ")],
    "ソ": [TypoUnit("ゾ")],
    // タ row
    "タ": [TypoUnit("ダ", weight: 6.0)],
    "チ": [TypoUnit("ヂ")],
    "ツ": [TypoUnit("ッ", weight: 6.0), TypoUnit("ヅ", weight: 4.5)],
    "テ": [TypoUnit("デ", weight: 6.0)],
    "ト": [TypoUnit("ド", weight: 4.5)],
    // ハ row
    "ハ": [TypoUnit("バ", weight: 4.5), TypoUnit("パ", weight: 6.0)],
    "ヒ": [TypoUnit("ビ"), TypoUnit("ピ", weight: 4.5)],
    "フ": [TypoUnit("ブ"), TypoUnit("プ", weight: 4.5)],
    "ヘ": [TypoUnit("ベ"), TypoUnit("ペ", weight: 4.5)],
    "ホ": [TypoUnit("ボ"), TypoUnit("ポ", weight: 4.5)],
    // バ row
    "バ": [TypoUnit("パ")],
    "ビ": [TypoUnit("ピ")],
    "ブ": [TypoUnit("プ")],
    "ベ": [TypoUnit("ペ")],
    "ボ": [TypoUnit("ポ")],
    // ヤ row (small kana)
    "ヤ": [TypoUnit("ャ")],
    "ユ": [TypoUnit("ュ")],
    "ヨ": [TypoUnit("ョ")]
]
|
||||
|
||||
/// Correction table for roman-to-kana input: maps a typed key sequence to the sequences it may
/// have been meant as. `getTypo` applies a fixed weight of 3.5 to each of these.
private static let roman2KanaPossibleTypo: [String: [String]] = [
    "bs": ["ba"],
    "no": ["bo"],
    "li": ["ki"],
    "lo": ["ko"],
    "lu": ["ku"],
    "my": ["mu"],
    "tp": ["to"],
    "ts": ["ta"],
    "wi": ["wo"],
    "pu": ["ou"]
]
|
||||
}
|
||||
109
Sources/KanaKanjiConverterModule/Kana2Kanji/Kana2Kanji.swift
Normal file
109
Sources/KanaKanjiConverterModule/Kana2Kanji/Kana2Kanji.swift
Normal file
@@ -0,0 +1,109 @@
|
||||
//
|
||||
// kana2kanji.swift
|
||||
// Kana2KajiProject
|
||||
//
|
||||
// Created by ensan on 2020/09/02.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
// Floating-point type used for conversion values and penalties.
// The width is chosen per platform at compile time.
#if os(iOS) || os(tvOS)
/// `Float16` when building for iOS or tvOS.
public typealias PValue = Float16
#else
/// `Float32` on every other platform.
public typealias PValue = Float32
#endif
|
||||
|
||||
struct Kana2Kanji {
    /// The dictionary store used for every value lookup in the conversion.
    var dicdataStore = DicdataStore()

    /// Turns a `CandidateData` into a `Candidate`.
    /// - Parameter data: The clause-segmented candidate data. Must contain at least one clause.
    /// - Returns: The resulting candidate.
    /// - Note: The role of this function is to take the semantic connection (MM value) between
    ///   consecutive clauses into account.
    func processClauseCandidate(_ data: CandidateData) -> Candidate {
        // Sum the MM value between each clause and the one before it, starting from the EOS MID.
        var mmTotal: PValue = .zero
        var previousMid = MIDData.EOS.mid
        for unit in data.clauses {
            mmTotal = mmTotal + self.dicdataStore.getMMValue(previousMid, unit.clause.mid)
            previousMid = unit.clause.mid
        }
        let finalClause = data.clauses.last!
        return Candidate(
            text: data.clauses.map {$0.clause.text}.joined(),
            value: finalClause.value + mmTotal,
            correspondingCount: data.clauses.reduce(into: 0) {$0 += $1.clause.inputRange.count},
            lastMid: finalClause.clause.mid,
            data: data.data
        )
    }

    /// Looks for plausible candidates when there is no input yet (zero-hint prediction).
    /// - Parameters:
    ///   - preparts: Candidates to continue from, e.g. previously committed candidates.
    ///   - N_best: The number of candidates wanted. Not used by the current implementation.
    /// - Returns: The zero-hint prediction results.
    /// - Note: Dictionary-driven prediction often produced silly candidates such as
    ///   「食べちゃ-てる」 or 「食べちゃ-いる」, so everything except learning-based prediction is disabled.
    func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [Candidate] {
        // An earlier implementation, now disabled, took entries from
        // `dicdataStore.getZeroHintPredictionDicdata()` and kept the N_best highest-valued
        // continuations of each prepart in a sorted list.
        var result: [Candidate] = []
        for candidate in preparts {
            guard let last = candidate.data.last else {
                continue
            }
            // No source of follow-up entries is provided here, so this list is always empty and
            // the loop below adds nothing.
            let nexts = [(DicdataElement, Int)]()
            for (data, count) in nexts where count > 1 {
                let ccValue = self.dicdataStore.getCCValue(last.rcid, data.lcid)
                let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
                let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(candidate.lastMid, data.mid):.zero
                let wValue = data.value()
                let bonus = PValue(count * 1)
                let newValue = candidate.value + mmValue + ccValue + wValue + bonus
                var nodedata = candidate.data
                nodedata.append(data)
                let extended = Candidate(
                    text: candidate.text + data.word,
                    value: newValue,
                    correspondingCount: candidate.correspondingCount,
                    lastMid: includeMMValueCalculation ? data.mid:candidate.lastMid,
                    data: nodedata
                )
                result.append(extended)
            }
        }
        return result
    }
}
|
||||
@@ -0,0 +1,111 @@
|
||||
//
|
||||
// afterCharacterAdded.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/14.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {
    /// Converts kana to kanji for the case where exactly one character was appended to the input.
    /// - Parameters:
    ///   - inputData: The current input, including the appended character.
    ///   - N_best: The maximum number of predecessors kept per node.
    ///   - previousResult: The input and lattice nodes from before the character was appended.
    /// - Returns: The EOS node holding the conversion results, and the updated lattice nodes.
    ///
    /// ### Steps
    /// (0) Declare frequently used values.
    ///
    /// (1) Enumerate the nodes that end with the appended character.
    ///
    /// (2) Register the already computed nodes into the nodes from (1), keeping the N best.
    ///
    /// (3) Add what was registered in (1) to the result node.
    ///
    /// (4) Update the node table and return.
    func kana2lattice_addedLast(_ inputData: ComposingText, N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
        debug("一文字追加。内部文字列は\(inputData.input).\(previousResult.nodes.map {($0.first?.data.ruby, $0.first?.inputRange)})")
        // (0)
        var nodes = previousResult.nodes
        let count = previousResult.inputData.input.count

        // (1)
        let addedNodes: [[LatticeNode]] = (0...count).map {(start: Int) in
            self.dicdataStore.getLOUDSData(inputData: inputData, from: start, to: count)
        }

        // (2) The original author notes this used to be the most time-consuming part.
        for node in nodes.joined() where !node.prevs.isEmpty {
            if self.dicdataStore.shouldBeRemoved(data: node.data) {
                continue
            }
            // Nodes starting where this node ends are its possible successors.
            for nextnode in addedNodes[node.inputRange.endIndex] {
                // Skip here; a successor that never gets a predecessor is rejected later by the
                // `prevs.isEmpty` check.
                if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
                    continue
                }
                // Class connection value.
                let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                // For every predecessor path held by `node`.
                for (index, value) in node.values.enumerated() {
                    let newValue: PValue = ccValue + value
                    // Position at which the new entry belongs.
                    let insertAt: Int = nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}).map {$0 + 1} ?? 0
                    guard insertAt != N_best else {
                        continue
                    }
                    let registered: RegisteredNode = node.getRegisteredNode(index, value: newValue)
                    // Drop the last entry first when the list is full; removing before inserting
                    // is faster because insert is O(N).
                    if nextnode.prevs.count >= N_best {
                        nextnode.prevs.removeLast()
                    }
                    nextnode.prevs.insert(registered, at: insertAt)
                }
            }
        }

        // (3)
        let result = LatticeNode.EOSNode
        for (start, nodeArray) in addedNodes.enumerated() {
            for node in nodeArray where !node.prevs.isEmpty {
                // Word value of the node itself.
                let wValue = node.data.value()
                // Update `values`; nodes starting at index 0 additionally include the connection
                // value to their predecessor.
                node.values = node.prevs.map { prev -> PValue in
                    let base = prev.totalValue + wValue
                    return start == 0 ? base + self.dicdataStore.getCCValue(prev.data.rcid, node.data.lcid) : base
                }
                // Every added node reaches the end of the input, so register it in the result.
                for index in node.prevs.indices {
                    result.prevs.append(node.getRegisteredNode(index, value: node.values[index]))
                }
            }
        }

        // (4)
        for (index, nodeArray) in zip(nodes.indices, addedNodes) {
            nodes[index].append(contentsOf: nodeArray)
        }
        nodes.append(addedNodes.last ?? [])
        return (result: result, nodes: nodes)
    }
}
|
||||
@@ -0,0 +1,154 @@
|
||||
//
|
||||
// afterCharacterAdded.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/14.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {
    /// Converts kana to kanji for the case where several characters were appended to the input.
    /// - Parameters:
    ///   - inputData: The current input, including the appended characters.
    ///   - N_best: The maximum number of predecessors kept per node.
    ///   - addedCount: The number of appended characters. `1` is delegated to `kana2lattice_addedLast`.
    ///   - previousResult: The input and lattice nodes from before the characters were appended.
    /// - Returns: The EOS node holding the conversion results, and the updated lattice nodes.
    ///
    /// ### Steps
    /// (0) Declare frequently used values.
    ///
    /// (1) Enumerate the nodes that reach into the appended characters.
    ///
    /// (2) Register the already computed nodes into the nodes from (1), keeping the N best.
    ///
    /// (3) Walk the nodes from (1): register those ending at the end of the input in the result
    /// node and connect the others to their successors.
    ///
    /// (4) Update the node table and return.
    func kana2lattice_added(_ inputData: ComposingText, N_best: Int, addedCount: Int, previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
        debug("\(addedCount)文字追加。追加されたのは「\(inputData.input.suffix(addedCount))」")
        if addedCount == 1 {
            return kana2lattice_addedLast(inputData, N_best: N_best, previousResult: previousResult)
        }

        /// Offers every path ending at `node` to each node in `nextNodes`, keeping at most `N_best`
        /// predecessors per next node, ordered by descending total value.
        func connect(_ node: LatticeNode, to nextNodes: [LatticeNode]) {
            for nextnode in nextNodes {
                // Skip here; a successor that never gets a predecessor is rejected later by the
                // `prevs.isEmpty` check.
                if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
                    continue
                }
                // Class connection value.
                let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                // For every predecessor path held by `node`.
                for (index, value) in node.values.enumerated() {
                    let newValue: PValue = ccValue + value
                    // Position at which the new entry belongs.
                    let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
                    if lastindex == N_best {
                        continue
                    }
                    let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
                    // Drop the last entry first when the list is full; removing before inserting
                    // is faster because insert is O(N).
                    if nextnode.prevs.count >= N_best {
                        nextnode.prevs.removeLast()
                    }
                    nextnode.prevs.insert(newnode, at: lastindex)
                }
            }
        }

        // (0)
        var nodes = previousResult.nodes
        let count = inputData.input.count
        let previousCount = previousResult.inputData.input.count

        // (1) Only ranges ending inside the appended part are looked up.
        let addedNodes: [[LatticeNode]] = (.zero ..< count).map {(i: Int) in
            self.dicdataStore.getLOUDSDataInRange(
                inputData: inputData,
                from: i,
                toIndexRange: (max(previousCount, i) ..< max(previousCount, min(count, i + self.dicdataStore.maxlength + 1)))
            )
        }

        // (2)
        for nodeArray in nodes {
            for node in nodeArray {
                if node.prevs.isEmpty {
                    continue
                }
                if self.dicdataStore.shouldBeRemoved(data: node.data) {
                    continue
                }
                // Nodes starting where this node ends are its possible successors.
                connect(node, to: addedNodes[node.inputRange.endIndex])
            }
        }

        // (3)
        let result = LatticeNode.EOSNode
        for (i, nodeArray) in addedNodes.enumerated() {
            for node in nodeArray {
                if node.prevs.isEmpty {
                    continue
                }
                if self.dicdataStore.shouldBeRemoved(data: node.data) {
                    continue
                }
                // Word value of the node itself.
                let wValue = node.data.value()
                // Update `values`; nodes starting at index 0 additionally include the connection
                // value to their predecessor.
                if i == 0 {
                    node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
                } else {
                    node.values = node.prevs.map {$0.totalValue + wValue}
                }
                // Index right after the part of the input this node covers.
                let nextIndex = node.inputRange.endIndex
                if count == nextIndex {
                    // The node reaches the end of the input: register it in the result.
                    for index in node.prevs.indices {
                        let newnode = node.getRegisteredNode(index, value: node.values[index])
                        result.prevs.append(newnode)
                    }
                } else {
                    connect(node, to: addedNodes[nextIndex])
                }
            }
        }

        // (4)
        for (index, nodeArray) in addedNodes.enumerated() {
            if index < nodes.endIndex {
                nodes[index].append(contentsOf: nodeArray)
            } else {
                nodes.append(nodeArray)
            }
        }
        return (result: result, nodes: nodes)
    }
}
|
||||
96
Sources/KanaKanjiConverterModule/Kana2Kanji/all.swift
Normal file
96
Sources/KanaKanjiConverterModule/Kana2Kanji/all.swift
Normal file
@@ -0,0 +1,96 @@
|
||||
//
|
||||
// all.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/14.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {
    /// The basic structure a lattice is made of: `nodes[i]` holds the nodes starting at input index `i`.
    typealias Nodes = [[LatticeNode]]

    /// Converts kana to kanji from scratch, with no previous result to build on.
    /// - Parameters:
    ///   - inputData: The input data.
    ///   - N_best: The maximum number of predecessors kept per node.
    /// - Returns: The EOS node holding the conversion results, and the lattice nodes.
    ///
    /// ### Steps
    /// (0) Declare frequently used values.
    ///
    /// (1) Enumerate the nodes starting at every input index.
    ///
    /// (2) Walk the nodes in order and register each one into its successors, keeping the N best.
    ///
    /// (3) Nodes that end at the end of the input are registered in the result node.
    ///
    /// (4) Return the result together with the nodes.
    func kana2lattice_all(_ inputData: ComposingText, N_best: Int) -> (result: LatticeNode, nodes: Nodes) {
        debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)")
        let count: Int = inputData.input.count
        let result: LatticeNode = LatticeNode.EOSNode
        let nodes: [[LatticeNode]] = (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0)}
        // For the nodes starting at the i-th element...
        for (i, nodeArray) in nodes.enumerated() {
            // ...and for each of those nodes.
            for node in nodeArray {
                guard !node.prevs.isEmpty, !self.dicdataStore.shouldBeRemoved(data: node.data) else {
                    continue
                }
                // Word value of the node itself.
                let wValue: PValue = node.data.value()
                // Update `values`; nodes starting at index 0 additionally include the connection
                // value to their predecessor.
                node.values = node.prevs.map { prev -> PValue in
                    let base = prev.totalValue + wValue
                    return i == 0 ? base + self.dicdataStore.getCCValue(prev.data.rcid, node.data.lcid) : base
                }
                // Index right after the part of the input this node covers.
                let nextIndex: Int = node.inputRange.endIndex
                guard nextIndex != count else {
                    // The node reaches the end of the input: register it in the result.
                    for index in node.prevs.indices {
                        let registered: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
                        result.prevs.append(registered)
                    }
                    continue
                }
                // For every node that can follow this one.
                for nextnode in nodes[nextIndex] {
                    // Skip here; a successor that never gets a predecessor is rejected later by
                    // the `prevs.isEmpty` check.
                    if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
                        continue
                    }
                    // Class connection value.
                    let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                    // For every predecessor path held by `node`.
                    for (index, value) in node.values.enumerated() {
                        let newValue: PValue = ccValue + value
                        // Position at which the new entry belongs.
                        let insertAt: Int = nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}).map {$0 + 1} ?? 0
                        guard insertAt != N_best else {
                            continue
                        }
                        let registered: RegisteredNode = node.getRegisteredNode(index, value: newValue)
                        // Drop the last entry first when the list is full; removing before
                        // inserting is faster because insert is O(N).
                        if nextnode.prevs.count >= N_best {
                            nextnode.prevs.removeLast()
                        }
                        nextnode.prevs.insert(registered, at: insertAt)
                    }
                }
            }
        }
        return (result: result, nodes: nodes)
    }

}
|
||||
@@ -0,0 +1,149 @@
|
||||
//
|
||||
// changed_last_n_character.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/10/14.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {
    /// Converts kana to kanji for the case where the tail of the input was replaced.
    /// - Parameters:
    ///   - inputData: The current input.
    ///   - N_best: The maximum number of predecessors kept per node.
    ///   - counts: How many trailing elements were deleted from the previous input and how many were added.
    ///   - previousResult: The input and lattice nodes from before the change.
    /// - Returns: The EOS node holding the conversion results, and the updated lattice nodes.
    ///
    /// ### Steps
    /// (0) Declare frequently used values.
    ///
    /// (1) Drop every previous node that reaches into the deleted part.
    ///
    /// (2) Enumerate the nodes that reach into the changed part.
    ///
    /// (3) Register the nodes kept in (1) into the nodes from (2).
    ///
    /// (4) Walk the nodes from (2): register those ending at the end of the input in the result
    /// node and connect the others to their successors.
    ///
    /// (5) Update the node table and return.
    ///
    /// NOTE(review): `addedNodes` has `count` entries, so a kept node ending exactly at index
    /// `count` would index out of range in (3). Presumably callers only use this function when
    /// `counts.added > 0` — confirm.
    func kana2lattice_changed(_ inputData: ComposingText, N_best: Int, counts: (deleted: Int, added: Int), previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
        // (0)
        let count = inputData.input.count
        let commonCount = previousResult.inputData.input.count - counts.deleted
        debug("kana2lattice_changed", inputData, counts, previousResult.inputData, count, commonCount)

        /// Offers every path ending at `node` to each node in `nextNodes`, keeping at most `N_best`
        /// predecessors per next node, ordered by descending total value.
        func connect(_ node: LatticeNode, to nextNodes: [LatticeNode]) {
            for nextnode in nextNodes {
                // Skip here; a successor that never gets a predecessor is rejected later by the
                // `prevs.isEmpty` check.
                if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
                    continue
                }
                // Class connection value.
                let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                // For every predecessor path held by `node`.
                for (index, value) in node.values.enumerated() {
                    let newValue: PValue = ccValue + value
                    // Position at which the new entry belongs.
                    let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
                    if lastindex == N_best {
                        continue
                    }
                    let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
                    // Drop the last entry first when the list is full; removing before inserting
                    // is faster because insert is O(N).
                    if nextnode.prevs.count >= N_best {
                        nextnode.prevs.removeLast()
                    }
                    nextnode.prevs.insert(newnode, at: lastindex)
                }
            }
        }

        // (1) Keep only nodes that lie entirely within the unchanged prefix.
        var nodes = previousResult.nodes.prefix(commonCount).map {(nodeArray: [LatticeNode]) in
            nodeArray.filter {$0.inputRange.endIndex <= commonCount}
        }
        while nodes.last?.isEmpty ?? false {
            nodes.removeLast()
        }

        // (2)
        let addedNodes: [[LatticeNode]] = (0..<count).map {(i: Int) in
            self.dicdataStore.getLOUDSDataInRange(inputData: inputData, from: i, toIndexRange: max(commonCount, i) ..< count)
        }

        // (3)
        for nodeArray in nodes {
            for node in nodeArray {
                if node.prevs.isEmpty {
                    continue
                }
                if self.dicdataStore.shouldBeRemoved(data: node.data) {
                    continue
                }
                // Nodes starting where this node ends are its possible successors.
                connect(node, to: addedNodes[node.inputRange.endIndex])
            }
        }

        // (4)
        let result = LatticeNode.EOSNode
        for (i, nodeArray) in addedNodes.enumerated() {
            for node in nodeArray {
                if node.prevs.isEmpty {
                    continue
                }
                if self.dicdataStore.shouldBeRemoved(data: node.data) {
                    continue
                }
                // Word value of the node itself.
                let wValue = node.data.value()
                // Update `values`; nodes starting at index 0 additionally include the connection
                // value to their predecessor.
                if i == 0 {
                    node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
                } else {
                    node.values = node.prevs.map {$0.totalValue + wValue}
                }
                let nextIndex = node.inputRange.endIndex
                if count == nextIndex {
                    // The node reaches the end of the input: register it in the result.
                    for index in node.prevs.indices {
                        let newnode = node.getRegisteredNode(index, value: node.values[index])
                        result.prevs.append(newnode)
                    }
                } else {
                    connect(node, to: addedNodes[nextIndex])
                }
            }
        }

        // (5)
        for (index, nodeArray) in addedNodes.enumerated() where index < nodes.endIndex {
            nodes[index].append(contentsOf: nodeArray)
        }
        for nodeArray in addedNodes.suffix(counts.added) {
            nodes.append(nodeArray)
        }

        return (result: result, nodes: nodes)
    }

}
|
||||
@@ -0,0 +1,98 @@
|
||||
//
|
||||
// afterPartlyCompleted.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/14.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {
    /// Converts kana to a lattice right after a part of the input has been committed.
    ///
    /// ### How it works
    /// (1) Take the last `inputData.input.count` columns of the previously computed nodes,
    ///     reset their `prevs` (the first column is connected to a start node derived from
    ///     `completedData`), and shift every `inputRange` back by the committed length.
    ///
    /// (2) Walk the lattice again to re-register the best paths.
    ///
    /// - Parameters:
    ///   - inputData: The composing text after the commit.
    ///   - completedData: The candidate that was just committed.
    ///   - N_best: Maximum number of previous nodes kept per lattice node.
    ///   - previousResult: Input data and nodes from the conversion before the commit.
    /// - Returns: The end node holding the registered paths, and the reused node columns.
    func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
        debug("確定直後の変換、前は:", previousResult.inputData, "後は:", inputData)
        let count = inputData.input.count

        // (1)
        let start = RegisteredNode.fromLastCandidate(completedData)
        let shift = completedData.correspondingCount
        let nodes: Nodes = previousResult.nodes.suffix(count)
        for (column, columnNodes) in nodes.enumerated() {
            for node in columnNodes {
                // Only the first column is reachable from the start node.
                node.prevs = column == .zero ? [start] : []
                // Shift the range back by the number of committed input elements.
                node.inputRange = (node.inputRange.startIndex - shift) ..< (node.inputRange.endIndex - shift)
            }
        }

        // (2)
        let result = LatticeNode.EOSNode
        for (column, columnNodes) in nodes.enumerated() {
            let isFirstColumn = column == 0
            for node in columnNodes {
                guard !node.prevs.isEmpty, !self.dicdataStore.shouldBeRemoved(data: node.data) else {
                    continue
                }
                // Score of the word itself.
                let wValue = node.data.value()
                // Refresh `values`; the connection cost to the start node is added only in the first column.
                node.values = node.prevs.map { prev in
                    let base = prev.totalValue + wValue
                    return isFirstColumn ? base + self.dicdataStore.getCCValue(prev.data.rcid, node.data.lcid) : base
                }
                // Number of input elements consumed so far.
                let nextIndex = node.inputRange.endIndex
                guard nextIndex != count else {
                    // The whole input is consumed: register every path on the end node.
                    for index in node.prevs.indices {
                        result.prevs.append(node.getRegisteredNode(index, value: node.values[index]))
                    }
                    continue
                }
                // Otherwise propagate to the nodes that start where this one ends.
                for nextnode in nodes[nextIndex] {
                    if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
                        continue
                    }
                    // Class connection cost between the two nodes.
                    let ccValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                    for (index, value) in node.values.enumerated() {
                        let newValue = ccValue + value
                        // Position that keeps `prevs` ordered by descending total value.
                        let insertionIndex = (nextnode.prevs.lastIndex(where: { $0.totalValue >= newValue }) ?? -1) + 1
                        guard insertionIndex != N_best else {
                            continue
                        }
                        let registered = node.getRegisteredNode(index, value: newValue)
                        // Drop the worst entry first when the list is full
                        // (removing before inserting is cheaper because insert is O(N)).
                        if nextnode.prevs.count >= N_best {
                            nextnode.prevs.removeLast()
                        }
                        nextnode.prevs.insert(registered, at: insertionIndex)
                    }
                }
            }
        }
        return (result: result, nodes: nodes)
    }
}
|
||||
@@ -0,0 +1,60 @@
|
||||
//
|
||||
// afterLastCharacterDeleted.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/14.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {

    /// Converts kana to a lattice after the last few characters were deleted.
    /// - Parameters:
    ///   - deletedCount: Number of deleted input elements.
    ///   - N_best: The N-best value (not used for recomputation here).
    ///   - previousResult: Data of the previous conversion, i.e. the one that still contained the deleted characters.
    /// - Returns: The end node holding the registered paths, and the trimmed node columns.
    ///
    /// ### How it works
    /// (1) Scan the already computed nodes and register on the result those that end
    ///     exactly at the new end of the input. Their N-best lists are already computed.
    ///
    /// (2) Build the nodes to return, filtering out nodes that extend past the new length.
    func kana2lattice_deletedLast(deletedCount: Int, N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
        debug("削除の連続性を利用した変換、元の文字は:", previousResult.inputData.convertTarget)
        let count = previousResult.inputData.input.count - deletedCount

        // (1)
        let result = LatticeNode.EOSNode
        for node in previousResult.nodes.joined() {
            guard !node.prevs.isEmpty,
                  !self.dicdataStore.shouldBeRemoved(data: node.data),
                  node.inputRange.endIndex == count else {
                continue
            }
            // This node ends at the new end of the input: register all of its paths.
            for (index, value) in node.values.enumerated() {
                result.prevs.append(node.getRegisteredNode(index, value: value))
            }
        }

        // (2)
        let updatedNodes = previousResult.nodes.prefix(count).map { (column: [LatticeNode]) in
            column.filter { $0.inputRange.endIndex <= count }
        }
        return (result: result, nodes: updatedNodes)
    }

}
|
||||
107
Sources/KanaKanjiConverterModule/Kana2Kanji/getPrediction.swift
Normal file
107
Sources/KanaKanjiConverterModule/Kana2Kanji/getPrediction.swift
Normal file
@@ -0,0 +1,107 @@
|
||||
//
|
||||
// getPrediction.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/12/09.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {
    /// Builds prediction candidates that continue a partially converted `CandidateData`.
    /// - Parameters:
    ///   - composingText: The current composing text. The elements in `lastClause.inputRange`
    ///     are turned into the katakana reading used as the prediction key.
    ///   - prepart: The part of the candidate that precedes the predicted portion.
    ///     For example, when predicting from the "き" of "これはき", "これは" is the `prepart`.
    ///   - lastClause: The clause to predict from (the "き" of "これはき").
    ///   - N_best: Maximum number of candidates to return.
    /// - Returns: Candidates sorted by descending value, e.g. "これは今日" for "これはき".
    /// - Note: The role of this function is to take the connection with `prepart` into account.
    func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] {
        debug("getPredictionCandidates", composingText, lastClause.inputRange, lastClause.text)
        let lastRuby = ComposingText.getConvertTarget(for: composingText.input[lastClause.inputRange]).toKatakana()
        let lastRubyCount = lastClause.inputRange.count

        // Collect the leading elements of `prepart.data` whose words spell out the clause texts.
        let datas: [DicdataElement]
        do {
            let prestring: String = prepart.clauses.reduce(into: "") { $0.append(contentsOf: $1.clause.text) }
            var joined = ""
            var count: Int = .zero
            // The bound on `count` prevents an out-of-range access when the concatenated
            // words never become equal to the clause texts.
            while joined != prestring && count < prepart.data.count {
                joined += prepart.data[count].word
                count += 1
            }
            datas = Array(prepart.data.prefix(count))
        }

        let osuserdict: [DicdataElement] = dicdataStore.getPrefixMatchOSUserDict(lastRuby)

        let lastCandidate: Candidate = prepart.isEmpty ? Candidate(text: "", value: .zero, correspondingCount: 0, lastMid: MIDData.EOS.mid, data: []) : self.processClauseCandidate(prepart)
        let lastRcid: Int = lastCandidate.data.last?.rcid ?? CIDData.EOS.cid
        let nextLcid: Int = prepart.lastClause?.nextLcid ?? CIDData.EOS.cid
        let lastMid: Int = lastCandidate.lastMid
        let correspondingCount: Int = lastCandidate.correspondingCount + lastRubyCount
        // Connection cost between `lastRcid` and `nextLcid`; it is subtracted from every new value below.
        let ignoreCCValue: PValue = self.dicdataStore.getCCValue(lastRcid, nextLcid)

        let inputStyle = composingText.input.last?.inputStyle ?? .direct
        let dicdata: [DicdataElement]
        switch inputStyle {
        case .direct:
            dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: lastRuby)
        case .roman2kana:
            // Trailing roman letters are expanded to the kana they may become.
            let roman = lastRuby.suffix(while: { String($0).onlyRomanAlphabet })
            if !roman.isEmpty {
                let ruby: Substring = lastRuby.dropLast(roman.count)
                if ruby.isEmpty {
                    dicdata = []
                    break
                }
                let possibleNexts: [Substring] = DicdataStore.possibleNexts[String(roman), default: []].map { ruby + $0 }
                debug("getPredictionCandidates", lastRuby, ruby, roman, possibleNexts, prepart, lastRubyCount)
                dicdata = possibleNexts.flatMap { self.dicdataStore.getPredictionLOUDSDicdata(key: $0) }
            } else {
                debug("getPredictionCandidates", lastRuby, roman)
                dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: lastRuby)
            }
        }

        var result: [Candidate] = []
        result.reserveCapacity(N_best &+ 1)
        for data in (dicdata + osuserdict) {
            let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
            let mmValue: PValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(lastMid, data.mid) : .zero
            let ccValue: PValue = self.dicdataStore.getCCValue(lastRcid, data.lcid)
            // The difference in reading length is used as a penalty.
            let penalty: PValue = -PValue(data.ruby.count &- lastRuby.count) * 3.0
            let wValue: PValue = data.value()
            let newValue: PValue = lastCandidate.value + mmValue + ccValue + wValue + penalty - ignoreCCValue
            // Position that keeps `result` ordered by descending value.
            let lastindex: Int = (result.lastIndex(where: { $0.value >= newValue }) ?? -1) + 1
            if lastindex >= N_best {
                continue
            }
            var nodedata: [DicdataElement] = datas
            nodedata.append(data)
            let candidate: Candidate = Candidate(
                text: lastCandidate.text + data.word,
                value: newValue,
                correspondingCount: correspondingCount,
                lastMid: includeMMValueCalculation ? data.mid : lastMid,
                data: nodedata
            )
            // Drop the worst entry first when the list is full
            // (removing before inserting is cheaper because insert is O(N)).
            if result.count >= N_best {
                result.removeLast()
            }
            result.insert(candidate, at: lastindex)
        }

        return result
    }
}
|
||||
56
Sources/KanaKanjiConverterModule/Kana2Kanji/no_change.swift
Normal file
56
Sources/KanaKanjiConverterModule/Kana2Kanji/no_change.swift
Normal file
@@ -0,0 +1,56 @@
|
||||
//
|
||||
// no_change.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2022/11/09.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension Kana2Kanji {

    /// Converts kana to a lattice by simply restoring the cached result.
    /// - Parameters:
    ///   - N_best: The N-best value (not used for recomputation here).
    ///   - previousResult: Data of the previous conversion.
    /// - Returns: The end node holding the registered paths, and the unchanged node columns.
    ///
    /// ### How it works
    /// (1) Scan the already computed nodes and register on the result those that end
    ///     at the end of the input. Their N-best lists are already computed.
    ///
    /// (2) Return the previous nodes as they are.
    func kana2lattice_no_change(N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
        debug("キャッシュから復元、元の文字は:", previousResult.inputData.convertTarget)
        let count = previousResult.inputData.input.count

        // (1)
        let result = LatticeNode.EOSNode
        for node in previousResult.nodes.joined() {
            guard !node.prevs.isEmpty,
                  !self.dicdataStore.shouldBeRemoved(data: node.data),
                  node.inputRange.endIndex == count else {
                continue
            }
            // This node ends at the end of the input: register all of its paths.
            for (index, value) in node.values.enumerated() {
                result.prevs.append(node.getRegisteredNode(index, value: value))
            }
        }

        // (2)
        return (result: result, nodes: previousResult.nodes)
    }

}
|
||||
178
Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
Normal file
178
Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
Normal file
@@ -0,0 +1,178 @@
|
||||
//
|
||||
// LOUDS.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/30.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
private extension UInt64 {
    /// A word in which only the most significant bit is set.
    static let prefixOne: UInt64 = 1 << 63
}

/// LOUDS bit vector with the lookup operations used for dictionary search.
struct LOUDS {
    private typealias Unit = UInt64
    /// Number of bits in one `Unit`.
    private static let unit = 64
    /// log2 of `unit`.
    private static let uExp = 6

    private let bits: [Unit]
    private let indices: Range<Int>
    /// For every character ID, the node indices carrying that character, in increasing order.
    private let char2nodeIndices: [[Int]]
    /// Cumulative number of 0 bits (not 1 bits) before each word.
    private let rankLarge: [Int]

    init(bytes: [UInt64], nodeIndex2ID: [UInt8]) {
        self.bits = bytes
        self.indices = bytes.indices

        var table = [[Int]](repeating: [], count: 1 << 8)
        for (nodeIndex, charID) in nodeIndex2ID.enumerated() {
            table[Int(charID)].append(nodeIndex)
        }
        self.char2nodeIndices = table

        var zeroCounts = [0]
        var runningZeros = 0
        for word in bytes {
            runningZeros = runningZeros &+ (Self.unit &- word.nonzeroBitCount)
            zeroCounts.append(runningZeros)
        }
        self.rankLarge = zeroCounts
    }

    /// Skips `parentNodeIndex` zeros and returns the node indices up to the next zero.
    private func childNodeIndices(from parentNodeIndex: Int) -> Range<Int> {
        // We are looking for
        //   startIndex: the smallest bit position with `parentNodeIndex` zeros on its left
        //   endIndex:   the smallest bit position with `parentNodeIndex + 1` zeros on its left
        // The start is located first, and the end is found by searching for the next zero.

        // Search part 1: narrow down the word that contains the start position.
        // `rankLarge` holds the number of zeros on the left, so the difference tells how far to jump.
        var wordIndex = (parentNodeIndex >> Self.uExp) &- 1
        while true {
            let missingZeros = parentNodeIndex &- self.rankLarge[wordIndex &+ 1]
            guard missingZeros >= Self.unit else {
                break
            }
            wordIndex &+= missingZeros >> Self.uExp
        }
        let firstCandidate = wordIndex &+ 1
        guard let i = (firstCandidate ..< self.bits.count).first(where: { self.rankLarge[$0 &+ 1] >= parentNodeIndex }) else {
            return 0 ..< 0
        }

        return self.bits.withUnsafeBufferPointer { (buffer: UnsafeBufferPointer<Unit>) -> Range<Int> in
            // Search part 2: find the offset `k` inside the word.
            // `k` is fixed once only the surplus zeros remain to the right.
            let word = buffer[i]
            let surplus = self.rankLarge[i &+ 1] &- parentNodeIndex // zeros beyond the target
            var remainingZeros = Unit(Self.unit &- word.nonzeroBitCount) // zeros not yet passed
            var k = Self.unit

            for offset in 0 ..< Self.unit {
                if remainingZeros == surplus {
                    k = offset
                    break
                }
                // After shifting left by `offset`, the value is below 100…00 exactly when
                // the bit at `offset` (counted from the top) is 0.
                if (word << offset) < Unit.prefixOne {
                    remainingZeros &-= 1
                }
            }

            let start = (i << Self.uExp) &+ k &- parentNodeIndex &+ 1
            if surplus == .zero {
                // No zero is left in this word: skip all-ones words to find the first zero.
                var j = i &+ 1
                while buffer[j] == Unit.max {
                    j &+= 1
                }
                let nextWord = buffer[j]
                let zeroOffset = (0 ..< Self.unit).first(where: { (nextWord << $0) < Unit.prefixOne })
                let end = (j << Self.uExp) &+ (zeroOffset ?? 0) &- parentNodeIndex &+ 1
                return start ..< end
            } else {
                // The next zero is in the same word.
                let zeroOffset = (k ..< Self.unit).first(where: { (word << $0) < Unit.prefixOne })
                let end = (i << Self.uExp) &+ (zeroOffset ?? 0) &- parentNodeIndex &+ 1
                return start ..< end
            }
        }
    }

    /// Finds the child of `parentNodeIndex` that carries `char`.
    /// Compared with `childNodeIndices`, the binary search affects speed by about 0.02 seconds at most.
    private func searchCharNodeIndex(from parentNodeIndex: Int, char: UInt8) -> Int? {
        // The lists in `char2nodeIndices` are increasing, so binary search is valid.
        let children = self.childNodeIndices(from: parentNodeIndex)
        let candidates = self.char2nodeIndices[Int(char)]
        var low = candidates.startIndex
        var high = candidates.endIndex
        while low < high {
            let middle = (low + high) >> 1
            if candidates[middle] < children.startIndex {
                low = middle + 1
            } else {
                high = middle
            }
        }
        guard low < candidates.endIndex, children.contains(candidates[low]) else {
            return nil
        }
        return candidates[low]
    }

    /// Performs an exact-match search.
    /// - Parameter chars: The string converted to character IDs.
    /// - Returns: The corresponding index in the loudstxt3 file.
    internal func searchNodeIndex(chars: [UInt8]) -> Int? {
        var current = 1
        for char in chars {
            guard let next = self.searchCharNodeIndex(from: current, char: char) else {
                return nil
            }
            current = next
        }
        return current
    }

    private func prefixNodeIndices(nodeIndex: Int, depth: Int = 0, maxDepth: Int) -> [Int] {
        let children = self.childNodeIndices(from: nodeIndex)
        var collected = Array(children)
        guard depth != maxDepth else {
            return collected
        }
        for child in children {
            collected += self.prefixNodeIndices(nodeIndex: child, depth: depth + 1, maxDepth: maxDepth)
        }
        return collected
    }

    /// Performs a prefix-match search.
    ///
    /// For the input "しかい", entries beyond it such as "しかいし", "しかいしゃ" and "しかいいん" are searched as well.
    /// - Parameter chars: The string converted to character IDs.
    /// - Parameter maxDepth: Maximum depth to descend.
    /// - Returns: The list of corresponding indices in the loudstxt3 file.
    internal func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
        if let nodeIndex = self.searchNodeIndex(chars: chars) {
            return self.prefixNodeIndices(nodeIndex: nodeIndex, maxDepth: maxDepth)
        }
        return []
    }

    /// Performs a search over every prefix of the input.
    ///
    /// For the input "しかい", not only "しかい" but also "し" and "しか" are searched.
    /// - Parameter chars: The string converted to character IDs.
    /// - Returns: The list of corresponding indices in the loudstxt3 file.
    /// - Note: A more appropriate name would be desirable.
    internal func byfixNodeIndices(chars: [UInt8]) -> [Int] {
        var path = [1]
        var current = 1
        for char in chars {
            guard let next = self.searchCharNodeIndex(from: current, char: char) else {
                break
            }
            path.append(next)
            current = next
        }
        return path
    }
}
|
||||
128
Sources/KanaKanjiConverterModule/LOUDS/extension LOUDS.swift
Normal file
128
Sources/KanaKanjiConverterModule/LOUDS/extension LOUDS.swift
Normal file
@@ -0,0 +1,128 @@
|
||||
//
|
||||
// extension Data.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/30.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
extension LOUDS {
    /// Reads the file at `url` and reinterprets its contents as an array of `UInt64`.
    /// Returns `nil` (after logging) when the file cannot be read.
    private static func loadLOUDSBinary(from url: URL) -> [UInt64]? {
        let binaryData: Data
        do {
            // The file is never read twice, so caching is unnecessary.
            binaryData = try Data(contentsOf: url, options: [.uncached])
        } catch {
            debug(error)
            return nil
        }
        return binaryData.toArray(of: UInt64.self)
    }

    /// Returns the URLs of the character table and of the LOUDS bits for `identifier`.
    private static func getLOUDSURL(_ identifier: String, option: ConvertRequestOptions) -> (chars: URL, louds: URL) {
        switch identifier {
        case "user":
            let directory = option.sharedContainerURL
            return (
                directory.appendingPathComponent("user.loudschars2", isDirectory: false),
                directory.appendingPathComponent("user.louds", isDirectory: false)
            )
        case "memory":
            let directory = option.memoryDirectoryURL
            return (
                directory.appendingPathComponent("memory.loudschars2", isDirectory: false),
                directory.appendingPathComponent("memory.louds", isDirectory: false)
            )
        default:
            let directory = option.dictionaryResourceURL
            return (
                directory.appendingPathComponent("louds/\(identifier).loudschars2", isDirectory: false),
                directory.appendingPathComponent("louds/\(identifier).louds", isDirectory: false)
            )
        }
    }

    /// Returns the URL of the loudstxt3 file for `identifier`.
    private static func getLoudstxt3URL(_ identifier: String, option: ConvertRequestOptions) -> URL {
        if identifier.hasPrefix("user") {
            return option.sharedContainerURL.appendingPathComponent("\(identifier).loudstxt3", isDirectory: false)
        } else if identifier.hasPrefix("memory") {
            return option.memoryDirectoryURL.appendingPathComponent("\(identifier).loudstxt3", isDirectory: false)
        } else {
            return option.dictionaryResourceURL.appendingPathComponent("louds/\(identifier).loudstxt3", isDirectory: false)
        }
    }

    /// Loads a LOUDS from files.
    /// - Parameter identifier: The file name.
    /// - Returns: The LOUDS data if the files can be read, otherwise `nil`.
    internal static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
        let urls = getLOUDSURL(identifier, option: option)
        let nodeIndex2ID: [UInt8]
        do {
            // The file is never read twice, so caching is unnecessary.
            nodeIndex2ID = try Array(Data(contentsOf: urls.chars, options: [.uncached]))
        } catch {
            debug("ファイルが存在しません: \(error)")
            return nil
        }

        guard let bytes = LOUDS.loadLOUDSBinary(from: urls.louds) else {
            return nil
        }
        return LOUDS(bytes: bytes.map { $0.littleEndian }, nodeIndex2ID: nodeIndex2ID)
    }

    /// Parses one loudstxt3 record.
    ///
    /// Layout as read by this function: a 2-byte entry count, then 10 bytes per entry
    /// (three `UInt16` IDs followed by a `Float32` value), then tab-separated UTF-8 text
    /// whose first field is the ruby and whose following fields are the words.
    @inlinable
    static func parseBinary(binary: Data) -> [DicdataElement] {
        // The first 2 bytes hold the entry count.
        let entryCount = binary[binary.startIndex ..< binary.startIndex + 2].toArray(of: UInt16.self)[0]
        var cursor = binary.startIndex + 2
        var dicdata: [DicdataElement] = []
        dicdata.reserveCapacity(Int(entryCount))
        for _ in 0 ..< entryCount {
            let ids = binary[cursor ..< cursor + 6].toArray(of: UInt16.self)
            let value = binary[cursor + 6 ..< cursor + 10].toArray(of: Float32.self)[0]
            dicdata.append(DicdataElement(word: "", ruby: "", lcid: Int(ids[0]), rcid: Int(ids[1]), mid: Int(ids[2]), value: PValue(value)))
            cursor += 10
        }

        let fields = binary[cursor...].split(separator: UInt8(ascii: "\t"), omittingEmptySubsequences: false)
        guard let ruby = String(data: fields[0], encoding: .utf8) else {
            debug("getDataForLoudstxt3: failed to parse", dicdata)
            return []
        }
        for (entryIndex, field) in fields.dropFirst().enumerated() {
            guard let word = String(data: field, encoding: .utf8) else {
                debug("getDataForLoudstxt3: failed to parse", ruby)
                continue
            }
            withMutableValue(&dicdata[entryIndex]) {
                $0.ruby = ruby
                // An empty word field means the word is the ruby itself.
                $0.word = word.isEmpty ? ruby : word
            }
        }
        return dicdata
    }

    /// Reads the loudstxt3 file for `identifier` and parses the records at `indices`.
    /// Returns an empty array (after logging) when the file cannot be read.
    internal static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [DicdataElement] {
        let binary: Data
        do {
            binary = try Data(contentsOf: getLoudstxt3URL(identifier, option: option))
        } catch {
            debug("getDataForLoudstxt3: \(error)")
            return []
        }

        // Header: a 2-byte record count followed by one `UInt32` start offset per record.
        let lc = binary[0..<2].toArray(of: UInt16.self)[0]
        let header_endIndex: UInt32 = 2 + UInt32(lc) * UInt32(MemoryLayout<UInt32>.size)
        let offsets = binary[2..<header_endIndex].toArray(of: UInt32.self)

        return indices.flatMap { (index: Int) -> [DicdataElement] in
            let recordStart = Int(offsets[index])
            // The last record extends to the end of the file.
            let recordEnd = index == (lc - 1) ? binary.endIndex : Int(offsets[index + 1])
            return parseBinary(binary: binary[recordStart ..< recordEnd])
        }
    }
}
|
||||
45
Sources/KanaKanjiConverterModule/LatticeNode.swift
Normal file
45
Sources/KanaKanjiConverterModule/LatticeNode.swift
Normal file
@@ -0,0 +1,45 @@
|
||||
//
|
||||
// LatticeNode.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/11.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A node of the lattice. The conversion is computed on these nodes.
public final class LatticeNode {
    /// The dictionary data held by this node.
    public let data: DicdataElement
    /// The nodes that precede this node. Up to `N_best` of them are stored.
    var prevs: [RegisteredNode]
    /// The score corresponding to each element of `prevs`.
    var values: [PValue]
    /// The range inside `inputData.input`.
    var inputRange: Range<Int>

    /// A fresh node corresponding to `EOS`.
    static var EOSNode: LatticeNode {
        return LatticeNode(data: DicdataElement.EOSData, inputRange: 0 ..< 0)
    }

    init(data: DicdataElement, inputRange: Range<Int>) {
        self.data = data
        self.prevs = []
        self.values = [data.value()]
        self.inputRange = inputRange
    }

    /// Creates a `RegisteredNode` that reflects the information held by this `LatticeNode`.
    /// A `LatticeNode` can have several previous nodes, while a `RegisteredNode` has only one.
    func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode {
        let previous = self.prevs[index]
        return RegisteredNode(data: self.data, registered: previous, totalValue: value, inputRange: self.inputRange)
    }

    /// Walks back through the nodes recursively and builds `CandidateData`.
    /// - Returns: A list of candidate data carrying clause boundary information.
    /// - Note: This API is intended to be called on the final `EOS` node.
    func getCandidateData() -> [CandidateData] {
        return self.prevs.map { previous in previous.getCandidateData() }
    }
}
|
||||
35
Sources/KanaKanjiConverterModule/MIDData.swift
Normal file
35
Sources/KanaKanjiConverterModule/MIDData.swift
Normal file
@@ -0,0 +1,35 @@
|
||||
//
|
||||
// MIDData.swift
|
||||
// azooKey
|
||||
//
|
||||
// Created by ensan on 2022/10/25.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Named MID constants.
public enum MIDData {
    /// Total number of MIDs.
    static var totalCount: Int {
        return 503
    }

    case BOS
    case EOS
    case 一般
    case 数
    case 英単語
    case 小さい数字
    case 年
    case 絵文字

    /// The numeric MID of the case. `BOS` and `EOS` share the same value.
    public var mid: Int {
        switch self {
        case .BOS, .EOS:
            return 500
        case .一般:
            return 501
        case .数:
            return 452
        case .英単語:
            return 40
        case .小さい数字:
            return 361
        case .年:
            return 237
        case .絵文字:
            return 502
        }
    }
}
|
||||
102
Sources/KanaKanjiConverterModule/RegisteredNodeProtocol.swift
Normal file
102
Sources/KanaKanjiConverterModule/RegisteredNodeProtocol.swift
Normal file
@@ -0,0 +1,102 @@
|
||||
//
|
||||
// RegisteredNode.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/16.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Abstraction through an existential type so that the `struct` `RegisteredNode`
/// can hold a previous node recursively.
/// - Note: A comparison with `indirect enum` has not been done yet.
protocol RegisteredNodeProtocol {
    /// The dictionary data held by the node.
    var data: DicdataElement { get }
    /// The previous node, if any.
    var prev: (any RegisteredNodeProtocol)? { get }
    /// The accumulated value up to this node.
    var totalValue: PValue { get }
    /// The corresponding range of the input.
    var inputRange: Range<Int> { get }
}
|
||||
|
||||
struct RegisteredNode: RegisteredNodeProtocol {
    /// The dictionary data held by this node.
    let data: DicdataElement
    /// The data of the previous node.
    let prev: (any RegisteredNodeProtocol)?
    /// The cost from the start node up to this node.
    let totalValue: PValue
    /// The corresponding range in the `input` of the `composingText`.
    let inputRange: Range<Int>

    init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, inputRange: Range<Int>) {
        self.data = data
        self.prev = registered
        self.totalValue = totalValue
        self.inputRange = inputRange
    }

    /// Creates the start node.
    /// - Returns: The data of the start node.
    static func BOSNode() -> RegisteredNode {
        return RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, inputRange: 0 ..< 0)
    }

    /// Creates a start node that takes the already committed part into account.
    /// - Returns: The data of the start node.
    static func fromLastCandidate(_ candidate: Candidate) -> RegisteredNode {
        // Carry over the right class ID and the MID of the committed candidate.
        let startData = DicdataElement(
            word: "",
            ruby: "",
            lcid: CIDData.BOS.cid,
            rcid: candidate.data.last?.rcid ?? CIDData.BOS.cid,
            mid: candidate.lastMid,
            value: 0
        )
        return RegisteredNode(data: startData, registered: nil, totalValue: 0, inputRange: 0 ..< 0)
    }
}
|
||||
|
||||
extension RegisteredNodeProtocol {
    /// Walks back through the nodes recursively and builds a `CandidateData`.
    /// - Returns: Candidate data carrying clause boundary information.
    func getCandidateData() -> CandidateData {
        guard let previous = self.prev else {
            // Start node: begin with a single empty clause.
            let firstClause = ClauseDataUnit()
            firstClause.mid = self.data.mid
            firstClause.inputRange = self.inputRange
            return CandidateData(clauses: [(clause: firstClause, value: .zero)], data: [])
        }
        // Build the data of the path leading to this node first.
        var candidate = previous.getCandidateData()

        // Nodes without a word contribute nothing.
        guard !self.data.word.isEmpty, let lastClause = candidate.lastClause else {
            return candidate
        }

        // 500 is the MID that `MIDData` assigns to BOS/EOS.
        let boundaryMid = 500
        let continuesClause = lastClause.text.isEmpty || !DicdataStore.isClause(previous.data.rcid, self.data.lcid)
        if continuesClause {
            // Not a clause boundary: extend the last clause.
            lastClause.text.append(self.data.word)
            lastClause.inputRange = lastClause.inputRange.startIndex ..< self.inputRange.endIndex
            // The first condition covers the case where the clause has just been started.
            if (lastClause.mid == boundaryMid && self.data.mid != boundaryMid) || DicdataStore.includeMMValueCalculation(self.data) {
                lastClause.mid = self.data.mid
            }
            candidate.clauses[candidate.clauses.count - 1].value = self.totalValue
        } else {
            // Clause boundary: start a new clause.
            let newClause = ClauseDataUnit()
            newClause.text = self.data.word
            newClause.inputRange = self.inputRange
            if DicdataStore.includeMMValueCalculation(self.data) {
                newClause.mid = self.data.mid
            }
            // Update the preceding clause.
            lastClause.nextLcid = self.data.lcid
            candidate.clauses.append((clause: newClause, value: self.totalValue))
        }
        candidate.data.append(self.data)
        return candidate
    }
}
|
||||
132
Sources/KanaKanjiConverterModule/Replacer/TextReplacer.swift
Normal file
132
Sources/KanaKanjiConverterModule/Replacer/TextReplacer.swift
Normal file
@@ -0,0 +1,132 @@
|
||||
//
|
||||
// TextReplacer.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2023/03/17.
|
||||
// Copyright © 2023 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
/// `TextReplacer`は前後の文脈に基づいて、現在のカーソル位置の語の置き換えを提案するためのモジュールである。
|
||||
/// 例えば、「tha|nk」と入力があるとき、「think」や「thanks」などを候補として表示することが考えられる。
|
||||
///
|
||||
/// 現在の機能は「絵文字」のバリエーションを表示することに限定する。
|
||||
public struct TextReplacer {
|
||||
// TODO: prefix trieなどの方が便利だと思う
|
||||
private var emojiSearchDict: [String: [String]] = [:]
|
||||
private var emojiGroups: [EmojiGroup] = []
|
||||
|
||||
/// Loads the emoji table bundled with the app.
///
/// Each line is expected to have three tab-separated fields: the base emoji,
/// comma-separated search queries, and comma-separated variations.
/// Loading stops at the first malformed line, keeping what was read so far;
/// when the file cannot be read, the replacer stays empty.
public init() {
    // The file to read has to be switched depending on the OS version.
    let fileName: String
    if #available(iOS 16.4, *) {
        fileName = "emoji_all_E15.0.txt.gen"
    } else if #available(iOS 15.4, *) {
        fileName = "emoji_all_E14.0.txt.gen"
    } else {
        fileName = "emoji_all_E13.1.txt.gen"
    }
    let fileURL: URL = Bundle.main.bundleURL.appendingPathComponent(fileName, isDirectory: false)

    var searchDict: [String: [String]] = [:]
    var groups: [EmojiGroup] = []
    do {
        let contents = try String(contentsOf: fileURL, encoding: .utf8)
        for line in contents.split(separator: "\n") {
            let columns = line.split(separator: "\t", omittingEmptySubsequences: false)
            if columns.count != 3 {
                debug("error", line)
                break
            }
            let base = String(columns[0])
            let variations = columns[2].split(separator: ",").map(String.init)
            // Register the search queries.
            for query in columns[1].split(separator: ",") {
                searchDict[String(query), default: []] += [base] + variations
            }
            groups.append(EmojiGroup(base: base, variations: variations))
        }
    } catch {
        debug(error)
    }
    self.emojiSearchDict = searchDict
    self.emojiGroups = groups
}
|
||||
|
||||
public func getSearchResult(query: String, target: [ConverterBehaviorSemantics.ReplacementTarget]) -> [SearchResultItem] {
|
||||
// 正規化する
|
||||
let query = query.lowercased().toHiragana()
|
||||
var results: [SearchResultItem] = []
|
||||
if target.contains(.emoji) {
|
||||
if let candidates = self.emojiSearchDict[query] {
|
||||
for candidate in candidates {
|
||||
results.append(SearchResultItem(query: query, text: candidate))
|
||||
}
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
public struct SearchResultItem {
|
||||
public var query: String
|
||||
public var text: String
|
||||
public var inputable: Bool {
|
||||
true
|
||||
}
|
||||
public func getDebugInformation() -> String {
|
||||
"SearchResultItem(\(text))"
|
||||
}
|
||||
}
|
||||
|
||||
public func getReplacementCandidate(left: String, center: String, right: String, target: [ConverterBehaviorSemantics.ReplacementTarget]) -> [ReplacementCandidate] {
|
||||
var results: [ReplacementCandidate] = []
|
||||
if target.contains(.emoji) {
|
||||
if center.count == 1, let item = self.emojiGroups.first(where: {$0.all.contains(center)}) {
|
||||
// 選択部分の置換
|
||||
for emoji in item.all where emoji != center {
|
||||
results.append(ReplacementCandidate(target: center, replace: emoji, base: item.base, targetType: .emoji))
|
||||
}
|
||||
} else if let last = left.last.map(String.init), let item = self.emojiGroups.first(where: {$0.all.contains(last)}) {
|
||||
// 左側の置換
|
||||
for emoji in item.all where emoji != last {
|
||||
results.append(ReplacementCandidate(target: last, replace: emoji, base: item.base, targetType: .emoji))
|
||||
}
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
/// 「同一」の絵文字のグループ
|
||||
private struct EmojiGroup {
|
||||
var base: String
|
||||
var variations: [String]
|
||||
var all: [String] {
|
||||
[base] + variations
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A proposal to replace `target` with `replace`.
public struct ReplacementCandidate {
    /// The text that would be replaced.
    public var target: String
    /// The text that would be inserted instead.
    public var replace: String
    /// The base form of the group the replacement was taken from.
    public var base: String
    /// The kind of replacement this candidate represents.
    public var targetType: ConverterBehaviorSemantics.ReplacementTarget

    /// The text to show for this candidate; identical to `replace`.
    public var text: String {
        return replace
    }

    /// Always `true`.
    public var inputable: Bool {
        return true
    }

    /// A short description of this candidate for debugging.
    public func getDebugInformation() -> String {
        return "ReplacementCandidate(\(target)->\(replace))"
    }
}
|
||||
302
Sources/KanaKanjiConverterModule/Roman2Kana.swift
Normal file
302
Sources/KanaKanjiConverterModule/Roman2Kana.swift
Normal file
@@ -0,0 +1,302 @@
|
||||
//
|
||||
// Roman2Kana.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/24.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
enum Roman2Kana {
|
||||
// The entries of `hiraganaChanges` with each `[Character]` key and value converted to a `String`.
// NOTE(review): despite the name, the values are copied from the hiragana table without any
// katakana conversion here — confirm whether that is intended.
static let katakanaChanges: [String: String] = Dictionary(uniqueKeysWithValues: hiraganaChanges.map { (String($0.key), String($0.value)) })
|
||||
static let hiraganaChanges: [[Character]: [Character]] = Dictionary(uniqueKeysWithValues: [
|
||||
"a": "あ",
|
||||
"xa": "ぁ",
|
||||
"la": "ぁ",
|
||||
"i": "い",
|
||||
"xi": "ぃ",
|
||||
"li": "ぃ",
|
||||
"u": "う",
|
||||
"wu": "う",
|
||||
"vu": "ゔ",
|
||||
"xu": "ぅ",
|
||||
"lu": "ぅ",
|
||||
"e": "え",
|
||||
"xe": "ぇ",
|
||||
"le": "ぇ",
|
||||
"o": "お",
|
||||
"xo": "ぉ",
|
||||
"lo": "ぉ",
|
||||
"ka": "か",
|
||||
"ga": "が",
|
||||
"xka": "ゕ",
|
||||
"lka": "ゕ",
|
||||
"ki": "き",
|
||||
"gi": "ぎ",
|
||||
"ku": "く",
|
||||
"gu": "ぐ",
|
||||
"ke": "け",
|
||||
"ge": "げ",
|
||||
"xke": "ゖ",
|
||||
"lke": "ゖ",
|
||||
"ko": "こ",
|
||||
"go": "ご",
|
||||
"sa": "さ",
|
||||
"za": "ざ",
|
||||
"si": "し",
|
||||
"shi": "し",
|
||||
"zi": "じ",
|
||||
"ji": "じ",
|
||||
"su": "す",
|
||||
"zu": "ず",
|
||||
"se": "せ",
|
||||
"ze": "ぜ",
|
||||
"so": "そ",
|
||||
"zo": "ぞ",
|
||||
"ta": "た",
|
||||
"da": "だ",
|
||||
"ti": "ち",
|
||||
"chi": "ち",
|
||||
"di": "ぢ",
|
||||
"tu": "つ",
|
||||
"tsu": "つ",
|
||||
"xtu": "っ",
|
||||
"ltu": "っ",
|
||||
"xtsu": "っ",
|
||||
"ltsu": "っ",
|
||||
"du": "づ",
|
||||
"te": "て",
|
||||
"de": "で",
|
||||
"to": "と",
|
||||
"do": "ど",
|
||||
"na": "な",
|
||||
"ni": "に",
|
||||
"nu": "ぬ",
|
||||
"ne": "ね",
|
||||
"no": "の",
|
||||
"ha": "は",
|
||||
"ba": "ば",
|
||||
"pa": "ぱ",
|
||||
"hi": "ひ",
|
||||
"bi": "び",
|
||||
"pi": "ぴ",
|
||||
"hu": "ふ",
|
||||
"fu": "ふ",
|
||||
"bu": "ぶ",
|
||||
"pu": "ぷ",
|
||||
"he": "へ",
|
||||
"be": "べ",
|
||||
"pe": "ぺ",
|
||||
"ho": "ほ",
|
||||
"bo": "ぼ",
|
||||
"po": "ぽ",
|
||||
"ma": "ま",
|
||||
"mi": "み",
|
||||
"mu": "む",
|
||||
"me": "め",
|
||||
"mo": "も",
|
||||
"ya": "や",
|
||||
"xya": "ゃ",
|
||||
"lya": "ゃ",
|
||||
"yu": "ゆ",
|
||||
"xyu": "ゅ",
|
||||
"lyu": "ゅ",
|
||||
"yo": "よ",
|
||||
"xyo": "ょ",
|
||||
"lyo": "ょ",
|
||||
"ra": "ら",
|
||||
"ri": "り",
|
||||
"ru": "る",
|
||||
"re": "れ",
|
||||
"ro": "ろ",
|
||||
"wa": "わ",
|
||||
"xwa": "ゎ",
|
||||
"lwa": "ゎ",
|
||||
"wyi": "ゐ",
|
||||
"wye": "ゑ",
|
||||
"wo": "を",
|
||||
"nn": "ん",
|
||||
"ye": "いぇ",
|
||||
"va": "ゔぁ",
|
||||
"vi": "ゔぃ",
|
||||
"ve": "ゔぇ",
|
||||
"vo": "ゔぉ",
|
||||
"kya": "きゃ",
|
||||
"kyu": "きゅ",
|
||||
"kye": "きぇ",
|
||||
"kyo": "きょ",
|
||||
"gya": "ぎゃ",
|
||||
"gyu": "ぎゅ",
|
||||
"gye": "ぎぇ",
|
||||
"gyo": "ぎょ",
|
||||
"qa": "くぁ",
|
||||
"kwa": "くぁ",
|
||||
"qwa": "くぁ",
|
||||
"qi": "くぃ",
|
||||
"kwi": "くぃ",
|
||||
"qwi": "くぃ",
|
||||
"qu": "くぅ",
|
||||
"kwu": "くぅ",
|
||||
"qwu": "くぅ",
|
||||
"qe": "くぇ",
|
||||
"kwe": "くぇ",
|
||||
"qwe": "くぇ",
|
||||
"qo": "くぉ",
|
||||
"kwo": "くぉ",
|
||||
"qwo": "くぉ",
|
||||
"gwa": "ぐぁ",
|
||||
"gwi": "ぐぃ",
|
||||
"gwu": "ぐぅ",
|
||||
"gwe": "ぐぇ",
|
||||
"gwo": "ぐぉ",
|
||||
"sha": "しゃ",
|
||||
"sya": "しゃ",
|
||||
"shu": "しゅ",
|
||||
"syu": "しゅ",
|
||||
"she": "しぇ",
|
||||
"sye": "しぇ",
|
||||
"sho": "しょ",
|
||||
"syo": "しょ",
|
||||
"ja": "じゃ",
|
||||
"zya": "じゃ",
|
||||
"jya": "じゃ",
|
||||
"jyi": "じぃ",
|
||||
"ju": "じゅ",
|
||||
"zyu": "じゅ",
|
||||
"jyu": "じゅ",
|
||||
"je": "じぇ",
|
||||
"zye": "じぇ",
|
||||
"jye": "じぇ",
|
||||
"jo": "じょ",
|
||||
"zyo": "じょ",
|
||||
"jyo": "じょ",
|
||||
"swa": "すぁ",
|
||||
"swi": "すぃ",
|
||||
"swu": "すぅ",
|
||||
"swe": "すぇ",
|
||||
"swo": "すぉ",
|
||||
"cha": "ちゃ",
|
||||
"cya": "ちゃ",
|
||||
"tya": "ちゃ",
|
||||
"tyi": "ちぃ",
|
||||
"cyi": "ちぃ",
|
||||
"chu": "ちゅ",
|
||||
"cyu": "ちゅ",
|
||||
"tyu": "ちゅ",
|
||||
"che": "ちぇ",
|
||||
"cye": "ちぇ",
|
||||
"tye": "ちぇ",
|
||||
"cho": "ちょ",
|
||||
"cyo": "ちょ",
|
||||
"tyo": "ちょ",
|
||||
"tsa": "つぁ",
|
||||
"tsi": "つぃ",
|
||||
"tse": "つぇ",
|
||||
"tso": "つぉ",
|
||||
"tha": "てゃ",
|
||||
"thi": "てぃ",
|
||||
"thu": "てゅ",
|
||||
"the": "てぇ",
|
||||
"tho": "てょ",
|
||||
"twa": "とぁ",
|
||||
"twi": "とぃ",
|
||||
"twu": "とぅ",
|
||||
"twe": "とぇ",
|
||||
"two": "とぉ",
|
||||
"dya": "ぢゃ",
|
||||
"dyi": "ぢぃ",
|
||||
"dyu": "ぢゅ",
|
||||
"dye": "ぢぇ",
|
||||
"dyo": "ぢょ",
|
||||
"dha": "でゃ",
|
||||
"dhi": "でぃ",
|
||||
"dhu": "でゅ",
|
||||
"dhe": "でぇ",
|
||||
"dho": "でょ",
|
||||
"dwa": "どぁ",
|
||||
"dwi": "どぃ",
|
||||
"dwu": "どぅ",
|
||||
"dwe": "どぇ",
|
||||
"dwo": "どぉ",
|
||||
"nya": "にゃ",
|
||||
"nyi": "にぃ",
|
||||
"nyu": "にゅ",
|
||||
"nye": "にぇ",
|
||||
"nyo": "にょ",
|
||||
"hya": "ひゃ",
|
||||
"hyi": "ひぃ",
|
||||
"hyu": "ひゅ",
|
||||
"hye": "ひぇ",
|
||||
"hyo": "ひょ",
|
||||
"bya": "びゃ",
|
||||
"byi": "びぃ",
|
||||
"byu": "びゅ",
|
||||
"bye": "びぇ",
|
||||
"byo": "びょ",
|
||||
"pya": "ぴゃ",
|
||||
"pyi": "ぴぃ",
|
||||
"pyu": "ぴゅ",
|
||||
"pye": "ぴぇ",
|
||||
"pyo": "ぴょ",
|
||||
"fa": "ふぁ",
|
||||
"hwa": "ふぁ",
|
||||
"fwa": "ふぁ",
|
||||
"fi": "ふぃ",
|
||||
"hwi": "ふぃ",
|
||||
"fwi": "ふぃ",
|
||||
"fwu": "ふぅ",
|
||||
"fe": "ふぇ",
|
||||
"hwe": "ふぇ",
|
||||
"fwe": "ふぇ",
|
||||
"fo": "ふぉ",
|
||||
"hwo": "ふぉ",
|
||||
"fwo": "ふぉ",
|
||||
"mya": "みゃ",
|
||||
"myi": "みぃ",
|
||||
"myu": "みゅ",
|
||||
"mye": "みぇ",
|
||||
"myo": "みょ",
|
||||
"rya": "りゃ",
|
||||
"ryi": "りぃ",
|
||||
"ryu": "りゅ",
|
||||
"rye": "りぇ",
|
||||
"ryo": "りょ",
|
||||
"wi": "うぃ",
|
||||
"we": "うぇ",
|
||||
"wha": "うぁ",
|
||||
"whi": "うぃ",
|
||||
"whu": "う",
|
||||
"whe": "うぇ",
|
||||
"who": "うぉ",
|
||||
"zh": "←",
|
||||
"zj": "↓",
|
||||
"zk": "↑",
|
||||
"zl": "→"
|
||||
].map {(Array($0.key), Array($0.value))})
|
||||
|
||||
/// Appends `added` to `currentText`, converting a trailing romaji sequence to kana where possible.
/// - Parameters:
///   - currentText: The text typed so far.
///   - added: The newly typed character.
/// - Returns: The updated text.
static func toHiragana(currentText: [Character], added: Character) -> [Character] {
    // Try the longest trailing sequence first: 3, then 2, then 1 previous characters plus `added`.
    for length in stride(from: 3, through: 1, by: -1) {
        let tail = currentText.suffix(length)
        if let kana = Roman2Kana.hiraganaChanges[Array(tail) + [added]] {
            return Array(currentText.dropLast(tail.count)) + kana
        }
    }

    let previous = currentText.last
    // The same roman letter typed twice: the first one becomes "っ".
    if previous == added && String(added).onlyRomanAlphabet {
        return Array(currentText.dropLast()) + ["っ", added]
    }
    // "n" followed by anything but "y" (and not part of a table entry): the "n" becomes "ん".
    if previous == "n" && added != "y" {
        return Array(currentText.dropLast()) + ["ん", added]
    }

    // Finally, `added` on its own may be a table entry (e.g. a vowel).
    guard let kana = Roman2Kana.hiraganaChanges[[added]] else {
        return currentText + [added]
    }
    return currentText + kana
}
|
||||
}
|
||||
45
Sources/KanaKanjiConverterModule/States.swift
Normal file
45
Sources/KanaKanjiConverterModule/States.swift
Normal file
@@ -0,0 +1,45 @@
|
||||
//
|
||||
// States.swift
|
||||
//
|
||||
//
|
||||
// Created by ensan on 2023/04/30.
|
||||
//
|
||||
|
||||
/// How typed characters are interpreted.
public enum InputStyle: String {
    /// Typed characters are entered as they are. Raw value: `"direct"`.
    case direct
    /// Typed roman letters are treated as romaji input for Japanese. Raw value: `"roman"`.
    case roman2kana = "roman"
}
|
||||
|
||||
/// The language of a keyboard; the raw value of each case is its own name.
public enum KeyboardLanguage: String, Codable, Equatable {
    case en_US, ja_JP, el_GR, none
}
|
||||
|
||||
/// How the learning memory takes part in conversion.
public enum LearningType: Int, CaseIterable {
    case inputAndOutput, onlyOutput, nothing

    /// `true` only for `.inputAndOutput`.
    var needUpdateMemory: Bool {
        switch self {
        case .inputAndOutput:
            return true
        case .onlyOutput, .nothing:
            return false
        }
    }

    /// `true` for every case except `.nothing`.
    var needUsingMemory: Bool {
        switch self {
        case .inputAndOutput, .onlyOutput:
            return true
        case .nothing:
            return false
        }
    }
}
|
||||
|
||||
/// What kind of candidates the converter is expected to offer.
public enum ConverterBehaviorSemantics {
    /// The kinds of text that a `.replacement` behaviour may offer replacements for.
    public enum ReplacementTarget: UInt8 {
        case emoji
    }

    /// The user picks a conversion candidate, as in ordinary Japanese input.
    case conversion
    /// No confirmation step is needed, but replacement candidates for the text to the
    /// left and right are offered, as in English input on iOS.
    case replacement([ReplacementTarget])
}
|
||||
266
Sources/KanaKanjiConverterModule/TemplateData.swift
Normal file
266
Sources/KanaKanjiConverterModule/TemplateData.swift
Normal file
@@ -0,0 +1,266 @@
|
||||
//
|
||||
// TemplateLiteral.swift
|
||||
// azooKey
|
||||
//
|
||||
// Created by ensan on 2020/12/20.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
/// A named template whose text form is a `<date ...>` or `<random ...>` literal.
///
/// Encoded as two keys: `template` (the exported literal) and `name`.
public struct TemplateData: Codable {
    /// The display name of the template.
    public var name: String
    /// The parsed literal.
    public var literal: any TemplateLiteralProtocol
    /// The kind of `literal`.
    public var type: TemplateLiteralType

    private enum CodingKeys: String, CodingKey {
        case template
        case name
    }

    /// Creates a template by parsing its text form.
    ///
    /// The kind is decided by what follows the first character of `template`
    /// (`date` or `random`). Anything else is logged and replaced with
    /// `DateTemplateLiteral.example`.
    /// - Parameters:
    ///   - template: The text form, e.g. `<random type="int" value="1,6">`.
    ///   - name: The display name.
    public init(template: String, name: String) {
        self.name = name
        let body = template.dropFirst()
        if body.hasPrefix("date") {
            self.type = .date
            self.literal = DateTemplateLiteral.import(from: template)
        } else if body.hasPrefix("random") {
            self.type = .random
            self.literal = RandomTemplateLiteral.import(from: template)
        } else {
            debug("不明", template, name)
            self.type = .date
            self.literal = DateTemplateLiteral.example
        }
    }

    public init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let template = try values.decode(String.self, forKey: .template)
        let name = try values.decode(String.self, forKey: .name)
        self.init(template: template, name: name)
    }

    /// A sample of the text this template produces.
    public var previewString: String {
        return literal.previewString()
    }

    public func encode(to encoder: Encoder) throws {
        // The container has to be a `var`.
        var container = encoder.container(keyedBy: CodingKeys.self)
        try container.encode(self.literal.export(), forKey: .template)
        try container.encode(name, forKey: .name)
    }

    /// The name of the file, inside the documents directory, that templates are stored in.
    public static let dataFileName = "user_templates.json"

    /// Writes `data` as JSON to `dataFileName` in the documents directory.
    ///
    /// Failures at any step (encoding, locating the directory, writing) are logged
    /// and otherwise ignored.
    public static func save(_ data: [TemplateData]) {
        do {
            let json = try JSONEncoder().encode(data)
            let url = try FileManager.default
                .url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
                .appendingPathComponent(TemplateData.dataFileName)
            try json.write(to: url)
        } catch {
            debug("TemplateData.save", error)
        }
    }

    /// Reads the templates stored by `save(_:)`.
    /// - Returns: The stored templates, or a built-in default set when the file
    ///   cannot be read or decoded (the error is logged).
    public static func load() -> [TemplateData] {
        do {
            let url = try FileManager.default
                .url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: false)
                .appendingPathComponent(Self.dataFileName)
            let json = try Data(contentsOf: url)
            return try JSONDecoder().decode([TemplateData].self, from: json)
        } catch {
            debug("TemplateData.load", error)
            return [
                TemplateData(template: "<random type=\"int\" value=\"1,6\">", name: "サイコロ"),
                TemplateData(template: "<random type=\"double\" value=\"0,1\">", name: "乱数"),
                TemplateData(template: "<random type=\"string\" value=\"大吉,吉,凶\">", name: "おみくじ"),
                TemplateData(template: "<date format=\"yyyy年MM月dd日\" type=\"western\" language=\"ja_JP\" delta=\"0\" deltaunit=\"1\">", name: "今日"),
                TemplateData(template: "<date format=\"yyyy年MM月dd日\" type=\"western\" language=\"ja_JP\" delta=\"1\" deltaunit=\"86400\">", name: "明日"),
                TemplateData(template: "<date format=\"Gy年MM月dd日\" type=\"japanese\" language=\"ja_JP\" delta=\"0\" deltaunit=\"1\">", name: "和暦")
            ]
        }
    }
}
|
||||
|
||||
/// A literal that can be written out as template text and previewed.
public protocol TemplateLiteralProtocol {
    /// Returns a sample of the text this literal produces.
    func previewString() -> String

    /// Returns the text form of this literal.
    func export() -> String
}
|
||||
|
||||
/// The kinds of template literal.
public enum TemplateLiteralType {
    case date, random
}
|
||||
|
||||
public extension TemplateLiteralProtocol {
    /// Extracts the value of `key` from the tokens of a template.
    ///
    /// The first token starting with `key="` is used. The `key="` prefix and the
    /// last character are removed; if the remainder still ends in `"` (as happens
    /// for the final token, which ends in `">`), that quote is removed as well.
    /// - Parameters:
    ///   - splited: The tokens of the template.
    ///   - key: The attribute name to look for.
    /// - Returns: The attribute value, or an empty string when no token matches.
    static func parse(splited: [some StringProtocol], key: String) -> some StringProtocol {
        let opening = key + "=\""
        let token = splited.first(where: { $0.hasPrefix(opening) }) ?? ""
        let value = token.dropFirst(key.count + 2).dropLast(1)
        return value.hasSuffix("\"") ? value.dropLast(1) : value
    }
}
|
||||
|
||||
/// A template literal that produces a formatted date, optionally shifted from now.
public struct DateTemplateLiteral: TemplateLiteralProtocol, Equatable {
    public init(format: String, type: DateTemplateLiteral.CalendarType, language: DateTemplateLiteral.Language, delta: String, deltaUnit: Int) {
        self.format = format
        self.type = type
        self.language = language
        self.delta = delta
        self.deltaUnit = deltaUnit
    }

    /// A sample literal; also used as the source of fallback values by `import(from:escaped:)`.
    public static let example = DateTemplateLiteral(format: "yyyy年MM月dd日(EEE) a hh:mm:ss", type: .western, language: .japanese, delta: "0", deltaUnit: 1)

    /// The `DateFormatter` format string.
    public var format: String
    /// The calendar used for formatting.
    public var type: CalendarType
    /// The locale used for formatting.
    public var language: Language
    /// The offset from now, as text; it is multiplied by `deltaUnit`. Text that is not an integer counts as 0.
    public var delta: String
    /// The multiplier applied to `delta`; the product is used as a number of seconds.
    public var deltaUnit: Int

    public enum CalendarType: String {
        case western
        case japanese

        /// The matching `Calendar.Identifier`.
        public var identifier: Calendar.Identifier {
            switch self {
            case .western:
                return .gregorian
            case .japanese:
                return .japanese
            }
        }
    }

    public enum Language: String {
        case english = "en_US"
        case japanese = "ja_JP"

        /// The locale identifier; identical to the raw value.
        public var identifier: String {
            return self.rawValue
        }
    }

    /// Formats the current date, shifted by `delta * deltaUnit` seconds.
    public func previewString() -> String {
        let formatter = DateFormatter()
        formatter.locale = Locale(identifier: self.language.identifier)
        formatter.calendar = Calendar(identifier: self.type.identifier)
        formatter.dateFormat = format
        // Multiply as `Double`s: the `Int` product traps on overflow for large user-supplied values.
        let offset = Double(Int(delta) ?? 0) * Double(deltaUnit)
        return formatter.string(from: Date().advanced(by: offset))
    }

    /// Parses the text form produced by `export()`.
    ///
    /// Missing or unrecognised `type` / `language` attributes fall back to the values
    /// of `example` instead of crashing; a `deltaunit` that is not an integer becomes 0.
    /// - Parameters:
    ///   - string: The text form of the literal.
    ///   - escaped: Unused.
    /// - Returns: The parsed literal.
    public static func `import`(from string: String, escaped: Bool = false) -> DateTemplateLiteral {
        let splited = string.split(separator: " ")
        let format = parse(splited: splited, key: "format")
        let type = parse(splited: splited, key: "type")
        let language = parse(splited: splited, key: "language")
        let delta = parse(splited: splited, key: "delta")
        let deltaUnit = parse(splited: splited, key: "deltaunit")
        return DateTemplateLiteral(
            format: format.unescaped(),
            type: CalendarType(rawValue: String(type)) ?? example.type,
            language: Language(rawValue: String(language)) ?? example.language,
            delta: String(delta),
            deltaUnit: Int(deltaUnit) ?? 0
        )
    }

    /// Returns the text form of this literal, with `format` escaped.
    public func export() -> String {
        return """
        <date format="\(format.escaped())" type="\(type.rawValue)" language="\(language.identifier)" delta="\(delta)" deltaunit="\(deltaUnit)">
        """
    }
}
|
||||
|
||||
/// A template literal that produces a random integer, a random floating-point
/// number, or a random pick from a list of strings.
public struct RandomTemplateLiteral: TemplateLiteralProtocol, Equatable {
    public init(value: RandomTemplateLiteral.Value) {
        self.value = value
    }

    public static func == (lhs: RandomTemplateLiteral, rhs: RandomTemplateLiteral) -> Bool {
        return lhs.value == rhs.value
    }

    /// The name of a `Value` kind as it appears in the `type` attribute.
    public enum ValueType: String {
        case int
        case double
        case string
    }

    public enum Value: Equatable {
        case int(from: Int, to: Int)
        case double(from: Double, to: Double)
        case string([String])

        /// The kind of this value.
        public var type: ValueType {
            switch self {
            case .int:
                return .int
            case .double:
                return .double
            case .string:
                return .string
            }
        }

        /// The comma-separated text form of the payload; string elements are escaped individually.
        public var string: String {
            switch self {
            case let .int(from: left, to: right):
                return "\(left),\(right)"
            case let .double(from: left, to: right):
                return "\(left),\(right)"
            case let .string(strings):
                return strings.map { $0.escaped() }.joined(separator: ",")
            }
        }
    }

    public var value: Value

    /// Produces one random sample.
    ///
    /// Bounds given in reverse order are swapped rather than trapping. A floating-point
    /// range with a non-finite bound or a non-finite width cannot be sampled and yields
    /// the same placeholder as an empty string list.
    public func previewString() -> String {
        switch value {
        case let .int(from: left, to: right):
            return "\(Int.random(in: min(left, right)...max(left, right)))"
        case let .double(from: left, to: right):
            // `import` stores NaN for unparsable bounds; forming a range from NaN traps.
            guard left.isFinite, right.isFinite else {
                return "データ無し"
            }
            let lower = min(left, right)
            let upper = max(left, right)
            guard (upper - lower).isFinite else {
                return "データ無し"
            }
            return "\(Double.random(in: lower...upper))"
        case let .string(strings):
            return strings.randomElement() ?? "データ無し"
        }
    }

    /// Parses the text form produced by `export()`.
    ///
    /// Malformed input no longer crashes: a missing bound is treated like an unparsable
    /// one (0 for `int`, NaN for `double`), and a missing or unrecognised `type` is
    /// treated as a list of strings.
    /// - Parameters:
    ///   - string: The text form of the literal.
    ///   - escaped: Unused.
    /// - Returns: The parsed literal.
    public static func `import`(from string: String, escaped: Bool = false) -> RandomTemplateLiteral {
        let splited = string.split(separator: " ")
        let type = parse(splited: splited, key: "type")
        let valueString = parse(splited: splited, key: "value").unescaped()

        let value: Value
        switch ValueType(rawValue: String(type)) {
        case .int?:
            let bounds = valueString.split(separator: ",")
            value = .int(
                from: bounds.first.flatMap { Int($0) } ?? 0,
                to: bounds.dropFirst().first.flatMap { Int($0) } ?? 0
            )
        case .double?:
            let bounds = valueString.split(separator: ",")
            value = .double(
                from: bounds.first.flatMap { Double($0) } ?? .nan,
                to: bounds.dropFirst().first.flatMap { Double($0) } ?? .nan
            )
        case .string?, nil:
            // NOTE(review): `export()` escapes each element and then the joined text again,
            // while only one `unescaped()` is applied here — confirm that strings round-trip.
            value = .string(valueString.components(separatedBy: ","))
        }
        return RandomTemplateLiteral(value: value)
    }

    /// Returns the text form of this literal.
    public func export() -> String {
        return """
        <random type="\(value.type.rawValue)" value="\(value.string.escaped())">
        """
    }
}
|
||||
69
Sources/SwiftUtils/AppVersion.swift
Normal file
69
Sources/SwiftUtils/AppVersion.swift
Normal file
@@ -0,0 +1,69 @@
|
||||
//
|
||||
// AppVersion.swift
|
||||
// azooKey
|
||||
//
|
||||
// Created by ensan on 2022/07/02.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A version of an app, stored as three integer components: major, minor and patch.
///
/// It is created from dot-separated text such as `"1.2.3"`. Components that are
/// missing count as 0 (`"2"` is `2.0.0`), components after the third are ignored,
/// and text containing a component that is not an integer is rejected.
public struct AppVersion: Codable, Equatable, Comparable, Hashable, LosslessStringConvertible, CustomStringConvertible {
    public var majorVersion: Int
    public var minorVersion: Int
    public var patchVersion: Int

    /// Creates a version from dot-separated text.
    /// - Parameter description: Text such as `"1.2.3"`.
    /// - Returns: `nil` when any dot-separated component cannot be read as an `Int`.
    public init?(_ description: String) {
        var components: [Int] = []
        for piece in description.split(separator: ".") {
            guard let number = Int(piece) else {
                return nil
            }
            components.append(number)
        }
        // Pad so that the first three components always exist.
        while components.count < 3 {
            components.append(0)
        }
        self.majorVersion = components[0]
        self.minorVersion = components[1]
        self.patchVersion = components[2]
    }

    /// Orders versions by major, then minor, then patch component.
    public static func < (lhs: AppVersion, rhs: AppVersion) -> Bool {
        return (lhs.majorVersion, lhs.minorVersion, lhs.patchVersion)
            < (rhs.majorVersion, rhs.minorVersion, rhs.patchVersion)
    }

    /// The version as `major.minor.patch`.
    public var description: String {
        return [majorVersion, minorVersion, patchVersion].map(String.init).joined(separator: ".")
    }
}
|
||||
128
Sources/SwiftUtils/ArrayUtils.swift
Normal file
128
Sources/SwiftUtils/ArrayUtils.swift
Normal file
@@ -0,0 +1,128 @@
|
||||
//
|
||||
// ArrayUtils.swift
|
||||
//
|
||||
//
|
||||
// Created by ensan on 2023/04/30.
|
||||
//
|
||||
|
||||
import Algorithms
|
||||
import Foundation
|
||||
|
||||
@resultBuilder
|
||||
/// A result builder that collects the listed values into an array.
public struct ArrayBuilder {
    /// Returns the given values as an array, in the order they were listed.
    public static func buildBlock<T>(_ values: T...) -> [T] {
        return values
    }
}
|
||||
|
||||
public extension Sequence {
    /// Concatenates this sequence with another one that has the same element type.
    /// - Parameters:
    ///   - sequence: The sequence whose elements come after the elements of this sequence.
    /// - Returns: The result of `chain(self, sequence)`.
    @inlinable func chained<S: Sequence<Element>>(_ sequence: S) -> Chain2Sequence<Self, S> {
        return chain(self, sequence)
    }
}
|
||||
|
||||
public extension Collection {
    /// Transforms every element and collects the results into a `Set`.
    /// - Parameters:
    ///   - transform: A closure that turns an element into a hashable value.
    /// - Returns: The set of transformed values. When two results are equal, the later one is stored.
    @inlinable func mapSet<T>(transform closure: (Element) throws -> T) rethrows -> Set<T> {
        return try self.reduce(into: Set<T>(minimumCapacity: self.count)) { collected, element in
            let mapped = try closure(element)
            collected.update(with: mapped)
        }
    }

    /// Transforms every element into a sequence and collects all produced values into a `Set`.
    /// - Parameters:
    ///   - transform: A closure that turns an element into a sequence of hashable values.
    /// - Returns: The union of all produced sequences.
    @inlinable func flatMapSet<T: Sequence>(transform closure: (Element) throws -> T) rethrows -> Set<T.Element> {
        return try self.reduce(into: Set<T.Element>()) { collected, element in
            collected.formUnion(try closure(element))
        }
    }

    /// Transforms every element and collects the non-`nil` results into a `Set`.
    /// - Parameters:
    ///   - transform: A closure that turns an element into an optional hashable value.
    /// - Returns: The set of non-`nil` results. When two results are equal, the later one is stored.
    @inlinable func compactMapSet<T>(transform closure: (Element) throws -> T?) rethrows -> Set<T> {
        var collected = Set<T>(minimumCapacity: self.count)
        for element in self {
            guard let mapped = try closure(element) else {
                continue
            }
            collected.update(with: mapped)
        }
        return collected
    }
}
|
||||
|
||||
public extension MutableCollection {
    /// Calls the given closure on every element in order, letting it modify the element in place.
    /// - Parameters:
    ///   - transform: A closure that receives each element as an `inout` value.
    @inlinable mutating func mutatingForeach(transform closure: (inout Element) throws -> Void) rethrows {
        // Walk the indices by hand: the `indices` collection may hold a reference to
        // `self`, which can force a copy when `self` is mutated while iterating over it.
        var index = self.startIndex
        while index != self.endIndex {
            try closure(&self[index])
            self.formIndex(after: &index)
        }
    }
}
|
||||
|
||||
public extension Collection {
    /// Returns the longest run of elements at the end of the collection that all satisfy the given predicate.
    ///
    /// The collection is walked backwards from its end with `index(_:offsetBy: -1)`,
    /// so it has to support moving an index backwards.
    /// - Parameters:
    ///   - condition: A closure that returns `true` for elements that belong to the suffix.
    /// - Returns: The trailing elements for which `condition` holds; empty when the last element fails it.
    @inlinable func suffix(while condition: (Element) -> Bool) -> SubSequence {
        var boundary = self.endIndex
        while boundary != self.startIndex {
            let previous = self.index(boundary, offsetBy: -1)
            guard condition(self[previous]) else {
                break
            }
            boundary = previous
        }
        return self[boundary...]
    }
}
|
||||
|
||||
public extension Collection where Self.Element: Equatable {
    /// Returns a Boolean value indicating whether the collection ends with the given elements.
    ///
    /// Only forward traversal is used, so this also works on collections that
    /// cannot move an index backwards.
    /// - Parameters:
    ///   - suffix: The elements to look for at the end of this collection.
    /// - Returns: `true` when the last `suffix.count` elements equal `suffix`; always `true` for an empty `suffix`.
    @inlinable func hasSuffix(_ suffix: some Collection<Element>) -> Bool {
        let suffixCount = suffix.count
        guard suffixCount <= self.count else {
            return false
        }
        return self.suffix(suffixCount).elementsEqual(suffix)
    }

    /// Returns the elements that this collection and the given collection share at their start.
    ///
    /// Both collections are walked once, in step.
    /// - Parameters:
    ///   - collection: The collection to compare against.
    /// - Returns: The leading elements of this collection that equal the elements at the same positions of `collection`.
    @inlinable func commonPrefix(with collection: some Collection<Element>) -> [Element] {
        return zip(self, collection)
            .prefix(while: { $0.0 == $0.1 })
            .map { $0.0 }
    }
}
|
||||
314
Sources/SwiftUtils/CharacterUtils.swift
Normal file
314
Sources/SwiftUtils/CharacterUtils.swift
Normal file
@@ -0,0 +1,314 @@
|
||||
//
|
||||
// extension Character.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/09/03.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public enum CharacterUtils {
|
||||
/// 小書きのかなカナ集合
|
||||
private static let kogakiKana: Set<Character> = [
|
||||
"ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "ゕ", "ゖ", "っ", "ゃ", "ゅ", "ょ", "ゎ",
|
||||
"ァ", "ィ", "ゥ", "ェ", "ォ", "ヵ", "ヶ", "ッ", "ャ", "ュ", "ョ", "ヮ"
|
||||
]
|
||||
/// 濁点付きのかなカナ集合
|
||||
private static let dakutenKana: Set<Character> = [
|
||||
"ゔ", "が", "ぎ", "ぐ", "げ", "ご", "ざ", "じ", "ず", "ぜ", "ぞ", "だ", "ぢ", "づ", "で", "ど", "ば", "び", "ぶ", "べ", "ぼ",
|
||||
"ヴ", "ガ", "ギ", "グ", "ゲ", "ゴ", "ザ", "ジ", "ズ", "ゼ", "ゾ", "ダ", "ヂ", "ヅ", "デ", "ド", "バ", "ビ", "ブ", "ベ", "ボ"
|
||||
]
|
||||
|
||||
/// 小書きかなか否か
|
||||
static func isKogana(_ character: Character) -> Bool {
|
||||
kogakiKana.contains(character)
|
||||
}
|
||||
|
||||
/// ローマ字(a-z, A-Zか否か)
|
||||
@inlinable public static func isRomanLetter(_ character: Character) -> Bool {
|
||||
character.isASCII && character.isCased
|
||||
}
|
||||
|
||||
/// 自分が小書きであれば該当する文字を返す。
|
||||
public static func kogaki(_ character: Character) -> Character {
|
||||
switch character {
|
||||
case "あ":return "ぁ"
|
||||
case "い":return "ぃ"
|
||||
case "う":return "ぅ"
|
||||
case "え":return "ぇ"
|
||||
case "お":return "ぉ"
|
||||
case "か":return "ゕ"
|
||||
case "け":return "ゖ"
|
||||
case "つ":return "っ"
|
||||
case "や":return "ゃ"
|
||||
case "ゆ":return "ゅ"
|
||||
case "よ":return "ょ"
|
||||
case "わ":return "ゎ"
|
||||
case "ア":return "ァ"
|
||||
case "イ":return "ィ"
|
||||
case "ウ":return "ゥ"
|
||||
case "エ":return "ェ"
|
||||
case "オ":return "ォ"
|
||||
case "カ":return "ヵ"
|
||||
case "ケ":return "ヶ"
|
||||
case "ツ":return "ッ"
|
||||
case "ヤ":return "ャ"
|
||||
case "ユ":return "ュ"
|
||||
case "ヨ":return "ョ"
|
||||
case "ワ":return "ヮ"
|
||||
default: return character
|
||||
}
|
||||
}
|
||||
|
||||
/// 小書きから大書きを返す
|
||||
public static func ogaki(_ character: Character) -> Character {
|
||||
switch character {
|
||||
case "ぁ":return "あ"
|
||||
case "ぃ":return "い"
|
||||
case "ぅ":return "う"
|
||||
case "ぇ":return "え"
|
||||
case "ぉ":return "お"
|
||||
case "ゕ":return "か"
|
||||
case "ゖ":return "け"
|
||||
case "っ":return "つ"
|
||||
case "ゃ":return "や"
|
||||
case "ゅ":return "ゆ"
|
||||
case "ょ":return "よ"
|
||||
case "ゎ":return "わ"
|
||||
case "ァ":return "ア"
|
||||
case "ィ":return "イ"
|
||||
case "ゥ":return "ウ"
|
||||
case "ェ":return "エ"
|
||||
case "ォ":return "オ"
|
||||
case "ヵ":return "カ"
|
||||
case "ヶ":return "ケ"
|
||||
case "ッ":return "ツ"
|
||||
case "ャ":return "ヤ"
|
||||
case "ュ":return "ユ"
|
||||
case "ョ":return "ヨ"
|
||||
case "ヮ":return "ワ"
|
||||
default: return character
|
||||
}
|
||||
}
|
||||
|
||||
/// 濁点付きか否か
|
||||
public static func isDakuten(_ character: Character) -> Bool {
|
||||
dakutenKana.contains(character)
|
||||
}
|
||||
/// Returns the character with a dakuten (voiced sound mark) added.
/// - Parameter character: The kana to convert.
/// - Returns: The voiced kana listed for `character`, or `character` itself when the table has no entry for it.
public static func dakuten(_ character: Character) -> Character {
    switch character {
    // Hiragana
    case "う": return "ゔ"
    case "か": return "が"
    case "き": return "ぎ"
    case "く": return "ぐ"
    case "け": return "げ"
    case "こ": return "ご"
    case "さ": return "ざ"
    case "し": return "じ"
    case "す": return "ず"
    case "せ": return "ぜ"
    case "そ": return "ぞ"
    case "た": return "だ"
    case "ち": return "ぢ"
    case "つ": return "づ"
    case "て": return "で"
    case "と": return "ど"
    case "は": return "ば"
    case "ひ": return "び"
    case "ふ": return "ぶ"
    case "へ": return "べ"
    case "ほ": return "ぼ"
    // Katakana
    case "ウ": return "ヴ"
    case "カ": return "ガ"
    case "キ": return "ギ"
    case "ク": return "グ"
    case "ケ": return "ゲ"
    case "コ": return "ゴ"
    case "サ": return "ザ"
    case "シ": return "ジ"
    case "ス": return "ズ"
    case "セ": return "ゼ"
    case "ソ": return "ゾ"
    case "タ": return "ダ"
    case "チ": return "ヂ"
    case "ツ": return "ヅ"
    case "テ": return "デ"
    case "ト": return "ド"
    case "ハ": return "バ"
    case "ヒ": return "ビ"
    case "フ": return "ブ"
    case "ヘ": return "ベ"
    case "ホ": return "ボ"
    default:
        return character
    }
}
|
||||
/// Returns the character with its dakuten (voiced sound mark) removed.
/// - Parameter character: The voiced kana to convert.
/// - Returns: The unvoiced kana listed for `character`, or `character` itself when the table has no entry for it.
public static func mudakuten(_ character: Character) -> Character {
    switch character {
    // Hiragana
    case "ゔ": return "う"
    case "が": return "か"
    case "ぎ": return "き"
    case "ぐ": return "く"
    case "げ": return "け"
    case "ご": return "こ"
    case "ざ": return "さ"
    case "じ": return "し"
    case "ず": return "す"
    case "ぜ": return "せ"
    case "ぞ": return "そ"
    case "だ": return "た"
    case "ぢ": return "ち"
    case "づ": return "つ"
    case "で": return "て"
    case "ど": return "と"
    case "ば": return "は"
    case "び": return "ひ"
    case "ぶ": return "ふ"
    case "べ": return "へ"
    case "ぼ": return "ほ"
    // Katakana
    case "ヴ": return "ウ"
    case "ガ": return "カ"
    case "ギ": return "キ"
    case "グ": return "ク"
    case "ゲ": return "ケ"
    case "ゴ": return "コ"
    case "ザ": return "サ"
    case "ジ": return "シ"
    case "ズ": return "ス"
    case "ゼ": return "セ"
    case "ゾ": return "ソ"
    case "ダ": return "タ"
    case "ヂ": return "チ"
    case "ヅ": return "ツ"
    case "デ": return "テ"
    case "ド": return "ト"
    case "バ": return "ハ"
    case "ビ": return "ヒ"
    case "ブ": return "フ"
    case "ベ": return "ヘ"
    case "ボ": return "ホ"
    default:
        return character
    }
}
|
||||
/// Returns whether the character carries a handakuten (semi-voiced sound mark).
/// - Parameter character: The character to inspect.
/// - Returns: `true` for the hiragana and katakana "pa" row characters, `false` otherwise.
public static func isHandakuten(_ character: Character) -> Bool {
    switch character {
    case "ぱ", "ぴ", "ぷ", "ぺ", "ぽ",
         "パ", "ピ", "プ", "ペ", "ポ":
        return true
    default:
        return false
    }
}
|
||||
/// Returns the character with a handakuten (semi-voiced sound mark) added.
/// - Parameter character: The kana to convert.
/// - Returns: The semi-voiced kana listed for `character`, or `character` itself when the table has no entry for it.
public static func handakuten(_ character: Character) -> Character {
    switch character {
    // Hiragana
    case "は": return "ぱ"
    case "ひ": return "ぴ"
    case "ふ": return "ぷ"
    case "へ": return "ぺ"
    case "ほ": return "ぽ"
    // Katakana
    case "ハ": return "パ"
    case "ヒ": return "ピ"
    case "フ": return "プ"
    case "ヘ": return "ペ"
    case "ホ": return "ポ"
    default:
        return character
    }
}
|
||||
/// Returns the character with its handakuten (semi-voiced sound mark) removed.
/// - Parameter character: The semi-voiced kana to convert.
/// - Returns: The plain kana listed for `character`, or `character` itself when the table has no entry for it.
public static func muhandakuten(_ character: Character) -> Character {
    switch character {
    // Hiragana
    case "ぱ": return "は"
    case "ぴ": return "ひ"
    case "ぷ": return "ふ"
    case "ぺ": return "へ"
    case "ぽ": return "ほ"
    // Katakana
    case "パ": return "ハ"
    case "ピ": return "ヒ"
    case "プ": return "フ"
    case "ペ": return "ヘ"
    case "ポ": return "ホ"
    default:
        return character
    }
}
|
||||
|
||||
/// Toggles a character between its related forms (letter case, small kana, dakuten, handakuten).
///
/// Cased characters swap case. Hiragana step to the next form of their group, for example
/// `つ` -> `っ` -> `づ` -> `つ` and `は` -> `ば` -> `ぱ` -> `は`. Characters that match no rule
/// (including katakana) are returned unchanged.
/// - Parameter character: The character to toggle.
/// - Returns: The toggled character as a `String`.
public static func requestChange(_ character: Character) -> String {
    // Cased characters only swap their case.
    if character.isLowercase {
        return character.uppercased()
    }
    if character.isUppercase {
        return character.lowercased()
    }

    switch character {
    // Full-size kana whose only alternative form is the small one.
    case "あ", "い", "え", "お", "や", "ゆ", "よ", "わ":
        return String(kogaki(character))
    case "ぁ", "ぃ", "ぇ", "ぉ", "ゃ", "ゅ", "ょ", "ゎ":
        return String(ogaki(character))

    // Kana that only alternate between unvoiced and voiced.
    case "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "て", "と":
        return String(dakuten(character))
    case "が", "ぎ", "ぐ", "げ", "ご", "ざ", "じ", "ず", "ぜ", "ぞ", "だ", "ぢ", "で", "ど":
        return String(mudakuten(character))

    // Three-step cycle: full-size -> small -> voiced -> full-size.
    case "つ", "う":
        return String(kogaki(character))
    case "っ", "ぅ":
        return String(dakuten(ogaki(character)))
    case "づ", "ゔ":
        return String(mudakuten(character))

    // Three-step cycle: plain -> voiced -> semi-voiced -> plain.
    case "は", "ひ", "ふ", "へ", "ほ":
        return String(dakuten(character))
    case "ば", "び", "ぶ", "べ", "ぼ":
        return String(handakuten(mudakuten(character)))
    case "ぱ", "ぴ", "ぷ", "ぺ", "ぽ":
        return String(muhandakuten(character))

    default:
        return String(character)
    }
}
|
||||
}
|
||||
|
||||
public extension Character {
    /// Returns the Katakana counterpart of a Hiragana character.
    ///
    /// Only characters consisting of a single Unicode scalar in `U+3041...U+3096` are
    /// converted (by adding 96 to the scalar value); any other character is returned as is.
    @inlinable func toKatakana() -> Character {
        guard self.unicodeScalars.count == 1, let hiragana = self.unicodeScalars.first else {
            return self
        }
        guard (0x3041...0x3096).contains(hiragana.value),
              let katakana = UnicodeScalar(hiragana.value + 96) else {
            return self
        }
        return Character(katakana)
    }

    /// Returns the Hiragana counterpart of a Katakana character.
    ///
    /// Only characters consisting of a single Unicode scalar in `U+30A1...U+30F6` are
    /// converted (by subtracting 96 from the scalar value); any other character is returned as is.
    @inlinable func toHiragana() -> Character {
        guard self.unicodeScalars.count == 1, let katakana = self.unicodeScalars.first else {
            return self
        }
        guard (0x30A1...0x30F6).contains(katakana.value),
              let hiragana = UnicodeScalar(katakana.value - 96) else {
            return self
        }
        return Character(hiragana)
    }
}
|
||||
18
Sources/SwiftUtils/CodableSupport.swift
Normal file
18
Sources/SwiftUtils/CodableSupport.swift
Normal file
@@ -0,0 +1,18 @@
|
||||
//
|
||||
// CodableSupport.swift
|
||||
// azooKey
|
||||
//
|
||||
// Created by ensan on 2021/03/17.
|
||||
// Copyright © 2021 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public extension Encodable {
    /// Encodes this value into the given keyed container under the given key.
    /// - Parameters:
    ///   - container: The keyed container that receives the encoded value.
    ///   - key: The key to associate the value with.
    /// - Throws: Any error thrown by `KeyedEncodingContainer.encode(_:forKey:)`.
    func containerEncode<CodingKeys: CodingKey>(
        container: inout KeyedEncodingContainer<CodingKeys>,
        key: CodingKeys
    ) throws {
        try container.encode(self, forKey: key)
    }
}
|
||||
26
Sources/SwiftUtils/DataUtils.swift
Normal file
26
Sources/SwiftUtils/DataUtils.swift
Normal file
@@ -0,0 +1,26 @@
|
||||
//
|
||||
// extension Data.swift
|
||||
// azooKey
|
||||
//
|
||||
// Created by ensan on 2022/10/22.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension Data {
    /// Converts this data to an array of the given type by reinterpreting its bytes.
    /// - Parameter type: The element type to read the bytes as.
    /// - Returns: An array holding as many whole elements of `type` as fit in the data;
    ///   an empty array when the data is empty.
    /// - Note: NOTE(review): the bytes are reinterpreted in place via `assumingMemoryBound`,
    ///   so callers presumably must only use trivial types whose alignment the storage satisfies — confirm.
    @inlinable public func toArray<T>(of type: T.Type) -> [T] {
        self.withUnsafeBytes { rawBuffer -> [T] in
            // Empty data exposes a nil base address; force-unwrapping it would crash.
            guard let baseAddress = rawBuffer.baseAddress else {
                return []
            }
            // Consecutive elements are laid out `stride` bytes apart, so `stride` (not `size`)
            // gives the number of whole elements; it is also never zero.
            let elementCount = rawBuffer.count / MemoryLayout<T>.stride
            let typedBuffer = UnsafeBufferPointer(
                start: baseAddress.assumingMemoryBound(to: type),
                count: elementCount
            )
            return Array(typedBuffer)
        }
    }
}
|
||||
79
Sources/SwiftUtils/Debug.swift
Normal file
79
Sources/SwiftUtils/Debug.swift
Normal file
@@ -0,0 +1,79 @@
|
||||
//
|
||||
// Debug.swift
|
||||
//
|
||||
//
|
||||
// Created by ensan on 2023/04/30.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Prints the given items to the standard output if the compilation condition `DEBUG` is set.
///
/// The items are converted with string interpolation and joined by a single space, the same
/// layout `print` uses. In builds without `DEBUG` the function does nothing.
/// - Parameter items: The items to print.
/// - Note: This function is always preferred over `print` in the codebase.
@_disfavoredOverload
@inlinable public func debug(_ items: Any...) {
    #if DEBUG
    // Joining the descriptions always places a separator between items, even when an
    // item's description is empty (an `isEmpty` check on the accumulated text would drop it).
    let line = items.map { "\($0)" }.joined(separator: " ")
    print(line)
    #endif
}
|
||||
|
||||
/// Prints one item to the standard output if the compilation condition `DEBUG` is set.
///
/// The argument is an autoclosure, so it is not evaluated in builds without `DEBUG`.
@inlinable public func debug(_ item1: @autoclosure () -> Any) {
    #if DEBUG
    let value = item1()
    print(value)
    #endif
}
|
||||
/// Prints two items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2())
    #endif
}
|
||||
/// Prints three items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3())
    #endif
}
|
||||
/// Prints four items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any,
    _ item4: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3(), item4())
    #endif
}
|
||||
/// Prints five items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any,
    _ item4: @autoclosure () -> Any,
    _ item5: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3(), item4(), item5())
    #endif
}
|
||||
/// Prints six items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any,
    _ item4: @autoclosure () -> Any,
    _ item5: @autoclosure () -> Any,
    _ item6: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3(), item4(), item5(), item6())
    #endif
}
|
||||
/// Prints seven items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any,
    _ item4: @autoclosure () -> Any,
    _ item5: @autoclosure () -> Any,
    _ item6: @autoclosure () -> Any,
    _ item7: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3(), item4(), item5(), item6(), item7())
    #endif
}
|
||||
/// Prints eight items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any,
    _ item4: @autoclosure () -> Any,
    _ item5: @autoclosure () -> Any,
    _ item6: @autoclosure () -> Any,
    _ item7: @autoclosure () -> Any,
    _ item8: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3(), item4(), item5(), item6(), item7(), item8())
    #endif
}
|
||||
/// Prints nine items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any,
    _ item4: @autoclosure () -> Any,
    _ item5: @autoclosure () -> Any,
    _ item6: @autoclosure () -> Any,
    _ item7: @autoclosure () -> Any,
    _ item8: @autoclosure () -> Any,
    _ item9: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3(), item4(), item5(), item6(), item7(), item8(), item9())
    #endif
}
|
||||
/// Prints ten items to the standard output if the compilation condition `DEBUG` is set.
///
/// The arguments are autoclosures, so they are not evaluated in builds without `DEBUG`.
@inlinable public func debug(
    _ item1: @autoclosure () -> Any,
    _ item2: @autoclosure () -> Any,
    _ item3: @autoclosure () -> Any,
    _ item4: @autoclosure () -> Any,
    _ item5: @autoclosure () -> Any,
    _ item6: @autoclosure () -> Any,
    _ item7: @autoclosure () -> Any,
    _ item8: @autoclosure () -> Any,
    _ item9: @autoclosure () -> Any,
    _ item10: @autoclosure () -> Any
) {
    #if DEBUG
    print(item1(), item2(), item3(), item4(), item5(), item6(), item7(), item8(), item9(), item10())
    #endif
}
|
||||
18
Sources/SwiftUtils/Modify.swift
Normal file
18
Sources/SwiftUtils/Modify.swift
Normal file
@@ -0,0 +1,18 @@
|
||||
//
|
||||
// Modify.swift
|
||||
// azooKey
|
||||
//
|
||||
// Created by ensan on 2022/10/10.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Applies `process` to the given value in place.
/// - Parameters:
///   - value: The value to modify; it is passed to `process` as an `inout` argument.
///   - process: The closure that mutates the value.
/// - Note: This function should be used when a specific subscript setter would otherwise be called multiple times.
@inlinable public func withMutableValue<T>(
    _ value: inout T,
    process: (inout T) -> Void
) {
    process(&value)
}
|
||||
117
Sources/SwiftUtils/StringUtils.swift
Normal file
117
Sources/SwiftUtils/StringUtils.swift
Normal file
@@ -0,0 +1,117 @@
|
||||
//
|
||||
// extension StringProtocol.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2020/10/16.
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public extension StringProtocol {
    /// Whether the string consists only of Roman letters and digits (`a-z`, `A-Z`, `0-9`).
    /// - Note: Returns `false` for an empty string.
    @inlinable
    var onlyRomanAlphabetOrNumber: Bool {
        // Scans UTF-8 bytes instead of running a regular expression; every byte of a
        // non-ASCII character is >= 0x80 and therefore fails the check.
        !isEmpty && utf8.allSatisfy { byte in
            (UInt8(ascii: "a")...UInt8(ascii: "z")).contains(byte)
                || (UInt8(ascii: "A")...UInt8(ascii: "Z")).contains(byte)
                || (UInt8(ascii: "0")...UInt8(ascii: "9")).contains(byte)
        }
    }

    /// Whether the string consists only of Roman letters (`a-z`, `A-Z`).
    /// - Note: Returns `false` for an empty string.
    @inlinable
    var onlyRomanAlphabet: Bool {
        // Same byte-level scan as `onlyRomanAlphabetOrNumber`, without the digit range.
        !isEmpty && utf8.allSatisfy { byte in
            (UInt8(ascii: "a")...UInt8(ascii: "z")).contains(byte)
                || (UInt8(ascii: "A")...UInt8(ascii: "Z")).contains(byte)
        }
    }

    /// Whether the string contains at least one Roman letter (`a-z`, `A-Z`).
    /// - Note: Returns `false` for an empty string.
    ///   This was previously implemented with a regular expression, which performed poorly;
    ///   switching to the byte scan below made it roughly 40 times faster.
    @inlinable
    var containsRomanAlphabet: Bool {
        utf8.contains { byte in
            (UInt8(ascii: "a")...UInt8(ascii: "z")).contains(byte)
                || (UInt8(ascii: "A")...UInt8(ascii: "Z")).contains(byte)
        }
    }

    /// Whether the string consists only of characters acceptable in English text.
    /// - Note: Returns `false` for an empty string.
    @inlinable
    var isEnglishSentence: Bool {
        !isEmpty && range(of: "[^0-9a-zA-Z\n !'_<>\\[\\]{}*@`\\^|~=\"#$%&\\+\\(\\),\\-\\./:;?’\\\\]", options: .regularExpression) == nil
    }

    /// Whether the string consists only of kana in the ranges `ぁ-ゖ` and `ァ-ヶ`.
    /// - Note: Returns `false` for an empty string.
    @inlinable
    var isKana: Bool {
        !isEmpty && range(of: "[^ぁ-ゖァ-ヶ]", options: .regularExpression) == nil
    }

    /// Returns a String value in which Hiragana are all converted to Katakana.
    /// - Returns: A String value in which Hiragana are all converted to Katakana.
    @inlinable func toKatakana() -> String {
        // Every kana in the converted range is a single UTF-16 code unit, so the
        // string can be processed one code unit at a time.
        let shifted = self.utf16.map { unit -> UInt16 in
            (0x3041...0x3096).contains(unit) ? unit + 96 : unit
        }
        return String(utf16CodeUnits: shifted, count: shifted.count)
    }

    /// Returns a String value in which Katakana are all converted to Hiragana.
    /// - Returns: A String value in which Katakana are all converted to Hiragana.
    @inlinable func toHiragana() -> String {
        // Every kana in the converted range is a single UTF-16 code unit, so the
        // string can be processed one code unit at a time.
        let shifted = self.utf16.map { unit -> UInt16 in
            (0x30A1...0x30F6).contains(unit) ? unit - 96 : unit
        }
        return String(utf16CodeUnits: shifted, count: shifted.count)
    }

    /// Returns an Index value that is the specified distance from the start index.
    /// - Parameter offset: The distance to offset from the start index.
    /// - Returns: An Index value that is the specified distance from the start index.
    @inlinable
    func indexFromStart(_ offset: Int) -> Index {
        self.index(self.startIndex, offsetBy: offset)
    }

    // Escape table (left: original character, right: two-character escaped form):
    /*
     backslash -> \b
     NUL       -> \0
     newline   -> \n
     tab       -> \t
     ,         -> \c
     space     -> \s
     "         -> \d
     */
    // Use these functions so that user-entered text cannot break the stored format.

    /// Returns the string with the characters from the escape table replaced by their escaped forms.
    func escaped() -> String {
        // The backslash must be replaced first so that the backslashes introduced by
        // the later replacements are not escaped a second time.
        var result = self.replacingOccurrences(of: "\\", with: "\\b")
        result = result.replacingOccurrences(of: "\0", with: "\\0")
        result = result.replacingOccurrences(of: "\n", with: "\\n")
        result = result.replacingOccurrences(of: "\t", with: "\\t")
        result = result.replacingOccurrences(of: ",", with: "\\c")
        result = result.replacingOccurrences(of: " ", with: "\\s")
        result = result.replacingOccurrences(of: "\"", with: "\\d")
        return result
    }

    /// Returns the string with the escape sequences produced by `escaped()` turned back into the original characters.
    func unescaped() -> String {
        // Replacements run in the reverse order of `escaped()`; the backslash is restored last.
        var result = self.replacingOccurrences(of: "\\d", with: "\"")
        result = result.replacingOccurrences(of: "\\s", with: " ")
        result = result.replacingOccurrences(of: "\\c", with: ",")
        result = result.replacingOccurrences(of: "\\t", with: "\t")
        result = result.replacingOccurrences(of: "\\n", with: "\n")
        result = result.replacingOccurrences(of: "\\0", with: "\0")
        result = result.replacingOccurrences(of: "\\b", with: "\\")
        return result
    }
}
|
||||
Reference in New Issue
Block a user