Move to root

This commit is contained in:
ensan
2023-07-23 00:34:27 +09:00
parent 6c0efe4d92
commit 98bc180c17
85 changed files with 22 additions and 74 deletions

View File

@ -0,0 +1,843 @@
//
// DicdataStore.swift
// Keyboard
//
// Created by ensan on 2020/09/17.
// Copyright © 2020 ensan. All rights reserved.
//
import Algorithms
import Foundation
import SwiftUtils
public final class DicdataStore {
public init(convertRequestOptions: ConvertRequestOptions) {
self.requestOptions = convertRequestOptions
self.setup()
}
init(requestOptions: ConvertRequestOptions = .default) {
self.requestOptions = requestOptions
debug("DicdataStoreが初期化されました")
self.setup()
}
private var ccParsed: [Bool] = .init(repeating: false, count: 1319)
private var ccLines: [[Int: PValue]] = []
private var mmValue: [PValue] = []
private let threshold: PValue = -17
private var loudses: [String: LOUDS] = [:]
private var importedLoudses: Set<String> = []
private var charsID: [Character: UInt8] = [:]
private var learningManager = LearningManager()
private var zeroHintPredictionDicdata: [DicdataElement]?
private var osUserDict: [DicdataElement] = []
internal let maxlength: Int = 20
private let midCount = 502
private let cidCount = 1319
private var requestOptions: ConvertRequestOptions = .default
private let numberFormatter = NumberFormatter()
///
private func setup() {
numberFormatter.numberStyle = .spellOut
numberFormatter.locale = .init(identifier: "ja-JP")
self.ccLines = [[Int: PValue]].init(repeating: [:], count: CIDData.totalCount)
do {
let string = try String(contentsOf: self.requestOptions.dictionaryResourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false), encoding: String.Encoding.utf8)
charsID = [Character: UInt8].init(uniqueKeysWithValues: string.enumerated().map {($0.element, UInt8($0.offset))})
} catch {
debug("ファイルが存在しません: \(error)")
}
do {
let url = requestOptions.dictionaryResourceURL.appendingPathComponent("mm.binary", isDirectory: false)
do {
let binaryData = try Data(contentsOf: url, options: [.uncached])
self.mmValue = binaryData.toArray(of: Float.self).map {PValue($0)}
} catch {
debug("Failed to read the file.")
self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount)
}
}
_ = self.loadLOUDS(identifier: "user")
_ = self.loadLOUDS(identifier: "memory")
}
public enum Notification {
case importOSUserDict([DicdataElement])
case setRequestOptions(ConvertRequestOptions)
case forgetMemory(Candidate)
case closeKeyboard
}
func sendToDicdataStore(_ data: Notification) {
switch data {
case .closeKeyboard:
self.closeKeyboard()
case let .importOSUserDict(osUserDict):
self.osUserDict = osUserDict
case let .forgetMemory(candidate):
self.learningManager.forgetMemory(data: candidate.data)
// louds
self.reloadMemory()
case let .setRequestOptions(value):
// bundleURLsetup
if value.dictionaryResourceURL != self.requestOptions.dictionaryResourceURL {
self.requestOptions = value
self.setup()
} else {
self.requestOptions = value
}
let shouldReset = self.learningManager.setRequestOptions(options: value)
if shouldReset {
self.reloadMemory()
}
}
}
private func reloadMemory() {
self.loudses.removeValue(forKey: "memory")
self.importedLoudses.remove("memory")
}
private func reloadUser() {
self.loudses.removeValue(forKey: "user")
self.importedLoudses.remove("user")
}
private func closeKeyboard() {
self.learningManager.save()
// savememoryLOUDS使
self.reloadMemory()
self.reloadUser()
}
///
private static func getPenalty(data: DicdataElement) -> PValue {
-2.0 / PValue(data.word.count)
}
///
private func shouldBeRemoved(value: PValue, wordCount: Int) -> Bool {
let d = value - self.threshold
if d < 0 {
return true
}
// d
return -2.0 / PValue(wordCount) < -d
}
///
internal func shouldBeRemoved(data: DicdataElement) -> Bool {
let d = data.value() - self.threshold
if d < 0 {
return true
}
return Self.getPenalty(data: data) < -d
}
private func loadLOUDS(identifier: String) -> LOUDS? {
if importedLoudses.contains(identifier) {
return self.loudses[identifier]
}
importedLoudses.insert(identifier)
if let louds = LOUDS.load(identifier, option: self.requestOptions) {
self.loudses[identifier] = louds
return louds
} else {
debug("loudsの読み込みに失敗、identifierは\(identifier)")
return nil
}
}
private func perfectMatchLOUDS(identifier: String, charIDs: [UInt8]) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else {
return []
}
return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
}
private func throughMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else {
return []
}
let result = louds.byfixNodeIndices(chars: charIDs)
// result[1]3..<5 (34)14..<6
return Array(result[min(depth.lowerBound + 1, result.endIndex) ..< min(depth.upperBound + 1, result.endIndex)])
}
private func prefixMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Int = .max) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else {
return []
}
return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
}
private func getDicdataFromLoudstxt3(identifier: String, indices: Set<Int>) -> [DicdataElement] {
debug("getDicdataFromLoudstxt3", identifier, indices)
// split = 2048
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
var data: [DicdataElement] = []
for (key, value) in dict {
data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {$0 & 2047}, option: self.requestOptions))
}
return data
}
/// kana2lattice
/// - Parameters:
/// - inputData:
/// - from:
/// - toIndexRange: `from ..< (toIndexRange)`
public func getLOUDSDataInRange(inputData: ComposingText, from fromIndex: Int, toIndexRange: Range<Int>? = nil) -> [LatticeNode] {
let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
debug("getLOUDSDataInRange", fromIndex, toIndexRange?.description ?? "nil", toIndexLeft, toIndexRight)
if fromIndex > toIndexLeft || toIndexLeft >= toIndexRight {
debug("getLOUDSDataInRange: index is wrong")
return []
}
let segments = (fromIndex ..< toIndexRight).reduce(into: []) { (segments: inout [String], rightIndex: Int) in
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
}
// MARK:
var stringToInfo = inputData.getRangesWithTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
// MARK:
let stringSet = stringToInfo.keys.map {($0, $0.map {self.charsID[$0, default: .max]})}
let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
// :
let group = [Character: [([Character], [UInt8])]].init(grouping: stringSet, by: {$0.0.first!})
let depth = minCharIDsCount - 1 ..< maxCharIDsCount
var indices: [(String, Set<Int>)] = group.map {dic in
let key = String(dic.key)
let set = dic.value.flatMapSet {(_, charIDs) in self.throughMatchLOUDS(identifier: key, charIDs: charIDs, depth: depth)}
return (key, set)
}
indices.append(("user", stringSet.flatMapSet {self.throughMatchLOUDS(identifier: "user", charIDs: $0.1, depth: depth)}))
if learningManager.enabled {
indices.append(("memory", stringSet.flatMapSet {self.throughMatchLOUDS(identifier: "memory", charIDs: $0.1, depth: depth)}))
}
// MARK: indices
var dicdata: [DicdataElement] = []
for (identifier, value) in indices {
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
let rubyArray = Array(data.ruby)
let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
if penalty.isZero {
return data
}
let ratio = Self.penaltyRatio[data.lcid]
let pUnit: PValue = Self.getPenalty(data: data) / 2 //
let adjust = pUnit * penalty * ratio
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
return nil
}
return data.adjustedData(adjust)
}
dicdata.append(contentsOf: result)
}
dicdata.append(contentsOf: stringSet.flatMap {self.learningManager.temporaryThroughMatch(charIDs: $0.1, depth: depth)})
for i in toIndexLeft ..< toIndexRight {
do {
let result = self.getWiseDicdata(convertTarget: segments[i - fromIndex], inputData: inputData, inputRange: fromIndex ..< i + 1)
for item in result {
stringToInfo[Array(item.ruby)] = (i, 0)
}
dicdata.append(contentsOf: result)
}
do {
let result = self.getMatchOSUserDict(segments[i - fromIndex])
for item in result {
stringToInfo[Array(item.ruby)] = (i, 0)
}
dicdata.append(contentsOf: result)
}
}
if fromIndex == .zero {
let result: [LatticeNode] = dicdata.compactMap {
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
return nil
}
let node = LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1)
node.prevs.append(RegisteredNode.BOSNode())
return node
}
return result
} else {
let result: [LatticeNode] = dicdata.compactMap {
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
return nil
}
return LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1)
}
return result
}
}
/// kana2latticelouds
/// - Parameters:
/// - inputData:
/// - from:
/// - to:
public func getLOUDSData(inputData: ComposingText, from fromIndex: Int, to toIndex: Int) -> [LatticeNode] {
if toIndex - fromIndex > self.maxlength || fromIndex > toIndex {
return []
}
let segment = inputData.input[fromIndex...toIndex].reduce(into: "") {$0.append($1.character)}.toKatakana()
let string2penalty = inputData.getRangeWithTypos(fromIndex, toIndex)
// MARK: indices
// :
let strings = string2penalty.keys.map {
(key: $0, charIDs: $0.map {self.charsID[$0, default: .max]})
}
let group = [Character: [(key: [Character], charIDs: [UInt8])]].init(grouping: strings, by: {$0.key.first!})
var indices: [(String, Set<Int>)] = group.map {dic in
let head = String(dic.key)
let set = dic.value.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: head, charIDs: charIDs)
}
return (head, set)
}
do {
let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: "user", charIDs: charIDs)
}
indices.append(("user", set))
}
if learningManager.enabled {
let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: "memory", charIDs: charIDs)
}
indices.append(("memory", set))
}
var dicdata: [DicdataElement] = []
for (identifier, value) in indices {
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
let rubyArray = Array(data.ruby)
let penalty = string2penalty[rubyArray, default: .zero]
if penalty.isZero {
return data
}
let ratio = Self.penaltyRatio[data.lcid]
let pUnit: PValue = Self.getPenalty(data: data) / 2 //
let adjust = pUnit * penalty * ratio
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
return nil
}
return data.adjustedData(adjust)
}
dicdata.append(contentsOf: result)
}
dicdata.append(contentsOf: strings.flatMap {self.learningManager.temporaryPerfectMatch(charIDs: $0.charIDs)})
dicdata.append(contentsOf: self.getWiseDicdata(convertTarget: segment, inputData: inputData, inputRange: fromIndex ..< toIndex + 1))
dicdata.append(contentsOf: self.getMatchOSUserDict(segment))
if fromIndex == .zero {
let result: [LatticeNode] = dicdata.map {
let node = LatticeNode(data: $0, inputRange: fromIndex ..< toIndex + 1)
node.prevs.append(RegisteredNode.BOSNode())
return node
}
return result
} else {
let result: [LatticeNode] = dicdata.map {LatticeNode(data: $0, inputRange: fromIndex ..< toIndex + 1)}
return result
}
}
internal func getZeroHintPredictionDicdata() -> [DicdataElement] {
if let dicdata = self.zeroHintPredictionDicdata {
return dicdata
}
do {
let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_null.csv", isDirectory: false), encoding: String.Encoding.utf8)
let csvLines = csvString.split(separator: "\n")
let csvData = csvLines.map {$0.split(separator: ",", omittingEmptySubsequences: false)}
let dicdata: [DicdataElement] = csvData.map {self.parseLoudstxt2FormattedEntry(from: $0)}
self.zeroHintPredictionDicdata = dicdata
return dicdata
} catch {
debug(error)
self.zeroHintPredictionDicdata = []
return []
}
}
///
/// - Parameters:
/// - head:
/// - Returns:
///
internal func getPredictionLOUDSDicdata(key: some StringProtocol) -> [DicdataElement] {
let count = key.count
if count == .zero {
return []
}
// 1
if count == 1 {
do {
let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_\(key).csv", isDirectory: false), encoding: String.Encoding.utf8)
let csvLines = csvString.split(separator: "\n")
let csvData = csvLines.map {$0.split(separator: ",", omittingEmptySubsequences: false)}
let dicdata: [DicdataElement] = csvData.map {self.parseLoudstxt2FormattedEntry(from: $0)}
return dicdata
} catch {
debug("ファイルが存在しません: \(error)")
return []
}
} else if count == 2 {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map {self.charsID[$0, default: .max]}
// 700
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: 5).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices)))
let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs, depth: 5).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(userDictIndices)))
if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs, depth: 5).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
}
return result
} else {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map {self.charsID[$0, default: .max]}
// 700
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices)))
let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(userDictIndices)))
if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
}
return result
}
}
private func parseLoudstxt2FormattedEntry(from dataString: [some StringProtocol]) -> DicdataElement {
let ruby = String(dataString[0])
let word = dataString[1].isEmpty ? ruby:String(dataString[1])
let lcid = Int(dataString[2]) ?? .zero
let rcid = Int(dataString[3]) ?? lcid
let mid = Int(dataString[4]) ?? .zero
let value: PValue = PValue(dataString[5]) ?? -30.0
return DicdataElement(word: word, ruby: ruby, lcid: lcid, rcid: rcid, mid: mid, value: value)
}
///
/// - parameters:
/// - convertTarget:
/// - note
/// - Converter
private func getWiseDicdata(convertTarget: String, inputData: ComposingText, inputRange: Range<Int>) -> [DicdataElement] {
var result: [DicdataElement] = []
result.append(contentsOf: self.getJapaneseNumberDicdata(head: convertTarget))
if inputData.input[..<inputRange.startIndex].last?.character.isNumber != true && inputData.input[inputRange.endIndex...].first?.character.isNumber != true, let number = Float(convertTarget) {
result.append(DicdataElement(ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: -14))
if number.truncatingRemainder(dividingBy: 1) == 0 {
let int = Int(number)
if int < Int(1E18) && -Int(1E18) < int, let kansuji = self.numberFormatter.string(from: NSNumber(value: int)) {
result.append(DicdataElement(word: kansuji, ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: -16))
}
}
}
// convertTarget
if requestOptions.keyboardLanguage == .en_US && convertTarget.onlyRomanAlphabet {
result.append(DicdataElement(ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: -14))
}
//
if requestOptions.keyboardLanguage != .en_US && inputData.input[inputRange].allSatisfy({$0.inputStyle == .roman2kana}) {
if let katakana = Roman2Kana.katakanaChanges[convertTarget], let hiragana = Roman2Kana.hiraganaChanges[Array(convertTarget)] {
result.append(DicdataElement(word: String(hiragana), ruby: katakana, cid: CIDData..cid, mid: MIDData..mid, value: -13))
result.append(DicdataElement(ruby: katakana, cid: CIDData..cid, mid: MIDData..mid, value: -14))
}
}
//
if convertTarget.count == 1 {
let katakana = convertTarget.toKatakana()
let hiragana = convertTarget.toHiragana()
if convertTarget == katakana {
result.append(DicdataElement(ruby: katakana, cid: CIDData..cid, mid: MIDData..mid, value: -14))
} else {
result.append(DicdataElement(word: hiragana, ruby: katakana, cid: CIDData..cid, mid: MIDData..mid, value: -13))
result.append(DicdataElement(ruby: katakana, cid: CIDData..cid, mid: MIDData..mid, value: -14))
}
}
//
if convertTarget.count == 1, let first = convertTarget.first {
var value: PValue = -14
let hs = Self.fullwidthToHalfwidth[first, default: first]
if hs != first {
result.append(DicdataElement(word: convertTarget, ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: value))
value -= 5.0
result.append(DicdataElement(word: String(hs), ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: value))
value -= 5.0
}
if let fs = Self.halfwidthToFullwidth[first], fs != first {
result.append(DicdataElement(word: convertTarget, ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: value))
value -= 5.0
result.append(DicdataElement(word: String(fs), ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: value))
value -= 5.0
}
for group in Self.weakRelatingSymbolGroups where group.contains(hs) {
for symbol in group where symbol != hs {
result.append(DicdataElement(word: String(symbol), ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: value))
value -= 5.0
if let fs = Self.halfwidthToFullwidth[symbol] {
result.append(DicdataElement(word: String(fs), ruby: convertTarget, cid: CIDData..cid, mid: MIDData..mid, value: value))
value -= 5.0
}
}
}
}
return result
}
//
private static let (fullwidthToHalfwidth, halfwidthToFullwidth) = zip(
"+ー*=・!#%&'"〜|£$¥@`;:<>,.\/_ ̄-",
"+ー*=・!#%&'"〜|£$¥@`;:<>,.\/_ ̄-".applyingTransform(.fullwidthToHalfwidth, reverse: false)!
)
.reduce(into: ([Character: Character](), [Character: Character]())) { (results: inout ([Character: Character], [Character: Character]), values: (Character, Character)) in
results.0[values.0] = values.1
results.1[values.1] = values.0
}
// ()
// strongRelatingSymbolGroups
//
// 1
//
private static let weakRelatingSymbolGroups: [[Character]] = [
// ()
["", ""], //
["", "", "", ""],
["", ""],
["", ""],
["", ""],
["", ""],
["", "辻󠄀"],
["禰󠄀", ""],
["煉󠄁", ""],
["", ""], //
["", ""],
["", "𠮷"], //
["", "𣘺", "", "𫞎"],
["", "", ""],
["", ""],
["", ""],
["", ""],
["", ""],
["", ""],
["", ""],
//
["", "", "", "", ""], //
["^", ""], //
["¥", "$", "¢", "", "£", ""], //
["%", ""], //
["°", "", ""],
[""], //
["*", "", "✳︎", "✴︎"], //
["", "", "", ""],
["+", "±", ""],
["×", "", "✖️"],
["÷", "" ],
["<", "", "", "", "", "", "«"],
[">", "", "", "", "", "", "»"],
["=", "", "", ""],
[":", ";"],
["!", "❗️", "❣️", "‼︎", "⁉︎", "", "‼️", "⁉️", "¡"],
["?", "", "⁉︎", "", "", "⁉️", "¿"],
["", "", "", "☎︎"],
["", "", "", "", "", "", "", ""],
["", "", "", ""], //
["", "", "", "", "", "", "", "", "", "", "⚪︎", ""], //
["", "", "", "", "↙︎", "↖︎", "↘︎", "↗︎", "↔︎", "↕︎", "↪︎", "↩︎", ""], //
["", "", "", "", "", "", "", "𝄞", "𝄞"], //
["", "", ""] //
]
private func loadCCBinary(url: URL) -> [(Int32, Float)] {
do {
let binaryData = try Data(contentsOf: url, options: [.uncached])
return binaryData.toArray(of: (Int32, Float).self)
} catch {
debug("Failed to read the file.", error)
return []
}
}
/// OSruby
private func getMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
self.osUserDict.filter {$0.ruby == ruby}
}
/// OSruby
internal func getPrefixMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
self.osUserDict.filter {$0.ruby.hasPrefix(ruby)}
}
//
// TODO: previous
internal func updateLearningData(_ candidate: Candidate, with previous: DicdataElement?) {
if let previous {
self.learningManager.update(data: [previous] + candidate.data)
} else {
self.learningManager.update(data: candidate.data)
}
}
/// class id
/// - Parameters:
/// - former: id
/// - latter: id
/// - Returns:
///
/// -
/// : 0.115224 : ___CCValue
public func getCCValue(_ former: Int, _ latter: Int) -> PValue {
if !ccParsed[former] {
let url = requestOptions.dictionaryResourceURL.appendingPathComponent("cb/\(former).binary", isDirectory: false)
let values = loadCCBinary(url: url)
ccLines[former] = [Int: PValue].init(uniqueKeysWithValues: values.map {(Int($0.0), PValue($0.1))})
ccParsed[former] = true
}
let defaultValue = ccLines[former][-1, default: -25]
return ccLines[former][latter, default: defaultValue]
}
/// meaning id
/// - Parameters:
/// - former: id
/// - latter: id
/// - Returns:
///
/// -
public func getMMValue(_ former: Int, _ latter: Int) -> PValue {
if former == 500 || latter == 500 {
return 0
}
return self.mmValue[former * self.midCount + latter]
}
private static let possibleLOUDS: Set<Character> = [
" ", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "*", "", "", "", "´", "¨", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "", "", "(", ")", "#", "%", "&", "^", "_", "'", "\""
]
//
internal static func existLOUDS(for character: Character) -> Bool {
Self.possibleLOUDS.contains(character)
}
/*
*
*
*
*
*
*
*/
/// class id
/// - Parameters:
/// - c_former: id
/// - c_latter: id
/// - Returns:
///
internal static func isClause(_ former: Int, _ latter: Int) -> Bool {
// EOS
let latter_wordtype = Self.wordTypes[latter]
if latter_wordtype == 3 {
return false
}
let former_wordtype = Self.wordTypes[former]
if former_wordtype == 3 {
return false
}
if latter_wordtype == 0 {
return former_wordtype != 0
}
if latter_wordtype == 1 {
return former_wordtype != 0
}
return false
}
/// wordTypes使
private static let BOS_EOS_wordIDs: Set<Int> = [CIDData.BOS.cid, CIDData.EOS.cid]
/// wordTypes使
private static let PREPOSITION_wordIDs: Set<Int> = [1315, 6, 557, 558, 559, 560]
/// wordTypes使
private static let INPOSITION_wordIDs: Set<Int> = Set<Int>(Array(561..<868)
+ Array(1283..<1297)
+ Array(1306..<1310)
+ Array(11..<53)
+ Array(555..<557)
+ Array(1281..<1283)
).union([1314, 3, 2, 4, 5, 1, 9])
/*
private static let POSTPOSITION_wordIDs: Set<Int> = Set<Int>((7...8).map{$0}
+ (54..<555).map{$0}
+ (868..<1281).map{$0}
+ (1297..<1306).map{$0}
+ (1310..<1314).map{$0}
).union([10])
*/
/// - Returns:
/// - 3 when BOS/EOS
/// - 0 when preposition
/// - 1 when core
/// - 2 when postposition
/// - 11B1.3KB
static let wordTypes = (0...1319).map(_judgeWordType)
/// wordTypes使
private static func _judgeWordType(cid: Int) -> UInt8 {
if Self.BOS_EOS_wordIDs.contains(cid) {
return 3 // BOS/EOS
}
if Self.PREPOSITION_wordIDs.contains(cid) {
return 0 //
}
if Self.INPOSITION_wordIDs.contains(cid) {
return 1 //
}
return 2 //
}
internal static func includeMMValueCalculation(_ data: DicdataElement) -> Bool {
//
if 895...1280 ~= data.lcid || 895...1280 ~= data.rcid {
return true
}
//
if 1297...1305 ~= data.lcid || 1297...1305 ~= data.rcid {
return true
}
//
return wordTypes[data.lcid] == 1 || wordTypes[data.rcid] == 1
}
/// - 12B2.6KB
static let penaltyRatio = (0...1319).map(_getTypoPenaltyRatio)
/// penaltyRatio使
internal static func _getTypoPenaltyRatio(_ lcid: Int) -> PValue {
// 147...368, 369...554
if 147...554 ~= lcid {
return 2.5
}
return 1
}
//
internal static func needWValueMemory(_ data: DicdataElement) -> Bool {
//
if 147...554 ~= data.lcid {
return false
}
//
if 557...560 ~= data.lcid {
return false
}
//
if 1297...1305 ~= data.lcid {
return false
}
//
if 6...9 ~= data.lcid {
return false
}
if 0 == data.lcid || 1316 == data.lcid {
return false
}
return true
}
internal static let possibleNexts: [String: [String]] = [
"x": ["", "", "", "", "", "", "", "", "", ""],
"l": ["", "", "", "", "", "", "", "", "", ""],
"xt": [""],
"lt": [""],
"xts": [""],
"lts": [""],
"xy": ["", "", ""],
"ly": ["", "", ""],
"xw": [""],
"lw": [""],
"v": [""],
"k": ["", "", "", "", ""],
"q": ["クァ", "クィ", "クゥ", "クェ", "クォ"],
"qy": ["クャ", "クィ", "クュ", "クェ", "クョ"],
"qw": ["クヮ", "クィ", "クゥ", "クェ", "クォ"],
"ky": ["キャ", "キィ", "キュ", "キェ", "キョ"],
"g": ["", "", "", "", ""],
"gy": ["ギャ", "ギィ", "ギュ", "ギェ", "ギョ"],
"s": ["", "", "", "", ""],
"sy": ["シャ", "シィ", "シュ", "シェ", "ショ"],
"sh": ["シャ", "シィ", "シュ", "シェ", "ショ"],
"z": ["", "", "", "", ""],
"zy": ["ジャ", "ジィ", "ジュ", "ジェ", "ジョ"],
"j": [""],
"t": ["", "", "", "", ""],
"ty": ["チャ", "チィ", "チュ", "チェ", "チョ"],
"ts": [""],
"th": ["テャ", "ティ", "テュ", "テェ", "テョ"],
"tw": ["トァ", "トィ", "トゥ", "トェ", "トォ"],
"cy": ["チャ", "チィ", "チュ", "チェ", "チョ"],
"ch": [""],
"d": ["", "", "", "", ""],
"dy": ["ヂャ", "ヂィ", "ヂュ", "ヂェ", "ヂョ"],
"dh": ["デャ", "ディ", "デュ", "デェ", "デョ"],
"dw": ["ドァ", "ドィ", "ドゥ", "ドェ", "ドォ"],
"n": ["", "", "", "", "", ""],
"ny": ["ニャ", "ニィ", "ニュ", "ニェ", "ニョ"],
"h": ["", "", "", "", ""],
"hy": ["ヒャ", "ヒィ", "ヒュ", "ヒェ", "ヒョ"],
"hw": ["ファ", "フィ", "フェ", "フォ"],
"f": [""],
"b": ["", "", "", "", ""],
"by": ["ビャ", "ビィ", "ビュ", "ビェ", "ビョ"],
"p": ["", "", "", "", ""],
"py": ["ピャ", "ピィ", "ピュ", "ピェ", "ピョ"],
"m": ["", "", "", "", ""],
"my": ["ミャ", "ミィ", "ミュ", "ミェ", "ミョ"],
"y": ["", "", "イェ", ""],
"r": ["", "", "", "", ""],
"ry": ["リャ", "リィ", "リュ", "リェ", "リョ"],
"w": ["", "ウィ", "ウェ", ""],
"wy": ["", ""]
]
}