Files
AzooKeyKanaKanjiConverter/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift
2025-05-25 19:36:11 +09:00

884 lines
42 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// LearningMemory.swift
// Keyboard
//
// Created by ensan on 2021/02/01.
// Copyright © 2021 ensan. All rights reserved.
//
import Foundation
import SwiftUtils
/// Usage statistics kept for one learned dictionary entry.
///
/// The property order is significant: elements are written to disk as raw bytes.
private struct MetadataElement: CustomDebugStringConvertible {
    /// Day index (see `LearningManager.today`) on which the entry was last used.
    var lastUsedDay: UInt16
    /// Day index up to which the periodic halving of `count` has been applied.
    var lastUpdatedDay: UInt16
    /// Usage counter.
    var count: UInt8

    /// Creates metadata whose "last used" and "last updated" days are both `day`.
    init(day: UInt16, count: UInt8) {
        self.count = count
        self.lastUpdatedDay = day
        self.lastUsedDay = day
    }

    var debugDescription: String {
        return "(lastUsedDay: \(lastUsedDay), lastUpdatedDay: \(lastUpdatedDay), count: \(count))"
    }
}
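/// Long-term learning memory persisted on disk as a LOUDS trie (`memory.louds`, `memory.loudschars2`, `memory.memorymetadata` and `memory<N>.loudstxt3` files).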
struct LongTermLearningMemory {
/// URL of the `.pause` marker file inside `directoryURL`.
private static func pauseFileURL(directoryURL: URL) -> URL {
    let markerName = ".pause"
    return directoryURL.appendingPathComponent(markerName, isDirectory: false)
}
/// URL of the LOUDS bit-array file, or of its temporary `.2` variant.
private static func loudsFileURL(asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory.louds.2" : "memory.louds"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
/// URL of the metadata file, or of its temporary `.2` variant.
private static func metadataFileURL(asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory.memorymetadata.2" : "memory.memorymetadata"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
/// URL of the per-node character file, or of its temporary `.2` variant.
private static func loudsCharsFileURL(asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory.loudschars2.2" : "memory.loudschars2"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
/// URL of the `.loudstxt3` data file identified by `value`, or of its temporary `.2` variant.
private static func loudsTxt3FileURL(_ value: String, asTemporaryFile: Bool, directoryURL: URL) -> URL {
    let fileName = asTemporaryFile ? "memory\(value).loudstxt3.2" : "memory\(value).loudstxt3"
    return directoryURL.appendingPathComponent(fileName, isDirectory: false)
}
/// Whether something exists on disk at `url`.
private static func fileExist(_ url: URL) -> Bool {
    let manager = FileManager.default
    return manager.fileExists(atPath: url.path)
}
/// Replaces the file at `toURL` with a copy of the file at `fromURL`.
/// - Parameters:
///   - fromURL: The file to copy; it is left in place.
///   - toURL: The destination, which ends up with the same contents as `fromURL`.
/// - Throws: The error from `FileManager.copyItem(at:to:)` when the copy fails.
private static func overwrite(from fromURL: URL, to toURL: URL) throws {
    let manager = FileManager.default
    // Removing the destination is best-effort: it may not exist yet. A failure
    // here is deliberately ignored and surfaces through the copy below instead.
    do {
        try manager.removeItem(at: toURL)
    } catch {}
    // The source (a `.2` file) is copied rather than moved, so it stays available.
    try manager.copyItem(at: fromURL, to: toURL)
}
/// Whether the long-term memory in `directoryURL` was left in an interrupted state.
/// - note: This is exactly the presence of the `.pause` marker file.
static func memoryCollapsed(directoryURL: URL) -> Bool {
    let marker = pauseFileURL(directoryURL: directoryURL)
    return fileExist(marker)
}
/// Maximum number of entries stored in a single `.loudstxt3` file.
static var txtFileSplit: Int {
    return 2048
}
/// Packs a bit sequence into 64-bit words, most significant bit first.
///
/// When `bools.count` is not a multiple of 64, the last word is padded with `true` bits.
/// An input whose length is an exact multiple of 64 produces exactly `count / 64` words
/// (the previous implementation read one word past the end and trapped); an empty
/// input produces an empty array.
/// - Parameter bools: The bits to pack, in order.
/// - Returns: `ceil(bools.count / 64)` words.
private static func BoolToUInt64(_ bools: [Bool]) -> [UInt64] {
    let unit = 64
    var result: [UInt64] = []
    result.reserveCapacity((bools.count + unit - 1) / unit)
    for wordStart in stride(from: 0, to: bools.count, by: unit) {
        var word: UInt64 = 0
        for offset in 0 ..< unit {
            let index = wordStart + offset
            // Positions past the end of the input are padding and count as `true`.
            let bit = index < bools.count ? bools[index] : true
            if bit {
                word |= 1 << UInt64(unit - offset - 1)
            }
        }
        result.append(word)
    }
    return result
}
/// Computes the score of a learned entry from its usage count and reading length.
/// - note:
///   A larger `metadata.count` and a longer `dicdata.ruby` both give a higher (less negative) value.
///   With `count` ranging over 0...255 the result lies in `[-8, -5]` for a 1-character ruby,
///   `[-6, -3]` for 2 characters and `[-5, -2]` for 4 characters.
fileprivate static func valueForData(metadata: MetadataElement, dicdata: DicdataElement) -> PValue {
    // 1 when the entry was never counted, 0 when the counter is saturated at 255.
    let unusedRatio = 1 - Double(metadata.count) / 255
    let lengthPenalty = 4 / Double(dicdata.ruby.count)
    let countPenalty = 3 * pow(unusedRatio, 3)
    return PValue(-1 - lengthPenalty - countPenalty)
}
/// The metadata of every entry stored at one trie node, in the form written to the metadata file.
fileprivate struct MetadataBlock {
    var metadata: [MetadataElement]

    /// Serialises the block as a 1-byte element count followed by the raw bytes
    /// (`MemoryLayout<MetadataElement>.size` each) of every element.
    ///
    /// Elements are written unchanged. Earlier code rebuilt each element with
    /// `MetadataElement(day: lastUsedDay, count:)`, which reset `lastUpdatedDay` to
    /// `lastUsedDay` on every save. That discarded the decay progress recorded during
    /// `merge`, so the count of an entry unused for more than 32 days was halved again
    /// on each subsequent merge.
    /// - note: `UInt8(metadata.count)` traps when the block holds more than 255 elements.
    func makeBinary() -> Data {
        var data = Data()
        // A mutable copy is needed to take the address of each element below.
        var metadata: [MetadataElement] = self.metadata
        // Element count, 1 byte.
        var count = UInt8(metadata.count)
        data.append(contentsOf: Data(bytes: &count, count: MemoryLayout<UInt8>.size))
        for i in metadata.indices {
            data.append(contentsOf: Data(bytes: &metadata[i], count: MemoryLayout<MetadataElement>.size))
        }
        return data
    }
}
/// The dictionary entries stored at one trie node, in the form written to a `.loudstxt3` file.
fileprivate struct DataBlock {
    /// Reading shared by the entries of this block.
    var ruby: String
    var data: [(word: String, lcid: Int, rcid: Int, mid: Int, score: PValue)]
    /// Number of entries in the block.
    var count: Int {
        return data.count
    }

    init(dicdata: [DicdataElement]) {
        // The block's ruby is the first non-empty ruby among the elements ("" if there is none).
        self.ruby = dicdata.first(where: { !$0.ruby.isEmpty })?.ruby ?? ""
        self.data = dicdata.map { element in
            (word: element.word, lcid: element.lcid, rcid: element.rcid, mid: element.mid, score: element.value())
        }
    }

    /// Serialises the block as one `.loudstxt3` entry:
    /// a 2-byte entry count, 10 bytes per entry (lcid, rcid, mid as `UInt16` and the
    /// score as `Float32`), then the UTF-8 text `ruby \t word \t word ...`.
    func makeLoudstxt3Entry() -> Data {
        var entry = Data()
        // Entry count, 2 bytes.
        let entryCount = UInt16(self.count)
        withUnsafeBytes(of: entryCount) { entry.append(contentsOf: $0) }
        // Fixed-size part: 10 bytes per entry, 10 * count bytes in total.
        let idRange = 0 ... Int(UInt16.max)
        for item in self.data {
            assert(idRange.contains(item.lcid))
            assert(idRange.contains(item.rcid))
            assert(idRange.contains(item.mid))
            for id in [item.lcid, item.rcid, item.mid] {
                withUnsafeBytes(of: UInt16(id)) { entry.append(contentsOf: $0) }
            }
            withUnsafeBytes(of: Float32(item.score)) { entry.append(contentsOf: $0) }
        }
        // Text part: the ruby comes first; a word equal to the ruby is stored as an empty string.
        var fields = [self.ruby]
        for item in self.data {
            fields.append(item.word == self.ruby ? "" : item.word)
        }
        let text = fields.joined(separator: "\t")
        entry.append(contentsOf: text.data(using: .utf8, allowLossyConversion: false)!)
        return entry
    }
}
/// Deletes every learning-memory file found directly inside `directoryURL`.
/// - Throws: Errors from listing the directory or from removing a file.
static func reset(directoryURL: URL) throws {
    let removableSuffixes = [
        // regular data files
        ".loudstxt3",
        ".loudschars2",
        ".memorymetadata",
        ".louds",
        // temporary files written during an update
        ".loudstxt3.2",
        ".loudschars2.2",
        ".memorymetadata.2",
        ".louds.2",
        // the `.pause` marker
        ".pause",
        // text-format learning data
        "learningMemory.txt"
    ]
    let manager = FileManager.default
    let contents = try manager.contentsOfDirectory(at: directoryURL, includingPropertiesForKeys: nil)
    for file in contents {
        guard file.isFileURL else {
            continue
        }
        let path = file.path
        if removableSuffixes.contains(where: { path.hasSuffix($0) }) {
            try manager.removeItem(at: file)
        }
    }
}
/// Merges the long-term memory stored in `directoryURL` into `tempTrie` and writes the combined trie back to disk.
///
/// If a `.pause` marker is present, the previous write was interrupted; the `.2` files are first
/// copied over the regular files before anything is read.
///
/// A stored entry is dropped when it is listed in `forgetTargets`, when its ruby cannot be mapped
/// through `char2UInt8`, when it was last used 128 or more days ago, or when its count reaches zero.
/// The count is halved once for every 32 days elapsed since `lastUpdatedDay`.
///
/// Stored data that is truncated or inconsistent no longer traps: reading stops at the first
/// point where the metadata and the `.loudstxt3` files cannot be kept aligned, and the entries
/// read so far are kept.
/// - Parameters:
///   - tempTrie: The in-memory trie to merge the stored entries into.
///   - forgetTargets: Stored entries equal to one of these are not carried over.
///   - directoryURL: Directory holding the memory files.
///   - maxMemoryCount: No further `.loudstxt3` file is read once the trie holds more entries than this.
///   - char2UInt8: Mapping from ruby characters to character IDs.
/// - Throws: Errors from the recovery copy or from `update(trie:directoryURL:)`.
static func merge(tempTrie: consuming TemporalLearningMemoryTrie, forgetTargets: [DicdataElement] = [], directoryURL: URL, maxMemoryCount: Int, char2UInt8: [Character: UInt8]) throws {
    // MARK: If `.pause` exists the previous `merge` was interrupted: redo the copy from the `.2` files first.
    if fileExist(pauseFileURL(directoryURL: directoryURL)) {
        debug("LongTermLearningMemory merge collapsion detected, trying recovery...")
        try overwriteTempFiles(
            directoryURL: directoryURL,
            loudsFileTemp: nil,
            loudsCharsFileTemp: nil,
            metadataFileTemp: nil,
            loudsTxt3FileCount: nil,
            removingRead2File: true
        )
    }
    // MARK: Load the stored entries into the trie.
    let startTime = Date()
    let today = LearningManager.today
    var newTrie = consume tempTrie
    let metadataSize = MemoryLayout<MetadataElement>.size
    // Metadata file layout:
    // entryCount (UInt32), then per entry: itemCount (UInt8), item * itemCount.
    // A missing file, or one too short to hold the entry count, is treated as "no entries".
    let emptyMetadata = Data([.zero, .zero, .zero, .zero])
    let storedMetadata = (try? Data(contentsOf: metadataFileURL(asTemporaryFile: false, directoryURL: directoryURL))) ?? emptyMetadata
    let ltMetadata = storedMetadata.count >= 4 ? storedMetadata : emptyMetadata
    var metadataOffset = 0
    // The first 4 bytes hold the entry count.
    let entryCount = ltMetadata[metadataOffset ..< metadataOffset + 4].toArray(of: UInt32.self)[0]
    metadataOffset += 4
    debug("LongTermLearningMemory merge entryCount", entryCount, ltMetadata.count)
    // Walk the `.loudstxt3` files in order; their entries line up one-to-one with the metadata stream.
    readFiles: for loudstxtIndex in 0 ..< Int(entryCount) / txtFileSplit + 1 {
        let loudstxtData: Data
        do {
            loudstxtData = try Data(contentsOf: loudsTxt3FileURL("\(loudstxtIndex)", asTemporaryFile: false, directoryURL: directoryURL))
        } catch {
            debug("LongTermLearningMemory merge failed to read \(loudstxtIndex)", error)
            // The metadata cursor cannot be advanced past an unreadable file, so every later
            // entry would be paired with the wrong metadata. Stop instead of continuing.
            break readFiles
        }
        // File header: entry count (UInt16) followed by one UInt32 start offset per entry.
        guard loudstxtData.count >= 2 else {
            debug("LongTermLearningMemory merge stopped because loudstxt3 header is truncated", loudstxtIndex)
            break readFiles
        }
        let count = Int(loudstxtData[0 ..< 2].toArray(of: UInt16.self)[0])
        guard loudstxtData.count >= 2 + 4 * count else {
            debug("LongTermLearningMemory merge stopped because loudstxt3 index is truncated", loudstxtIndex)
            break readFiles
        }
        let indices = loudstxtData[2 ..< 2 + 4 * count].toArray(of: UInt32.self)
        for i in 0 ..< count {
            // Read this entry's metadata: a 1-byte item count, then the items.
            guard metadataOffset + 1 <= ltMetadata.count else {
                debug("LongTermLearningMemory merge stopped because metadata is truncated", loudstxtIndex, i)
                break readFiles
            }
            let itemCount = Int(ltMetadata[metadataOffset ..< metadataOffset + 1].toArray(of: UInt8.self)[0])
            metadataOffset += 1
            guard metadataOffset + itemCount * metadataSize <= ltMetadata.count else {
                debug("LongTermLearningMemory merge stopped because metadata is truncated", loudstxtIndex, i)
                break readFiles
            }
            let metadata = (0 ..< itemCount).map {
                let range = metadataOffset + $0 * metadataSize ..< metadataOffset + ($0 + 1) * metadataSize
                return ltMetadata[range].toArray(of: MetadataElement.self)[0]
            }
            metadataOffset += itemCount * metadataSize
            // Locate the entry's bytes; the last entry runs to the end of the file.
            let startIndex = Int(indices[i])
            let endIndex = i == (indices.endIndex - 1) ? loudstxtData.endIndex : Int(indices[i + 1])
            guard startIndex <= endIndex, endIndex <= loudstxtData.endIndex else {
                // The metadata cursor has already been advanced, so skipping only this entry is safe.
                debug("LongTermLearningMemory merge skipped an entry with an invalid byte range", loudstxtIndex, i)
                continue
            }
            let elements = LOUDS.parseBinary(binary: loudstxtData[startIndex ..< endIndex])
            // Entries whose ruby cannot be expressed as character IDs cannot be placed in the trie.
            guard let ruby = elements.first?.ruby,
                  let chars = LearningManager.keyToChars(ruby, char2UInt8: char2UInt8) else {
                continue
            }
            var newDicdata: [DicdataElement] = []
            var newMetadata: [MetadataElement] = []
            assert(elements.count == metadata.count, "elements count and metadata count must be equal.")
            for (dicdataElement, metadataElement) in zip(elements, metadata) {
                // Entries the caller asked to forget are not carried over.
                if forgetTargets.contains(dicdataElement) {
                    debug("LongTermLearningMemory merge stopped because it is a forget target", dicdataElement)
                    continue
                }
                if ruby != dicdataElement.ruby {
                    debug("LongTermLearningMemory merge stopped because dicdataElement has different ruby", dicdataElement, ruby)
                    continue
                }
                var metadataElement = metadataElement
                if today < metadataElement.lastUpdatedDay || today < metadataElement.lastUsedDay {
                    // A date in the future means the stored days cannot be trusted; start over from today.
                    metadataElement = MetadataElement(day: today, count: 1)
                }
                guard today - metadataElement.lastUsedDay < 128 else {
                    // Not used for 128 days or more: drop the entry.
                    debug("LongTermLearningMemory merge stopped because metadata is strange", dicdataElement, metadataElement, today)
                    continue
                }
                var dicdataElement = dicdataElement
                // Halve the count once for every 32 days elapsed since the last decay.
                while today - metadataElement.lastUpdatedDay > 32 {
                    metadataElement.count >>= 1
                    metadataElement.lastUpdatedDay += 32
                }
                // An entry whose count decayed to zero is forgotten.
                guard metadataElement.count > 0 else {
                    debug("LongTermLearningMemory merge stopped because count is zero", dicdataElement, metadataElement)
                    continue
                }
                dicdataElement.baseValue = valueForData(metadata: metadataElement, dicdata: dicdataElement)
                newDicdata.append(dicdataElement)
                newMetadata.append(metadataElement)
            }
            newTrie.append(dicdata: newDicdata, chars: chars, metadata: newMetadata)
        }
        // Stop reading further files once the trie is over the limit.
        if newTrie.dicdata.count > maxMemoryCount {
            break
        }
    }
    // Write the merged trie back as LOUDS files.
    try self.update(trie: newTrie, directoryURL: directoryURL)
    debug("LongTermLearningMemory merge ⏰", Date().timeIntervalSince(startTime), newTrie.dicdata.count)
}
/// Builds the contents of one `.loudstxt3` file from `lines`.
///
/// Layout: entry count (`UInt16`), the start offset of each entry (`UInt32` each), then the entries.
fileprivate static func make_loudstxt3(lines: [DataBlock]) -> Data {
    let entries = lines.map { $0.makeLoudstxt3Entry() }
    let entryCount = entries.count
    // The first entry starts right after the count and the offset table.
    let firstEntryOffset: UInt32 = 2 + UInt32(entryCount) * UInt32(MemoryLayout<UInt32>.size)
    var offsets: [UInt32] = [firstEntryOffset]
    // Each following offset is the previous one plus the previous entry's size.
    for entry in entries.dropLast() {
        offsets.append(offsets[offsets.count - 1] + UInt32(entry.count))
    }
    var binary = Data(bytes: [UInt16(entryCount)], count: 2)
    binary.append(Data(bytes: offsets, count: MemoryLayout<UInt32>.size * offsets.count))
    for entry in entries {
        binary.append(entry)
    }
    return binary
}
/// Errors thrown by `update(trie:directoryURL:)`.
enum UpdateError: Error {
/// A `.pause` marker file already exists in the target directory, so the update was not started.
case pauseFileExist
}
/// Writes `trie` to `directoryURL` as the new long-term memory.
///
/// The write proceeds in four steps:
/// 1. Every file is written under its temporary name (`memory.louds.2` and the other `.2` files).
/// 2. An empty `.pause` marker file is created.
/// 3. Each `.2` file is copied over its regular counterpart.
/// 4. The `.pause` marker is removed.
///
/// Consequences for an interrupted run:
/// * Interrupted before `.pause` exists: the regular files have not been touched.
/// * Interrupted while `.pause` exists: all `.2` files were already written, so step 3 can be redone from them; `merge` does this when it finds `.pause`.
///
/// - Throws: `UpdateError.pauseFileExist` when `.pause` already exists, or any error raised while writing, copying or removing files.
static func update(trie: TemporalLearningMemoryTrie, directoryURL: URL) throws {
// MARK: Refuse to run while a `.pause` marker exists
// Recovery is expected to have removed it before this point (see `merge`).
guard !fileExist(pauseFileURL(directoryURL: directoryURL)) else {
throw UpdateError.pauseFileExist
}
// MARK: Step 1 - build the LOUDS data and write the `.2` files
// Index 0 and 1 of every per-node array are placeholder slots; real nodes start at index 2.
var nodes2Characters: [UInt8] = [0x0, 0x0]
var dicdata: [DataBlock] = [.init(dicdata: []), .init(dicdata: [])]
var metadata: [MetadataBlock] = [.init(metadata: []), .init(metadata: [])]
var bits: [Bool] = [true, false]
// Breadth-first walk over the trie; children are visited in ascending character-ID order.
var currentNodes: [(UInt8, Int)] = trie.nodes[0].children.sorted(by: {$0.key < $1.key})
bits += [Bool](repeating: true, count: currentNodes.count) + [false]
while !currentNodes.isEmpty {
currentNodes.forEach {char, nodeIndex in
nodes2Characters.append(char)
dicdata.append(DataBlock(dicdata: trie.nodes[nodeIndex].dataIndices.map {trie.dicdata[$0]}))
metadata.append(MetadataBlock(metadata: trie.nodes[nodeIndex].dataIndices.map {trie.metadata[$0]}))
// One `true` per child followed by a terminating `false`.
bits += [Bool](repeating: true, count: trie.nodes[nodeIndex].children.count) + [false]
}
currentNodes = currentNodes.flatMap {(_, nodeIndex) in trie.nodes[nodeIndex].children.sorted(by: {$0.key < $1.key})}
}
let bytes = Self.BoolToUInt64(bits)
let loudsFileTemp = loudsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
do {
let binary = Data(bytes: bytes, count: bytes.count * 8)
try binary.write(to: loudsFileTemp)
}
let loudsCharsFileTemp = loudsCharsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
do {
let binary = Data(bytes: nodes2Characters, count: nodes2Characters.count)
try binary.write(to: loudsCharsFileTemp)
}
let metadataFileTemp = metadataFileURL(asTemporaryFile: true, directoryURL: directoryURL)
do {
let binary = Data(bytes: [UInt32(metadata.count)], count: 4) // entry count as UInt32
let result = metadata.reduce(into: binary) {
$0.append(contentsOf: $1.makeBinary())
}
try result.write(to: metadataFileTemp)
}
let loudsTxt3FileCount: Int
do {
// Entries are split into files of `txtFileSplit` entries each; the last file may hold fewer (or none).
loudsTxt3FileCount = ((dicdata.count) / txtFileSplit) + 1
let indiceses: [Range<Int>] = (0..<loudsTxt3FileCount).map {
let start = $0 * txtFileSplit
let _end = ($0 + 1) * txtFileSplit
let end = dicdata.count < _end ? dicdata.count:_end
return start..<end
}
for indices in indiceses {
do {
// The file number is the index of the chunk.
let start = indices.startIndex / txtFileSplit
let binary = make_loudstxt3(lines: Array(dicdata[indices]))
try binary.write(to: loudsTxt3FileURL("\(start)", asTemporaryFile: true, directoryURL: directoryURL), options: .atomic)
}
}
}
// MARK: Step 2 - create the `.pause` marker
try Data().write(to: pauseFileURL(directoryURL: directoryURL))
// MARK: Step 3 - copy the `.2` files over the regular files
try overwriteTempFiles(
directoryURL: directoryURL,
loudsFileTemp: loudsFileTemp,
loudsCharsFileTemp: loudsCharsFileTemp,
metadataFileTemp: metadataFileTemp,
loudsTxt3FileCount: loudsTxt3FileCount,
// MARK: Step 4 - remove the `.pause` marker once everything is copied
removingRead2File: true
)
}
/// Copies every temporary `.2` file over its regular counterpart.
///
/// A `nil` URL argument falls back to the standard `.2` location inside `directoryURL`.
/// When `loudsTxt3FileCount` is `nil`, every file in the directory whose path ends in
/// `.loudstxt3.2` is copied. The `.louds` file is copied last.
/// - note: When `removingRead2File` is `true`, the `.pause` marker is removed at the end.
private static func overwriteTempFiles(directoryURL: URL, loudsFileTemp: URL?, loudsCharsFileTemp: URL?, metadataFileTemp: URL?, loudsTxt3FileCount: Int?, removingRead2File: Bool) throws {
    let charsSource = loudsCharsFileTemp ?? loudsCharsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try overwrite(from: charsSource, to: loudsCharsFileURL(asTemporaryFile: false, directoryURL: directoryURL))

    let metadataSource = metadataFileTemp ?? metadataFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try overwrite(from: metadataSource, to: metadataFileURL(asTemporaryFile: false, directoryURL: directoryURL))

    switch loudsTxt3FileCount {
    case .some(let fileCount):
        for number in 0 ..< fileCount {
            let source = loudsTxt3FileURL("\(number)", asTemporaryFile: true, directoryURL: directoryURL)
            let destination = loudsTxt3FileURL("\(number)", asTemporaryFile: false, directoryURL: directoryURL)
            try overwrite(from: source, to: destination)
        }
    case .none:
        let contents = try FileManager.default.contentsOfDirectory(at: directoryURL, includingPropertiesForKeys: nil)
        for file in contents where file.isFileURL && file.path.hasSuffix(".loudstxt3.2") {
            // Dropping the trailing ".2" gives the regular file's path.
            let destination = URL(fileURLWithPath: String(file.path.dropLast(2)))
            try overwrite(from: file, to: destination)
        }
    }

    // The `.louds` file goes last.
    let loudsSource = loudsFileTemp ?? loudsFileURL(asTemporaryFile: true, directoryURL: directoryURL)
    try overwrite(from: loudsSource, to: loudsFileURL(asTemporaryFile: false, directoryURL: directoryURL))

    guard removingRead2File else {
        return
    }
    try FileManager.default.removeItem(at: pauseFileURL(directoryURL: directoryURL))
}
}
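/// In-memory trie of learned entries keyed by character-ID sequences; `LongTermLearningMemory.merge` combines it with the data stored on disk.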
struct TemporalLearningMemoryTrie {
/// One node of the trie.
struct Node {
var dataIndices: [Int] = [] // indices into the trie's `dicdata` / `metadata` arrays for the entries stored at this node
var children: [UInt8: Int] = [:] // character ID -> index of the child node in `nodes`
}
fileprivate var nodes = [Node()]
fileprivate var dicdata: [DicdataElement] = []
fileprivate var metadata: [MetadataElement] = []
/// Adds entries, together with their metadata, at the node reached by `chars`.
///
/// An entry that matches one already stored at that node (same word, lcid and rcid) is merged
/// into it: the later days win and the counts are added with saturation. Other entries are appended.
fileprivate mutating func append(dicdata: [DicdataElement], chars: [UInt8], metadata: [MetadataElement]) {
    assert(dicdata.count == metadata.count, "count of dicdata and metadata do not match")
    // Walk down along `chars`, creating the nodes that do not exist yet.
    var nodeIndex = 0
    for char in chars {
        if let existing = nodes[nodeIndex].children[char] {
            nodeIndex = existing
            continue
        }
        let created = nodes.endIndex
        nodes[nodeIndex].children[char] = created
        nodes.append(Node())
        nodeIndex = created
    }
    for (incoming, incomingMetadata) in zip(dicdata, metadata) {
        let matchedIndex = nodes[nodeIndex].dataIndices.first(where: {
            Self.sameDicdataIfRubyIsEqual(left: self.dicdata[$0], right: incoming)
        })
        guard let dataIndex = matchedIndex else {
            // Not stored at this node yet: append the data and register its index.
            let newIndex = self.dicdata.endIndex
            self.dicdata.append(incoming)
            self.metadata.append(incomingMetadata)
            nodes[nodeIndex].dataIndices.append(newIndex)
            self.dicdata[newIndex].metadata = .isLearned
            continue
        }
        // Already stored at this node: fold the incoming metadata into the existing one.
        var merged = self.metadata[dataIndex]
        merged.lastUsedDay = max(merged.lastUsedDay, incomingMetadata.lastUsedDay)
        merged.lastUpdatedDay = max(merged.lastUpdatedDay, incomingMetadata.lastUpdatedDay)
        merged.count += min(UInt8.max - merged.count, incomingMetadata.count)
        self.metadata[dataIndex] = merged
        // Replace the element and recompute its value from the merged metadata.
        var replacement = incoming
        replacement.baseValue = LongTermLearningMemory.valueForData(metadata: merged, dicdata: incoming)
        replacement.metadata = .isLearned
        self.dicdata[dataIndex] = replacement
    }
}
/// Whether two `DicdataElement`s already known to share a ruby denote the same entry
/// (equal `lcid`, `rcid` and `word`).
private static func sameDicdataIfRubyIsEqual(left: DicdataElement, right: DicdataElement) -> Bool {
    guard left.lcid == right.lcid, left.rcid == right.rcid else {
        return false
    }
    return left.word == right.word
}
/// Records one use of `dicdataElement` at the node reached by `chars`.
///
/// A known entry gets its count incremented (saturating) and its last-used day set to today;
/// an unknown entry is stored with a count of 1. In both cases `adjust` is set so that
/// `baseValue + adjust` equals the learned value.
mutating func memorize(dicdataElement: DicdataElement, chars: [UInt8]) {
    // Walk down along `chars`, creating the nodes that do not exist yet.
    var nodeIndex = 0
    for char in chars {
        if let existing = nodes[nodeIndex].children[char] {
            nodeIndex = existing
            continue
        }
        let created = nodes.endIndex
        nodes[nodeIndex].children[char] = created
        nodes.append(Node())
        nodeIndex = created
    }
    let day = LearningManager.today
    let knownIndex = nodes[nodeIndex].dataIndices.first(where: {
        Self.sameDicdataIfRubyIsEqual(left: self.dicdata[$0], right: dicdataElement)
    })
    guard let dataIndex = knownIndex else {
        // First occurrence: store the element with fresh metadata.
        let newIndex = self.dicdata.endIndex
        let freshMetadata = MetadataElement(day: day, count: 1)
        var learned = dicdataElement
        learned.adjust = LongTermLearningMemory.valueForData(metadata: freshMetadata, dicdata: learned) - learned.baseValue
        learned.metadata = .isLearned
        self.dicdata.append(learned)
        self.metadata.append(freshMetadata)
        nodes[nodeIndex].dataIndices.append(newIndex)
        return
    }
    // Known entry: bump the counter without overflowing and refresh the last-used day.
    var updated = self.metadata[dataIndex]
    updated.count += min(UInt8.max - updated.count, 1)
    updated.lastUsedDay = day
    self.metadata[dataIndex] = updated
    // `adjust` is the gap between the learned value and the element's own base value.
    self.dicdata[dataIndex].adjust = LongTermLearningMemory.valueForData(metadata: updated, dicdata: dicdataElement) - dicdataElement.baseValue
    self.dicdata[dataIndex].metadata = .isLearned
}
/// Detaches every entry equal to `dicdataElement` from the node reached by `chars`.
/// - Returns: `false` when no node exists for `chars`; `true` otherwise, whether or not anything was detached.
@discardableResult
mutating func forget(dicdataElement: DicdataElement, chars: [UInt8]) -> Bool {
    var nodeIndex = 0
    for char in chars {
        guard let next = nodes[nodeIndex].children[char] else {
            // No such path in the trie, so there is nothing to forget.
            return false
        }
        nodeIndex = next
    }
    // Only the node's index list is edited; the `dicdata` array itself is left untouched.
    let remaining = nodes[nodeIndex].dataIndices.filter { !(self.dicdata[$0] == dicdataElement) }
    nodes[nodeIndex].dataIndices = remaining
    return true
}
/// Returns the entries stored exactly at the node reached by `chars` (empty when there is no such node).
func perfectMatch(chars: [UInt8]) -> [DicdataElement] {
    var nodeIndex = 0
    for char in chars {
        guard let next = nodes[nodeIndex].children[char] else {
            return []
        }
        nodeIndex = next
    }
    var matched: [DicdataElement] = []
    for dataIndex in nodes[nodeIndex].dataIndices {
        matched.append(dicdata[dataIndex])
    }
    return matched
}
/// Returns the entries stored at each node passed while walking along `chars`,
/// restricted to the steps whose offset within `chars` is contained in `depth`.
/// The walk ends at the first character with no matching child.
func throughMatch(chars: [UInt8], depth: Range<Int>) -> [DicdataElement] {
    var nodeIndex = 0
    var collected: [Int] = []
    for (offset, char) in chars.enumerated() {
        guard let next = nodes[nodeIndex].children[char] else {
            break
        }
        nodeIndex = next
        if depth.contains(offset) {
            collected += nodes[nodeIndex].dataIndices
        }
    }
    return collected.map { dicdata[$0] }
}
/// Returns the entries stored at the node reached by `chars` and at all of its descendants
/// (empty when there is no such node).
func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
    var rootIndex = 0
    for char in chars {
        guard let next = nodes[rootIndex].children[char] else {
            return []
        }
        rootIndex = next
    }
    // Depth-first traversal of the subtree using an explicit stack.
    var collected: [Int] = nodes[rootIndex].dataIndices
    var pending: [Int] = Array(nodes[rootIndex].children.values)
    while !pending.isEmpty {
        let current = pending.removeLast()
        pending.append(contentsOf: nodes[current].children.values)
        collected.append(contentsOf: nodes[current].dataIndices)
    }
    return collected.map { dicdata[$0] }
}
}
final class LearningManager {
/// Loads `louds/charID.chid` from `bundleURL` and stores the character -> ID table in `target`.
///
/// The ID of a character is its position in the file. On a read failure the error is
/// logged and `target` is left unchanged.
private static func updateChar2Int8(bundleURL: URL, target: inout [Character: UInt8]) {
    let chidURL = bundleURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
    let contents: String
    do {
        contents = try String(contentsOf: chidURL, encoding: .utf8)
    } catch {
        debug("Error: louds/charID.chidが存在しません。このエラーは深刻ですが、テスト時には無視できる場合があります。Description: \(error)")
        return
    }
    let pairs = contents.enumerated().map { pair in
        (pair.element, UInt8(pair.offset))
    }
    target = Dictionary(uniqueKeysWithValues: pairs)
}
private var char2UInt8: [Character: UInt8] = [:]
/// Today's day index: whole days since the Unix epoch, minus 19000.
///
/// The value is clamped to the `UInt16` range. Previously a system clock set earlier than
/// day 19000 (January 2022) made the unsigned subtraction underflow and trap; such a clock
/// now yields `0`.
static var today: UInt16 {
    let daysSinceEpoch = Int(Date().timeIntervalSince1970) / 86400
    return UInt16(clamping: daysSinceEpoch - 19000)
}
/// Converts `key` into character IDs using `char2UInt8`.
/// - Returns: The ID sequence, or `nil` when any character of `key` has no ID.
static func keyToChars(_ key: some StringProtocol, char2UInt8: [Character: UInt8]) -> [UInt8]? {
    var ids: [UInt8] = []
    ids.reserveCapacity(key.count)
    for character in key {
        guard let id = char2UInt8[character] else {
            return nil
        }
        ids.append(id)
    }
    return ids
}
private var temporaryMemory: TemporalLearningMemoryTrie = .init()
private var options: ConvertRequestOptions = .default
private var memoryCollapsed: Bool = false
/// Whether learning data is in use: the memory is not collapsed and the learning type needs it.
var enabled: Bool {
    guard !memoryCollapsed else {
        return false
    }
    return options.learningType.needUsingMemory
}
/// Creates a manager with the default options and, if the long-term memory was left in an
/// interrupted state (`.pause` present), tries to recover it.
///
/// The character-ID table is loaded *before* the recovery merge. Previously it was loaded
/// afterwards, so the merge ran with an empty table: every stored ruby failed to map to
/// character IDs, every entry was skipped, and an empty memory was written back.
init() {
    let needsMemory = options.learningType.needUsingMemory
    if needsMemory {
        Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL, target: &char2UInt8)
    }
    self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: self.options.memoryDirectoryURL)
    if self.memoryCollapsed && needsMemory {
        if char2UInt8.isEmpty {
            // Without the table no stored entry could be kept, so merging now would erase the
            // memory. Leave the `.pause` marker in place for a later attempt.
            debug("LearningManager init: automatic merge skipped because charID table is unavailable")
        } else {
            do {
                try LongTermLearningMemory.merge(
                    tempTrie: TemporalLearningMemoryTrie(),
                    directoryURL: self.options.memoryDirectoryURL,
                    maxMemoryCount: options.maxMemoryCount,
                    char2UInt8: char2UInt8
                )
            } catch {
                debug("LearningManager init: automatic merge failed", error)
            }
            // Re-check: a successful recovery removes the `.pause` marker.
            self.memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: self.options.memoryDirectoryURL)
        }
    }
    if memoryCollapsed {
        // Learning stays disabled (see `enabled`) while the memory is collapsed.
        debug("LearningManager init: Memory Collapsed")
    }
}
/// Applies new request options.
///
/// Reloads the character-ID table when the dictionary resource URL changed, clears the
/// temporary memory when learning is turned off, and resets all learning data when
/// `options.shouldResetMemory` is set.
/// - Returns: Whether cache should be reset or not.
func setRequestOptions(options: ConvertRequestOptions) -> Bool {
    // The table must be reloaded before the stored options are replaced.
    let dictionaryChanged = options.dictionaryResourceURL != self.options.dictionaryResourceURL
    if dictionaryChanged {
        Self.updateChar2Int8(bundleURL: options.dictionaryResourceURL, target: &char2UInt8)
    }
    self.options = options
    switch options.learningType {
    case .nothing:
        self.temporaryMemory = TemporalLearningMemoryTrie()
    case .inputAndOutput, .onlyOutput:
        break
    }
    // A reset request is honoured once and then cleared from the stored options.
    guard options.shouldResetMemory else {
        return false
    }
    self.reset()
    self.options.shouldResetMemory = false
    return true
}
/// Exact-match lookup in the temporary memory; empty when the learning type does not use memory.
func temporaryPerfectMatch(charIDs: [UInt8]) -> [DicdataElement] {
    guard options.learningType.needUsingMemory else {
        return []
    }
    return temporaryMemory.perfectMatch(chars: charIDs)
}
/// Through-match lookup in the temporary memory; empty when the learning type does not use memory.
func temporaryThroughMatch(charIDs: [UInt8], depth: Range<Int>) -> [DicdataElement] {
    guard options.learningType.needUsingMemory else {
        return []
    }
    return temporaryMemory.throughMatch(chars: charIDs, depth: depth)
}
/// Prefix-match lookup in the temporary memory; empty when the learning type does not use memory.
func temporaryPrefixMatch(charIDs: [UInt8]) -> [DicdataElement] {
    guard options.learningType.needUsingMemory else {
        return []
    }
    return temporaryMemory.prefixMatch(chars: charIDs)
}
/// Learns `data` with no preceding context; equivalent to `update(data: [], updatePart: data)`.
func update(data: [DicdataElement]) {
    let noContext: [DicdataElement] = []
    update(data: noContext, updatePart: data)
}
/// Learns the elements of `updatePart`, using `data` as the context that precedes them.
///
/// Three kinds of entries are memorized in the temporary memory:
/// 1. each element of `updatePart` for which `DicdataStore.needWValueMemory` is true,
/// 2. pairs of adjacent clauses (clause boundaries are decided by `DicdataStore.isClause`),
/// 3. one element joining everything in `data` followed by `updatePart`.
///
/// Nothing is done when the learning type does not update memory.
func update(data: [DicdataElement], updatePart: [DicdataElement]) {
if !options.learningType.needUpdateMemory {
return
}
// 1. Single elements of the updated part.
for datum in updatePart where DicdataStore.needWValueMemory(datum) {
guard let chars = Self.keyToChars(datum.ruby, char2UInt8: char2UInt8) else {
continue
}
self.temporaryMemory.memorize(dicdataElement: datum, chars: chars)
}
// With a single element in total there is nothing to combine.
if data.count + updatePart.count == 1 {
return
}
// 2. Clause bigrams.
do {
var firstClause: DicdataElement?
var secondClause: DicdataElement?
for (datum, index) in zip(data.chained(updatePart), 0 ..< data.count + updatePart.count) {
if var newFirstClause = firstClause {
if var newSecondClause = secondClause {
if DicdataStore.isClause(newFirstClause.rcid, datum.lcid) {
// A boundary reached while `datum` still belongs to `data` (index below `data.endIndex`) is skipped.
// NOTE(review): on this path neither clause is shifted and `datum` is not appended to any clause - confirm this is intended.
guard data.endIndex <= index else {
continue
}
// Both clauses are complete: join them into one bigram element.
// NOTE(review): `rcid` is taken from the first clause, while the whole-sequence element below uses the last element's `rcid` - confirm this is intended.
let element = DicdataElement(
word: newFirstClause.word + newSecondClause.word,
ruby: newFirstClause.ruby + newSecondClause.ruby,
lcid: newFirstClause.lcid,
rcid: newFirstClause.rcid,
mid: newSecondClause.mid,
value: newFirstClause.baseValue + newSecondClause.baseValue
)
// Shift the window: the second clause becomes the first, `datum` starts the next one.
firstClause = secondClause
secondClause = datum
guard let chars = Self.keyToChars(element.ruby, char2UInt8: char2UInt8) else {
continue
}
debug("LearningManager update first/second", element)
self.temporaryMemory.memorize(dicdataElement: element, chars: chars)
} else {
// Both clauses exist and no boundary was found: extend the second clause with `datum`.
newSecondClause.word.append(contentsOf: datum.word)
newSecondClause.ruby.append(contentsOf: datum.ruby)
newSecondClause.rcid = datum.rcid
if DicdataStore.includeMMValueCalculation(datum) {
newSecondClause.mid = datum.mid
}
newSecondClause.baseValue += datum.baseValue
secondClause = newSecondClause
}
} else {
if DicdataStore.isClause(newFirstClause.rcid, datum.lcid) {
// Only the first clause exists and a boundary was found: `datum` starts the second clause.
secondClause = datum
} else {
// Only the first clause exists and no boundary was found: extend the first clause with `datum`.
newFirstClause.word.append(contentsOf: datum.word)
newFirstClause.ruby.append(contentsOf: datum.ruby)
newFirstClause.rcid = datum.rcid
if DicdataStore.includeMMValueCalculation(datum) {
newFirstClause.mid = datum.mid
}
newFirstClause.baseValue += datum.baseValue
firstClause = newFirstClause
}
}
} else {
firstClause = datum
}
}
// Memorize the clause pair left over when the loop ends.
if let firstClause, let secondClause {
let element = DicdataElement(
word: firstClause.word + secondClause.word,
ruby: firstClause.ruby + secondClause.ruby,
lcid: firstClause.lcid,
rcid: firstClause.rcid,
mid: secondClause.mid,
value: firstClause.baseValue + secondClause.baseValue
)
if let chars = Self.keyToChars(element.ruby, char2UInt8: char2UInt8) {
debug("LearningManager update first/second rest", element)
self.temporaryMemory.memorize(dicdataElement: element, chars: chars)
}
}
}
// 3. The whole sequence joined into a single element.
// NOTE(review): in the three fallback expressions below, the name between the two dots appears to be missing from this copy of the file - restore it from the original source.
let data = data.chained(updatePart)
let element = DicdataElement(
word: data.reduce(into: "") {$0.append(contentsOf: $1.word)},
ruby: data.reduce(into: "") {$0.append(contentsOf: $1.ruby)},
lcid: data.first?.lcid ?? CIDData..cid,
rcid: data.last?.rcid ?? CIDData..cid,
mid: data.last?.mid ?? MIDData..mid,
value: data.reduce(into: 0) {$0 += $1.baseValue}
)
guard let chars = Self.keyToChars(element.ruby, char2UInt8: char2UInt8) else {
return
}
debug("LearningManager update all", element)
self.temporaryMemory.memorize(dicdataElement: element, chars: chars)
}
/// Removes `data` from both the temporary memory and the long-term memory.
func forgetMemory(data: [DicdataElement]) {
    // 1. Temporary memory: detach each element whose ruby can be mapped to character IDs.
    for element in data {
        if let chars = Self.keyToChars(element.ruby, char2UInt8: char2UInt8) {
            temporaryMemory.forget(dicdataElement: element, chars: chars)
        }
    }
    // 2. Long-term memory: merge with `data` as forget targets.
    let directory = options.memoryDirectoryURL
    do {
        try LongTermLearningMemory.merge(
            tempTrie: temporaryMemory,
            forgetTargets: data,
            directoryURL: directory,
            maxMemoryCount: options.maxMemoryCount,
            char2UInt8: char2UInt8
        )
        // Everything has been written out, so the temporary memory starts over.
        temporaryMemory = TemporalLearningMemoryTrie()
    } catch {
        // The temporary memory is kept when the merge fails.
        debug("LearningManager resetLearning: Failed to save LongTermLearningMemory", error)
    }
    // Re-check whether the merge left the memory in an interrupted state.
    memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: directory)
}
/// Merges the temporary memory into the long-term memory on disk.
/// Does nothing when the learning type does not update memory.
func save() {
    guard options.learningType.needUpdateMemory else {
        return
    }
    let directory = options.memoryDirectoryURL
    do {
        try LongTermLearningMemory.merge(
            tempTrie: temporaryMemory,
            directoryURL: directory,
            maxMemoryCount: options.maxMemoryCount,
            char2UInt8: char2UInt8
        )
        // Everything has been written out, so the temporary memory starts over.
        temporaryMemory = TemporalLearningMemoryTrie()
    } catch {
        // The temporary memory is kept when the merge fails.
        debug("LearningManager save: Failed to save LongTermLearningMemory", error)
    }
    // Re-check whether the merge left the memory in an interrupted state.
    memoryCollapsed = LongTermLearningMemory.memoryCollapsed(directoryURL: directory)
}
/// Clears the temporary memory and deletes the long-term memory files; a deletion failure is only logged.
func reset() {
    temporaryMemory = TemporalLearningMemoryTrie()
    let directory = options.memoryDirectoryURL
    do {
        try LongTermLearningMemory.reset(directoryURL: directory)
    } catch {
        debug("LearningManager reset failed", error)
    }
}
}