feat: エスケープされた辞書名をサポート(v2.3.b2) (#106)

* feat: support v2.3.b2 dictionary (https://github.com/ensan-hcl/azooKey_dictionary_storage/pull/3)

* update to v2.3.b2
This commit is contained in:
Miwa
2024-06-23 17:53:13 +09:00
committed by GitHub
parent c2e88ae720
commit fbf09a76eb
3 changed files with 44 additions and 28 deletions

View File

@ -66,8 +66,8 @@ public final class DicdataStore {
self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount) self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount)
} }
} }
_ = self.loadLOUDS(identifier: "user") _ = self.loadLOUDS(query: "user")
_ = self.loadLOUDS(identifier: "memory") _ = self.loadLOUDS(query: "memory")
} }
public enum Notification { public enum Notification {
@ -151,30 +151,46 @@ public final class DicdataStore {
return Self.getPenalty(data: data) < -d return Self.getPenalty(data: data) < -d
} }
func loadLOUDS(identifier: String) -> LOUDS? { func loadLOUDS(query: String) -> LOUDS? {
if importedLoudses.contains(identifier) { if importedLoudses.contains(query) {
return self.loudses[identifier] return self.loudses[query]
} }
// Record the query in importedLoudses whether or not the LOUDS load below succeeds, so a failed load is not retried
importedLoudses.insert(identifier) importedLoudses.insert(query)
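// Escape certain ASCII characters to their bracketed hex form (e.g. "/" -> "[2F]") to obtain the identifier passed to LOUDS.load
// NOTE(review): #"\n"# is a raw string (backslash followed by "n"), not a line feed — confirm whether "\n" was intended for the [0A] entry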
let identifier = [
#"\n"#: "[0A]",
#" "#: "[20]",
#"""#: "[22]",
#"'"#: "[27]",
#"*"#: "[2A]",
#"+"#: "[2B]",
#"."#: "[2E]",
#"/"#: "[2F]",
#":"#: "[3A]",
#"<"#: "[3C]",
#">"#: "[3E]",
#"\"#: "[5C]",
#"|"#: "[7C]",
][query, default: query]
if let louds = LOUDS.load(identifier, option: self.requestOptions) { if let louds = LOUDS.load(identifier, option: self.requestOptions) {
self.loudses[identifier] = louds self.loudses[query] = louds
return louds return louds
} else { } else {
debug("loudsの読み込みに失敗identifierは\(identifier)") debug("loudsの読み込みに失敗identifierは\(query)(id: \(identifier))")
return nil return nil
} }
} }
private func perfectMatchLOUDS(identifier: String, charIDs: [UInt8]) -> [Int] { private func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else { guard let louds = self.loadLOUDS(query: query) else {
return [] return []
} }
return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0} return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
} }
private func throughMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] { private func throughMatchLOUDS(query: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else { guard let louds = self.loadLOUDS(query: query) else {
return [] return []
} }
let result = louds.byfixNodeIndices(chars: charIDs) let result = louds.byfixNodeIndices(chars: charIDs)
@ -182,8 +198,8 @@ public final class DicdataStore {
return Array(result[min(depth.lowerBound + 1, result.endIndex) ..< min(depth.upperBound + 1, result.endIndex)]) return Array(result[min(depth.lowerBound + 1, result.endIndex) ..< min(depth.upperBound + 1, result.endIndex)])
} }
private func prefixMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Int = .max) -> [Int] { private func prefixMatchLOUDS(query: String, charIDs: [UInt8], depth: Int = .max) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else { guard let louds = self.loadLOUDS(query: query) else {
return [] return []
} }
return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth) return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
@ -228,12 +244,12 @@ public final class DicdataStore {
let depth = minCharIDsCount - 1 ..< maxCharIDsCount let depth = minCharIDsCount - 1 ..< maxCharIDsCount
var indices: [(String, Set<Int>)] = group.map {dic in var indices: [(String, Set<Int>)] = group.map {dic in
let key = String(dic.key) let key = String(dic.key)
let set = dic.value.flatMapSet {(_, charIDs) in self.throughMatchLOUDS(identifier: key, charIDs: charIDs, depth: depth)} let set = dic.value.flatMapSet {(_, charIDs) in self.throughMatchLOUDS(query: key, charIDs: charIDs, depth: depth)}
return (key, set) return (key, set)
} }
indices.append(("user", stringSet.flatMapSet {self.throughMatchLOUDS(identifier: "user", charIDs: $0.1, depth: depth)})) indices.append(("user", stringSet.flatMapSet {self.throughMatchLOUDS(query: "user", charIDs: $0.1, depth: depth)}))
if learningManager.enabled { if learningManager.enabled {
indices.append(("memory", stringSet.flatMapSet {self.throughMatchLOUDS(identifier: "memory", charIDs: $0.1, depth: depth)})) indices.append(("memory", stringSet.flatMapSet {self.throughMatchLOUDS(query: "memory", charIDs: $0.1, depth: depth)}))
} }
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく // MARK: 検索によって得たindicesから辞書データを実際に取り出していく
var dicdata: [DicdataElement] = [] var dicdata: [DicdataElement] = []
@ -347,7 +363,7 @@ public final class DicdataStore {
var dicdata: [DicdataElement] = [] var dicdata: [DicdataElement] = []
let depth = minString.count - 1 ..< maxString.count let depth = minString.count - 1 ..< maxString.count
for identifier in keys { for identifier in keys {
dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: self.throughMatchLOUDS(identifier: identifier, charIDs: maxIDs, depth: depth))) dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: self.throughMatchLOUDS(query: identifier, charIDs: maxIDs, depth: depth)))
} }
if learningManager.enabled { if learningManager.enabled {
// temporalな学習結果にpenaltyを加えて追加する // temporalな学習結果にpenaltyを加えて追加する
@ -399,19 +415,19 @@ public final class DicdataStore {
var indices: [(String, Set<Int>)] = group.map {dic in var indices: [(String, Set<Int>)] = group.map {dic in
let head = String(dic.key) let head = String(dic.key)
let set = dic.value.flatMapSet { (_, charIDs) in let set = dic.value.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: head, charIDs: charIDs) self.perfectMatchLOUDS(query: head, charIDs: charIDs)
} }
return (head, set) return (head, set)
} }
do { do {
let set = strings.flatMapSet { (_, charIDs) in let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: "user", charIDs: charIDs) self.perfectMatchLOUDS(query: "user", charIDs: charIDs)
} }
indices.append(("user", set)) indices.append(("user", set))
} }
if learningManager.enabled { if learningManager.enabled {
let set = strings.flatMapSet { (_, charIDs) in let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: "memory", charIDs: charIDs) self.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
} }
indices.append(("memory", set)) indices.append(("memory", set))
} }
@ -502,15 +518,15 @@ public final class DicdataStore {
} else { } else {
Int.max Int.max
} }
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: depth).prefix(700) let prefixIndices = self.prefixMatchLOUDS(query: first, charIDs: charIDs, depth: depth).prefix(700)
result.append( result.append(
contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(consume prefixIndices)) contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(consume prefixIndices))
.filter { Self.predictionUsable[$0.rcid] } .filter { Self.predictionUsable[$0.rcid] }
) )
let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs, depth: depth).prefix(700) let userDictIndices = self.prefixMatchLOUDS(query: "user", charIDs: charIDs, depth: depth).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices))) result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices)))
if learningManager.enabled { if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs).prefix(700) let memoryDictIndices = self.prefixMatchLOUDS(query: "memory", charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices))) result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs)) result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
} }

View File

@ -156,7 +156,7 @@ extension DicdataStore {
continue continue
} }
let graphNode = lookupGraph.nodes[graphNodeIndex] let graphNode = lookupGraph.nodes[graphNodeIndex]
guard let louds = self.loadLOUDS(identifier: String(graphNode.character.toKatakana())) else { guard let louds = self.loadLOUDS(query: String(graphNode.character.toKatakana())) else {
continue continue
} }
/// graphNodeIndex /// graphNodeIndex
@ -220,7 +220,7 @@ extension DicdataStore {
continue continue
} }
let graphNode = lookupGraph.nodes[graphNodeIndex] let graphNode = lookupGraph.nodes[graphNodeIndex]
guard let louds = self.loadLOUDS(identifier: String(graphNode.character.toKatakana())) else { guard let louds = self.loadLOUDS(query: String(graphNode.character.toKatakana())) else {
continue continue
} }
/// graphNodeIndex /// graphNodeIndex