feat: エスケープされた辞書名をサポート(v2.3.b2) (#106)

* feat: support v2.3.b2 dictionary (https://github.com/ensan-hcl/azooKey_dictionary_storage/pull/3)

* update to v2.3.b2
This commit is contained in:
Miwa
2024-06-23 17:53:13 +09:00
committed by GitHub
parent c2e88ae720
commit fbf09a76eb
3 changed files with 44 additions and 28 deletions

View File

@ -66,8 +66,8 @@ public final class DicdataStore {
self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount)
}
}
_ = self.loadLOUDS(identifier: "user")
_ = self.loadLOUDS(identifier: "memory")
_ = self.loadLOUDS(query: "user")
_ = self.loadLOUDS(query: "memory")
}
public enum Notification {
@ -151,30 +151,46 @@ public final class DicdataStore {
return Self.getPenalty(data: data) < -d
}
func loadLOUDS(identifier: String) -> LOUDS? {
if importedLoudses.contains(identifier) {
return self.loudses[identifier]
func loadLOUDS(query: String) -> LOUDS? {
if importedLoudses.contains(query) {
return self.loudses[query]
}
importedLoudses.insert(identifier)
// Record the query in importedLoudses before attempting the load, whether or not the LOUDS can actually be read
importedLoudses.insert(query)
// Some ASCII characters are escaped in dictionary names as "[XX]", where XX is the
// character's hexadecimal code. Any query not listed here is used unchanged as the identifier.
// The line-feed key is an ordinary string literal on purpose: inside a raw literal (#"..."#)
// the sequence \n is a backslash followed by "n", which would never match a line-feed query.
let identifier = [
    "\n": "[0A]",
    #" "#: "[20]",
    #"""#: "[22]",
    #"'"#: "[27]",
    #"*"#: "[2A]",
    #"+"#: "[2B]",
    #"."#: "[2E]",
    #"/"#: "[2F]",
    #":"#: "[3A]",
    #"<"#: "[3C]",
    #">"#: "[3E]",
    #"\"#: "[5C]",
    #"|"#: "[7C]",
][query, default: query]
if let louds = LOUDS.load(identifier, option: self.requestOptions) {
self.loudses[identifier] = louds
self.loudses[query] = louds
return louds
} else {
debug("loudsの読み込みに失敗identifierは\(identifier)")
debug("loudsの読み込みに失敗identifierは\(query)(id: \(identifier))")
return nil
}
}
private func perfectMatchLOUDS(identifier: String, charIDs: [UInt8]) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else {
private func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
guard let louds = self.loadLOUDS(query: query) else {
return []
}
return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
}
private func throughMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else {
private func throughMatchLOUDS(query: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] {
guard let louds = self.loadLOUDS(query: query) else {
return []
}
let result = louds.byfixNodeIndices(chars: charIDs)
@ -182,8 +198,8 @@ public final class DicdataStore {
return Array(result[min(depth.lowerBound + 1, result.endIndex) ..< min(depth.upperBound + 1, result.endIndex)])
}
private func prefixMatchLOUDS(identifier: String, charIDs: [UInt8], depth: Int = .max) -> [Int] {
guard let louds = self.loadLOUDS(identifier: identifier) else {
private func prefixMatchLOUDS(query: String, charIDs: [UInt8], depth: Int = .max) -> [Int] {
guard let louds = self.loadLOUDS(query: query) else {
return []
}
return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
@ -228,12 +244,12 @@ public final class DicdataStore {
let depth = minCharIDsCount - 1 ..< maxCharIDsCount
var indices: [(String, Set<Int>)] = group.map {dic in
let key = String(dic.key)
let set = dic.value.flatMapSet {(_, charIDs) in self.throughMatchLOUDS(identifier: key, charIDs: charIDs, depth: depth)}
let set = dic.value.flatMapSet {(_, charIDs) in self.throughMatchLOUDS(query: key, charIDs: charIDs, depth: depth)}
return (key, set)
}
indices.append(("user", stringSet.flatMapSet {self.throughMatchLOUDS(identifier: "user", charIDs: $0.1, depth: depth)}))
indices.append(("user", stringSet.flatMapSet {self.throughMatchLOUDS(query: "user", charIDs: $0.1, depth: depth)}))
if learningManager.enabled {
indices.append(("memory", stringSet.flatMapSet {self.throughMatchLOUDS(identifier: "memory", charIDs: $0.1, depth: depth)}))
indices.append(("memory", stringSet.flatMapSet {self.throughMatchLOUDS(query: "memory", charIDs: $0.1, depth: depth)}))
}
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく
var dicdata: [DicdataElement] = []
@ -347,7 +363,7 @@ public final class DicdataStore {
var dicdata: [DicdataElement] = []
let depth = minString.count - 1 ..< maxString.count
for identifier in keys {
dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: self.throughMatchLOUDS(identifier: identifier, charIDs: maxIDs, depth: depth)))
dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: self.throughMatchLOUDS(query: identifier, charIDs: maxIDs, depth: depth)))
}
if learningManager.enabled {
// temporalな学習結果にpenaltyを加えて追加する
@ -399,19 +415,19 @@ public final class DicdataStore {
var indices: [(String, Set<Int>)] = group.map {dic in
let head = String(dic.key)
let set = dic.value.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: head, charIDs: charIDs)
self.perfectMatchLOUDS(query: head, charIDs: charIDs)
}
return (head, set)
}
do {
let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: "user", charIDs: charIDs)
self.perfectMatchLOUDS(query: "user", charIDs: charIDs)
}
indices.append(("user", set))
}
if learningManager.enabled {
let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(identifier: "memory", charIDs: charIDs)
self.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
}
indices.append(("memory", set))
}
@ -502,15 +518,15 @@ public final class DicdataStore {
} else {
Int.max
}
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: depth).prefix(700)
let prefixIndices = self.prefixMatchLOUDS(query: first, charIDs: charIDs, depth: depth).prefix(700)
result.append(
contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(consume prefixIndices))
.filter { Self.predictionUsable[$0.rcid] }
)
let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs, depth: depth).prefix(700)
let userDictIndices = self.prefixMatchLOUDS(query: "user", charIDs: charIDs, depth: depth).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices)))
if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs).prefix(700)
let memoryDictIndices = self.prefixMatchLOUDS(query: "memory", charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
}

View File

@ -156,7 +156,7 @@ extension DicdataStore {
continue
}
let graphNode = lookupGraph.nodes[graphNodeIndex]
guard let louds = self.loadLOUDS(identifier: String(graphNode.character.toKatakana())) else {
guard let louds = self.loadLOUDS(query: String(graphNode.character.toKatakana())) else {
continue
}
/// graphNodeIndex
@ -220,7 +220,7 @@ extension DicdataStore {
continue
}
let graphNode = lookupGraph.nodes[graphNodeIndex]
guard let louds = self.loadLOUDS(identifier: String(graphNode.character.toKatakana())) else {
guard let louds = self.loadLOUDS(query: String(graphNode.character.toKatakana())) else {
continue
}
/// graphNodeIndex