refactor: 辞書検索関連の関数の実装を統合し、名前を整理 (#209)

* perf: 同じloudsに対する検索をバルク処理することによって、処理の効率化を実現

* fix: bug

* test: add typo correction test

* chore: finalize impl

* feat: update search-related impls
This commit is contained in:
Miwa
2025-06-27 23:21:11 +09:00
committed by GitHub
parent f5037e393c
commit 74d4d412c3
2 changed files with 49 additions and 35 deletions

View File

@ -27,7 +27,7 @@ public final class DicdataStore {
private var mmValue: [PValue] = []
private var loudses: [String: LOUDS] = [:]
private var loudstxts: [String: Data] = [:]
private var loudstxts: [String: Data] = [:]
private var importedLoudses: Set<String> = []
private var charsID: [Character: UInt8] = [:]
private var learningManager = LearningManager()
@ -227,28 +227,29 @@ public final class DicdataStore {
}
}
func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
/// Looks up the node that exactly matches `charIDs` in a single LOUDS.
///
/// - Parameters:
///   - query: Identifier handed to `loadLOUDS(query:)` to pick the LOUDS to search, e.g. `"user"`.
///   - charIDs: Character-ID sequence to look up.
/// - Returns: A one-element array holding the index reported by `searchNodeIndex(chars:)`,
///   or an empty array when the LOUDS cannot be loaded or no index is reported.
func perfectMatchingSearch(query: String, charIDs: [UInt8]) -> [Int] {
    guard let louds = self.loadLOUDS(query: query) else {
        return []
    }
    // Held as an optional so that a missing node collapses to an empty result.
    let matched: Int? = louds.searchNodeIndex(chars: charIDs)
    return matched.map { [$0] } ?? []
}
/// Returns a slice of the node indices that `byfixNodeIndices(chars:)` reports for `charIDs` in a single LOUDS.
///
/// - Parameters:
///   - query: Identifier handed to `loadLOUDS(query:)` to pick the LOUDS to search.
///   - charIDs: Character-ID sequence passed to `byfixNodeIndices(chars:)`.
///   - depth: Half-open window to keep; both bounds are shifted by one before slicing (see below).
/// - Returns: The selected node indices, or an empty array when the LOUDS cannot be loaded.
private func throughMatchLOUDS(query: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] {
    guard let louds = self.loadLOUDS(query: query) else {
        return []
    }
    let nodeIndices = louds.byfixNodeIndices(chars: charIDs)
    // Both bounds are shifted by one (a depth of 3..<5 selects positions 4..<6) and clamped to the
    // end of the result. NOTE(review): presumably the leading element is not a match position —
    // confirm against `byfixNodeIndices(chars:)`.
    let lowerBound = min(depth.lowerBound + 1, nodeIndices.endIndex)
    let upperBound = min(depth.upperBound + 1, nodeIndices.endIndex)
    return Array(nodeIndices[lowerBound ..< upperBound])
}
///
/// prefix...LOUDS
/// - Parameters:
/// - group: 1ID
/// - depth: `2..<4`23
/// - Returns:
private func throughMatchLOUDS(group: [String: [([Character], [UInt8])]], depth: Range<Int>) -> [(key: String, indices: Set<Int>)] {
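/// - group: Search targets keyed by LOUDS identifier; each entry pairs a `Character` sequence with its charID sequence.
/// - depth: Range of prefix lengths to collect, e.g. `2..<4` for the prefixes of 2 and 3 characters.
/// - Returns: For each key, the set of node indices that were found.
///
/// Loads the LOUDS for each key of `group` and searches it.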
private func movingTowardPrefixSearch(group: [String: [([Character], [UInt8])]], depth: Range<Int>) -> [(key: String, indices: Set<Int>)] {
let indices: [(String, Set<Int>)] = group.map {dic in
guard let louds = self.loadLOUDS(query: dic.key) else {
return (dic.key, [])
@ -260,7 +261,17 @@ public final class DicdataStore {
return indices
}
private func prefixMatchLOUDS(query: String, charIDs: [UInt8], depth: Int = .max, maxCount: Int = .max) -> [Int] {
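/// Searches a LOUDS by prefix match, starting from the given prefix.
/// - Parameters:
///   - query: Identifier of the LOUDS to search: the first character of the target, or a name such as `"user"`.
///   - charIDs: Character-ID sequence used as the prefix.
///   - depth: Limit on the search depth (defaults to `.max`).
///   - maxCount: Limit on the number of results (defaults to `.max`).
/// - Returns: Indices of the nodes found by the prefix search.
///
/// Explores the LOUDS from the prefix onward, bounded by `maxCount` and `depth`.
/// For example, the prefix ABC is expected to match ABC, ABCD, ABCDE, and so on.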
private func startingFromPrefixSearch(query: String, charIDs: [UInt8], depth: Int = .max, maxCount: Int = .max) -> [Int] {
guard let louds = self.loadLOUDS(query: query) else {
return []
}
@ -313,11 +324,11 @@ public final class DicdataStore {
let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
let depth = minCharIDsCount - 1 ..< maxCharIDsCount
let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)})
var indices = self.throughMatchLOUDS(group: group, depth: depth)
var indices = self.movingTowardPrefixSearch(group: group, depth: depth)
if learningManager.enabled {
indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet, "memory": stringSet], depth: depth))
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet, "memory": stringSet], depth: depth))
} else {
indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet], depth: depth))
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet], depth: depth))
}
// MARK: indices
var dicdata: [DicdataElement] = []
@ -421,15 +432,18 @@ public final class DicdataStore {
return [characterNode]
}
let maxIDs = maxString.map(self.character2charId)
var keys = [String(stringToEndIndex.keys.first!.first!), "user"]
var group: [String: [([Character], [UInt8])]] = [
String(stringToEndIndex.keys.first!.first!): [(maxString, maxIDs)],
"user": [(maxString, maxIDs)],
]
if learningManager.enabled {
keys.append("memory")
group["memory"] = group["user"]
}
// MARK: indices
var dicdata: [DicdataElement] = []
let depth = minString.count - 1 ..< maxString.count
for identifier in keys {
dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: self.throughMatchLOUDS(query: identifier, charIDs: maxIDs, depth: depth)))
for (identifier, indices) in self.movingTowardPrefixSearch(group: group, depth: depth) {
dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: indices))
}
if learningManager.enabled {
// temporalpenalty
@ -485,19 +499,19 @@ public final class DicdataStore {
var indices: [(String, Set<Int>)] = group.map {dic in
let head = String(dic.key)
let set = dic.value.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(query: head, charIDs: charIDs)
self.perfectMatchingSearch(query: head, charIDs: charIDs)
}
return (head, set)
}
do {
let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(query: "user", charIDs: charIDs)
self.perfectMatchingSearch(query: "user", charIDs: charIDs)
}
indices.append(("user", set))
}
if learningManager.enabled {
let set = strings.flatMapSet { (_, charIDs) in
self.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
self.perfectMatchingSearch(query: "memory", charIDs: charIDs)
}
indices.append(("memory", set))
}
@ -591,16 +605,16 @@ public final class DicdataStore {
} else {
Int.max
}
let prefixIndices = self.prefixMatchLOUDS(query: first, charIDs: charIDs, depth: depth, maxCount: maxCount)
let prefixIndices = self.startingFromPrefixSearch(query: first, charIDs: charIDs, depth: depth, maxCount: maxCount)
result.append(
contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices))
.filter { Self.predictionUsable[$0.rcid] }
)
let userDictIndices = self.prefixMatchLOUDS(query: "user", charIDs: charIDs, maxCount: maxCount)
let userDictIndices = self.startingFromPrefixSearch(query: "user", charIDs: charIDs, maxCount: maxCount)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices)))
if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(query: "memory", charIDs: charIDs, maxCount: maxCount)
let memoryDictIndices = self.startingFromPrefixSearch(query: "memory", charIDs: charIDs, maxCount: maxCount)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
}
@ -1073,4 +1087,4 @@ public final class DicdataStore {
"w": ["", "ウィ", "ウェ", ""],
"wy": ["", ""]
]
}
}

View File

@ -78,7 +78,7 @@ final class LearningMemoryTests: XCTestCase {
let dicdataStore = DicdataStore(requestOptions: options)
dicdataStore.sendToDicdataStore(.setRequestOptions(options))
let charIDs = "テスト".map { dicdataStore.character2charId($0) }
let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
let indices = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices)
XCTAssertFalse(dicdata.isEmpty)
XCTAssertTrue(dicdata.contains { $0.word == element.word && $0.ruby == element.ruby })
@ -95,7 +95,7 @@ final class LearningMemoryTests: XCTestCase {
)
)
let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
let indices2 = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2)
XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby })
}
@ -118,7 +118,7 @@ final class LearningMemoryTests: XCTestCase {
let dicdataStore = DicdataStore(requestOptions: options)
dicdataStore.sendToDicdataStore(.setRequestOptions(options))
let charIDs = "テスト".map { dicdataStore.character2charId($0) }
let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
let indices = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices)
XCTAssertFalse(dicdata.isEmpty)
XCTAssertEqual(dicdata.count { $0.word == element.word && $0.ruby == element.ruby }, 2)
@ -135,7 +135,7 @@ final class LearningMemoryTests: XCTestCase {
)
)
let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
let indices2 = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2)
XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby })
}