fix: depth関連の実装を適切に修正

This commit is contained in:
Miwa / Ensan
2025-06-29 15:01:20 +09:00
parent 763b059a45
commit 2e61ad1638
4 changed files with 48 additions and 38 deletions

View File

@ -275,7 +275,7 @@ public final class DicdataStore {
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:] var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = [] var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
var temporaryMemoryDicdata: [DicdataElement] = [] var temporaryMemoryDicdata: [Int: [DicdataElement]] = [:]
// //
while let (characters, info) = generator.next() { while let (characters, info) = generator.next() {
guard let firstCharacter = characters.first else { guard let firstCharacter = characters.first else {
@ -292,7 +292,7 @@ public final class DicdataStore {
for key in keys { for key in keys {
withMutableValue(&targetLOUDS[key]) { helper in withMutableValue(&targetLOUDS[key]) { helper in
if helper == nil, let louds = self.loadLOUDS(query: key) { if helper == nil, let louds = self.loadLOUDS(query: key) {
helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds, depth: 0 ..< .max) helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds)
} }
guard helper != nil else { guard helper != nil else {
return return
@ -303,20 +303,22 @@ public final class DicdataStore {
} }
} }
// //
let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs, depth: 0 ..< .max) let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs)
updated = updated || !(result.dicdata.isEmpty) updated = updated || !(result.dicdata.isEmpty)
availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex) availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
for data in result.dicdata { for (depth, dicdata) in result.dicdata {
if info.penalty.isZero { for data in dicdata {
temporaryMemoryDicdata.append(data) if info.penalty.isZero {
temporaryMemoryDicdata[depth, default: []].append(data)
}
let ratio = Self.penaltyRatio[data.lcid]
let pUnit: PValue = Self.getPenalty(data: data) / 2 //
let adjust = pUnit * info.penalty * ratio
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
continue
}
temporaryMemoryDicdata[depth, default: []].append(data.adjustedData(adjust))
} }
let ratio = Self.penaltyRatio[data.lcid]
let pUnit: PValue = Self.getPenalty(data: data) / 2 //
let adjust = pUnit * info.penalty * ratio
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
continue
}
temporaryMemoryDicdata.append(data.adjustedData(adjust))
} }
if availableMaxIndex < characters.endIndex - 1 { if availableMaxIndex < characters.endIndex - 1 {
// //
@ -326,11 +328,14 @@ public final class DicdataStore {
stringToInfo.append((characters, info)) stringToInfo.append((characters, info))
} }
} }
let minCount = stringToInfo.map {$0.0.count}.min() ?? 0
print(#function, minCount, stringToInfo.map{$0.0})
return ( return (
Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}), Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
targetLOUDS.map { ($0.key, $0.value.indices)}, targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )},
temporaryMemoryDicdata temporaryMemoryDicdata.flatMap {
minCount < $0.key + 1 ? $0.value : []
}
) )
} }
/// prefixprefix matchLOUDS /// prefixprefix matchLOUDS
@ -490,7 +495,9 @@ public final class DicdataStore {
} }
if learningManager.enabled { if learningManager.enabled {
// temporalpenalty // temporalpenalty
dicdata.append(contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata) dicdata.append(
contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata.flatMap { $0.value }
)
} }
for (key, value) in stringToEndIndex { for (key, value) in stringToEndIndex {
let convertTarget = String(key) let convertTarget = String(key)

View File

@ -584,22 +584,22 @@ struct TemporalLearningMemoryTrie {
return nodes[index].dataIndices.map {self.dicdata[$0]} return nodes[index].dataIndices.map {self.dicdata[$0]}
} }
func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) { func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
var index = 0 var index = 0
var availableMaxIndex = 0 var availableMaxIndex = 0
var indices: [Int] = [] var indices: [Int: [Int]] = [:]
for (offset, char) in chars.enumerated() { for (offset, char) in chars.enumerated() {
if let nextIndex = nodes[index].children[char] { if let nextIndex = nodes[index].children[char] {
availableMaxIndex = index availableMaxIndex = index
index = nextIndex index = nextIndex
if depth.contains(offset) { if depth.contains(offset) {
indices.append(contentsOf: nodes[index].dataIndices) indices[offset] = nodes[index].dataIndices
} }
} else { } else {
return (indices.map {self.dicdata[$0]}, availableMaxIndex) return (indices.mapValues { items in items.map { self.dicdata[$0] }}, availableMaxIndex)
} }
} }
return (indices.map {self.dicdata[$0]}, availableMaxIndex) return (indices.mapValues { items in items.map { self.dicdata[$0] }}, availableMaxIndex)
} }
func prefixMatch(chars: [UInt8]) -> [DicdataElement] { func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
@ -720,9 +720,9 @@ final class LearningManager {
return self.temporaryMemory.perfectMatch(chars: charIDs) return self.temporaryMemory.perfectMatch(chars: charIDs)
} }
func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) { func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int> = 0 ..< .max) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
guard let options, options.learningType.needUsingMemory else { guard let options, options.learningType.needUsingMemory else {
return ([], 0) return ([:], 0)
} }
return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth) return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth)
} }

View File

@ -240,25 +240,30 @@ package struct LOUDS: Sendable {
// //
var targets = targets var targets = targets
targets.sort(by: Self.lexLessThan) targets.sort(by: Self.lexLessThan)
var helper = MovingTowardPrefixSearchHelper(louds: self, depth: depth) var helper = MovingTowardPrefixSearchHelper(louds: self)
for target in targets { for target in targets {
_ = helper.update(target: target) _ = helper.update(target: target)
} }
return helper.indices return helper.indicesInDepth(depth: depth)
} }
struct MovingTowardPrefixSearchHelper { struct MovingTowardPrefixSearchHelper {
init(louds: LOUDS, depth: Range<Int>) { init(louds: LOUDS) {
self.louds = louds self.louds = louds
self.depth = depth
} }
let louds: LOUDS let louds: LOUDS
let depth: Range<Int>
// //
var indices: [Int] = [] var indices: [(depth: Int, index: Int)] = []
// //
var stack: [(nodeIndex: Int, char: UInt8)] = [] var stack: [(nodeIndex: Int, char: UInt8)] = []
func indicesInDepth(depth: Range<Int>) -> [Int] {
return self.indices
.lazy
.filter { depth.contains($0.depth) }
.map { $0.index }
}
/// `target` /// `target`
/// - Parameter target: `CharID` /// - Parameter target: `CharID`
/// - Returns: `updated``indices``availableMaxIndex` /// - Returns: `updated``indices``availableMaxIndex`
@ -266,7 +271,7 @@ package struct LOUDS: Sendable {
var updated = false var updated = false
var availableMaxIndex = 0 var availableMaxIndex = 0
// iupperBound // iupperBound
for (i, char) in target.enumerated() where i < self.depth.upperBound { for (i, char) in target.enumerated() {
if i < self.stack.count, self.stack[i].char == char { if i < self.stack.count, self.stack[i].char == char {
// //
availableMaxIndex = i availableMaxIndex = i
@ -280,11 +285,9 @@ package struct LOUDS: Sendable {
// //
// stacknodeIndexchar // stacknodeIndexchar
if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) { if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) {
if self.depth.contains(i) { self.indices.append((i, nodeIndex))
self.indices.append(nodeIndex) updated = true
updated = true availableMaxIndex = i
availableMaxIndex = i
}
self.stack.append((nodeIndex, char)) self.stack.append((nodeIndex, char))
} else { } else {
// //

View File

@ -27,7 +27,7 @@ final class TemporalLearningMemoryTrieTests: XCTestCase {
XCTAssertEqual(result1.first?.word, element1.word) XCTAssertEqual(result1.first?.word, element1.word)
XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false) XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count).dicdata let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count).dicdata.flatMap { $0.value }
XCTAssertEqual(result2.map { $0.word }, [element2.word]) XCTAssertEqual(result2.map { $0.word }, [element2.word])
let prefixResult = trie.prefixMatch(chars: chars(for: "テス")) let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))