Fix an issue where dicdata from the temporary learning manager did not have the penalty applied

This commit is contained in:
ensan-hcl
2023-09-23 18:16:33 +09:00
parent bbe85eb4e6
commit 1e5624db47
2 changed files with 40 additions and 6 deletions

View File

@@ -42,8 +42,9 @@ public struct DicdataElement: Equatable, Hashable, Sendable {
self.adjust = adjust
}
public func adjustedData(_ adjustValue: PValue) -> Self {
.init(word: word, ruby: ruby, lcid: lcid, rcid: rcid, mid: mid, value: baseValue, adjust: adjustValue + self.adjust)
public consuming func adjustedData(_ adjustValue: PValue) -> Self {
self.adjust += adjustValue
return self
}
public var word: String

View File

@@ -120,7 +120,7 @@ public final class DicdataStore {
}
///
@inlinable static func getPenalty(data: DicdataElement) -> PValue {
@inlinable static func getPenalty(data: borrowing DicdataElement) -> PValue {
-2.0 / PValue(data.word.count)
}
@@ -135,7 +135,7 @@ public final class DicdataStore {
}
///
@inlinable func shouldBeRemoved(data: DicdataElement) -> Bool {
@inlinable func shouldBeRemoved(data: borrowing DicdataElement) -> Bool {
let d = data.value() - self.threshold
if d < 0 {
return true
@@ -247,7 +247,23 @@ public final class DicdataStore {
}
dicdata.append(contentsOf: result)
}
dicdata.append(contentsOf: stringSet.flatMap {self.learningManager.temporaryThroughMatch(charIDs: $0.1, depth: depth)})
// Append entries from the temporary learning data, applying the penalty
// looked up for each entry's ruby before they are added to `dicdata`.
for (_, charIds) in consume stringSet {
    for data in self.learningManager.temporaryThroughMatch(charIDs: consume charIds, depth: depth) {
        let rubyArray = Array(data.ruby)
        let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
        if penalty.isZero {
            // No penalty: keep the entry unchanged and skip the adjustment path below.
            // Without this `continue` the entry could be appended a second time
            // with a zero adjustment.
            dicdata.append(data)
            continue
        }
        let ratio = Self.penaltyRatio[data.lcid]
        // Negative value: getPenalty returns -2.0 / (word length).
        let pUnit: PValue = Self.getPenalty(data: data) / 2
        let adjust = pUnit * penalty * ratio
        // Drop the entry when its penalized value no longer passes the removal check.
        if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
            continue
        }
        dicdata.append(data.adjustedData(adjust))
    }
}
for i in toIndexLeft ..< toIndexRight {
do {
@@ -343,7 +359,24 @@ public final class DicdataStore {
}
dicdata.append(contentsOf: result)
}
dicdata.append(contentsOf: strings.flatMap {self.learningManager.temporaryPerfectMatch(charIDs: $0.charIDs)})
// Append entries from the temporary learning data, applying the penalty
// looked up for each candidate string before they are added to `dicdata`.
for (characters, charIds) in consume strings {
    for data in self.learningManager.temporaryPerfectMatch(charIDs: consume charIds) {
        // Perfect match: the original note indicates Array(data.ruby) equals `characters`,
        // so `characters` is used directly as the lookup key and for the length.
        let penalty = string2penalty[characters, default: .zero]
        if penalty.isZero {
            // No penalty: keep the entry unchanged and skip the adjustment path below.
            // Without this `continue` the entry could be appended a second time
            // with a zero adjustment.
            dicdata.append(data)
            continue
        }
        let ratio = Self.penaltyRatio[data.lcid]
        // Negative value: getPenalty returns -2.0 / (word length).
        let pUnit: PValue = Self.getPenalty(data: data) / 2
        let adjust = pUnit * penalty * ratio
        // Drop the entry when its penalized value no longer passes the removal check.
        if self.shouldBeRemoved(value: data.value() + adjust, wordCount: characters.count) {
            continue
        }
        dicdata.append(data.adjustedData(adjust))
    }
}
dicdata.append(contentsOf: self.getWiseDicdata(convertTarget: segment, inputData: inputData, inputRange: fromIndex ..< toIndex + 1))
dicdata.append(contentsOf: self.getMatchOSUserDict(segment))