mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 02:58:27 +00:00
[Performance] LOUDSのlookupを高速化 (#28)
* simplify calculation * cleanup * add code for debug * fix bug * cleanup * cleanup * cleanup
This commit is contained in:
@@ -8,30 +8,26 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
/// File-private helper constant on `UInt64`.
private extension UInt64 {
|
||||
/// A word with only the most significant bit set (`1 << 63`).
/// Any `UInt64` value that compares below this constant has its top bit equal to 0.
static let prefixOne: UInt64 = 1 << 63
|
||||
}
|
||||
|
||||
/// LOUDS
|
||||
struct LOUDS {
|
||||
struct LOUDS: Sendable {
|
||||
private typealias Unit = UInt64
|
||||
private static let unit = 64
|
||||
private static let uExp = 6
|
||||
|
||||
private let bits: [Unit]
|
||||
private let indices: Range<Int>
|
||||
private let char2nodeIndices: [[Int]]
|
||||
/// 0の数(1の数ではない)
|
||||
private let rankLarge: [Int]
|
||||
///
|
||||
/// LOUDSのサイズが4GBまでは`UInt32`で十分
|
||||
private let rankLarge: [UInt32]
|
||||
|
||||
/// Builds the structure from the raw 64-bit words and a per-position byte table.
///
/// - Parameters:
///   - bytes: The 64-bit words; stored unchanged as `bits`.
///   - nodeIndex2ID: A byte sequence; each element's position is recorded in the bucket
///     selected by that element's value.
///
/// NOTE(review): this span is taken from a diff hunk that shows removed and added lines
/// together, so the two `append` lines below appear to be the old (`Int`) and new (`UInt32`)
/// versions of one statement, not two statements that both run. Confirm against the real file.
@inlinable init(bytes: [UInt64], nodeIndex2ID: [UInt8]) {
|
||||
self.bits = bytes
|
||||
// Group positions by byte value: 256 buckets (1 << 8), one for every possible UInt8.
self.char2nodeIndices = nodeIndex2ID.enumerated().reduce(into: .init(repeating: [], count: 1 << 8)) { list, data in
|
||||
list[Int(data.element)].append(data.offset)
|
||||
}
|
||||
self.indices = self.bits.indices
|
||||
// Running totals of zero bits: starts at [0], then each word appends the previous total
// plus (64 - number of set bits in that word), so the result has bytes.count + 1 entries.
self.rankLarge = bytes.reduce(into: [0]) {
|
||||
$0.append(($0.last ?? 0) &+ (Self.unit &- $1.nonzeroBitCount))
|
||||
$0.append($0.last! &+ UInt32(Self.unit &- $1.nonzeroBitCount))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,50 +45,49 @@ struct LOUDS {
|
||||
// 探しているのは、startIndexが含まれるbitsのindex `i`
|
||||
var left = (parentNodeIndex >> Self.uExp) &- 1
|
||||
while true {
|
||||
let dif = parentNodeIndex &- self.rankLarge[left &+ 1]
|
||||
let dif = parentNodeIndex &- Int(self.rankLarge[Int(left) &+ 1])
|
||||
if dif >= Self.unit {
|
||||
left &+= dif >> Self.uExp
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
guard let i = (left &+ 1 ..< self.bits.count).first(where: {(index: Int) in self.rankLarge[index &+ 1] >= parentNodeIndex}) else {
|
||||
var i: Int?
|
||||
for index in left &+ 1 ..< self.bits.endIndex where self.rankLarge[index &+ 1] >= parentNodeIndex {
|
||||
i = index
|
||||
break
|
||||
}
|
||||
guard let i else {
|
||||
return 0 ..< 0
|
||||
}
|
||||
|
||||
return self.bits.withUnsafeBufferPointer {(buffer: UnsafeBufferPointer<Unit>) -> Range<Int> in
|
||||
// 探索パート②
|
||||
// 目標は`k`の発見
|
||||
// 今のbyteの中を探索し、超過分(dif)の0を手に入れたところでkが確定する。
|
||||
let byte = buffer[i]
|
||||
let dif = self.rankLarge[i &+ 1] &- parentNodeIndex // 0の数の超過分
|
||||
var count = Unit(Self.unit &- byte.nonzeroBitCount) // 0の数
|
||||
var k = Self.unit
|
||||
|
||||
for c in 0 ..< Self.unit {
|
||||
if count == dif {
|
||||
k = c
|
||||
break
|
||||
}
|
||||
// byteの上からc桁めが0なら == (byte << 0)が100………00より小さければ == 最初の1桁を一番下に持ってきた値そのもの
|
||||
count &-= (byte << c) < Unit.prefixOne ? 1:0
|
||||
}
|
||||
|
||||
let start = (i << Self.uExp) &+ k &- parentNodeIndex &+ 1
|
||||
if dif == .zero {
|
||||
// 探索パート②
|
||||
// 目標はparentNodeIndex番目の0の位置である`k`の発見
|
||||
let byte = self.bits[i]
|
||||
var k = 0
|
||||
for _ in 0 ..< parentNodeIndex - Int(self.rankLarge[i]) {
|
||||
k = (~(byte << k)).leadingZeroBitCount &+ k &+ 1
|
||||
}
|
||||
let start = (i << Self.uExp) &+ k &- parentNodeIndex &+ 1
|
||||
// ちょうどparentNodeIndex個の0がi番目にあるかどうか
|
||||
if self.rankLarge[i &+ 1] == parentNodeIndex {
|
||||
return self.bits.withUnsafeBufferPointer {(buffer: UnsafeBufferPointer<Unit>) -> Range<Int> in
|
||||
var j = i &+ 1
|
||||
while buffer[j] == Unit.max {
|
||||
j &+= 1
|
||||
}
|
||||
let byte2 = buffer[j]
|
||||
// 最初の0を探す作業
|
||||
let a = (0 ..< Self.unit).first(where: {(byte2 << $0) < Unit.prefixOne})
|
||||
return start ..< (j << Self.uExp) &+ (a ?? 0) &- parentNodeIndex &+ 1
|
||||
} else {
|
||||
// 次の0を探す作業
|
||||
let a = (k ..< Self.unit).first(where: {(byte << $0) < Unit.prefixOne})
|
||||
return start ..< (i << Self.uExp) &+ (a ?? 0) &- parentNodeIndex &+ 1
|
||||
// 反転して、先頭から0の数を数えると最初の0の位置が出てくる
|
||||
// Ex. 1110_0000 => [000]1_1111 => 3
|
||||
let byte2 = buffer[j]
|
||||
let a = (~byte2).leadingZeroBitCount % Self.unit
|
||||
return start ..< (j << Self.uExp) &+ a &- parentNodeIndex &+ 1
|
||||
}
|
||||
} else {
|
||||
// difが0以上の場合、k番目以降の初めての0を発見したい
|
||||
// 例えばk=1の場合
|
||||
// Ex. 1011_1101 => 0111_1010 => 1000_0101 => 1 => 2
|
||||
let a = ((~(byte << k)).leadingZeroBitCount &+ k) % Self.unit
|
||||
return start ..< (i << Self.uExp) &+ a &- parentNodeIndex &+ 1
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user