Files
AzooKeyKanaKanjiConverter/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
Miwa f5037e393c perf: 同じloudsに対する検索をバルク処理することによって、処理の効率化を実現 (#208)
* perf: 同じloudsに対する検索をバルク処理することによって、処理の効率化を実現

* fix: bug

* test: add typo correction test

* chore: finalize imp;
2025-06-27 22:32:46 +09:00

276 lines
12 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// LOUDS.swift
// Keyboard
//
// Created by ensan on 2020/09/30.
// Copyright © 2020 ensan. All rights reserved.
//
import Foundation
/// A read-only trie encoded as a LOUDS bit vector.
///
/// Layout, as interpreted by the methods of this type:
/// - `bits` is scanned starting from the most significant bit of each 64-bit word.
/// - The children of node `n` are the run of `1` bits that follows the `n`-th `0` bit.
/// - A `1` bit at position `p` denotes node `p - (number of 0 bits before p) + 1`, so the
///   very first bit is node `1`, which the search methods use as the root.
package struct LOUDS: Sendable {
    private typealias Unit = UInt64
    /// Number of bits in one `Unit`.
    private static let unit = 64
    /// `log2(unit)`; shifting by this converts between bit positions and word indices.
    private static let uExp = 6

    /// The LOUDS bit vector.
    private let bits: [Unit]
    /// Node indices grouped by character ID, ascending inside each group.
    /// - seealso: flatChar2nodeIndicesIndex
    private let flatChar2nodeIndices: [Int]
    /// 256 exclusive end offsets into `flatChar2nodeIndices`.
    ///
    /// The node indices of `char` are
    /// `flatChar2nodeIndices[flatChar2nodeIndicesIndex[char - 1] ..< flatChar2nodeIndicesIndex[char]]`
    /// (the lower bound is `0` when `char == 0`).
    private let flatChar2nodeIndicesIndex: [Int]
    /// `rankLarge[i]` is the number of `0` bits contained in `bits[0 ..< i]`.
    ///
    /// The counts are accumulated with wrapping `UInt32` arithmetic, so the table is only
    /// meaningful while the total number of `0` bits fits in 32 bits.
    private let rankLarge: [UInt32]

    /// Builds the lookup tables for a LOUDS bit vector.
    /// - Parameters:
    ///   - bytes: The bit vector as 64-bit words (despite the name, each element is a `UInt64`).
    ///   - nodeIndex2ID: The character ID of every node, indexed by node index.
    @inlinable init(bytes: [UInt64], nodeIndex2ID: [UInt8]) {
        self.bits = bytes

        // Counting sort, step 1: histogram of character IDs turned into an inclusive prefix
        // sum, i.e. the exclusive end offset of every character's bucket.
        var bucketEnds = [Int](repeating: 0, count: 256)
        bucketEnds.withUnsafeMutableBufferPointer { ends in
            for id in nodeIndex2ID {
                ends[Int(id)] += 1
            }
            for char in 1 ..< 256 {
                ends[char] += ends[char - 1]
            }
        }

        // Counting sort, step 2: drop every node index into its character's bucket.
        // Nodes are visited in ascending order, so each bucket ends up sorted, which
        // `searchCharNodeIndex` relies on for its binary search.
        var filledCounts = [Int](repeating: 0, count: 256)
        self.flatChar2nodeIndices = filledCounts.withUnsafeMutableBufferPointer { filled in
            var flat = [Int](repeating: 0, count: nodeIndex2ID.count)
            for (nodeIndex, id) in nodeIndex2ID.enumerated() {
                let char = Int(id)
                let bucketStart = char == 0 ? 0 : bucketEnds[char - 1]
                flat[bucketStart + filled[char]] = nodeIndex
                filled[char] += 1
            }
            return flat
        }
        self.flatChar2nodeIndicesIndex = bucketEnds

        // Cumulative number of 0 bits before each word.
        var zeroCounts = [UInt32](repeating: 0, count: bytes.count + 1)
        zeroCounts.withUnsafeMutableBufferPointer { buffer in
            for (i, word) in bytes.enumerated() {
                buffer[i + 1] = buffer[i] &+ UInt32(Self.unit &- word.nonzeroBitCount)
            }
        }
        self.rankLarge = zeroCounts
    }

    /// Returns the node indices of the children of `parentNodeIndex`.
    ///
    /// The children are the `1` bits between the `parentNodeIndex`-th `0` bit and the next
    /// `0` bit. The returned range is expressed in node indices, not bit positions.
    ///
    /// - Parameter parentNodeIndex: A node index; the root is `1`.
    /// - Returns: The children as a half-open range. The range is empty when the node has no
    ///   children, when `parentNodeIndex` is not positive, when it exceeds the number of
    ///   `0` bits in the vector, or when the vector ends before the run of children is
    ///   terminated by a `0` bit.
    @inlinable func childNodeIndices(from parentNodeIndex: Int) -> Range<Int> {
        // Without this guard a non-positive index would make `i` below become -1.
        guard parentNodeIndex > 0 else {
            return 0 ..< 0
        }
        // Part 1: find the word `i` that contains the `parentNodeIndex`-th 0 bit, i.e. the
        // largest `i` with `rankLarge[i] < parentNodeIndex`. A word holds at most 64 zeros,
        // so the search can start at `parentNodeIndex / 64`.
        var left = parentNodeIndex >> Self.uExp
        var right = self.rankLarge.endIndex - 1
        while left <= right {
            let mid = (left + right) / 2
            if self.rankLarge[mid] >= parentNodeIndex {
                right = mid - 1
            } else {
                left = mid + 1
            }
        }
        // `left` ends at or beyond `endIndex` when the vector has fewer than
        // `parentNodeIndex` zeros (including the case where the initial `left` was already
        // past the table).
        guard left < self.rankLarge.endIndex else {
            return 0 ..< 0
        }
        let i = left - 1
        return self.bits.withUnsafeBufferPointer {(buffer: UnsafeBufferPointer<Unit>) -> Range<Int> in
            // Part 2: inside word `i`, walk to the bit right after the remaining zeros.
            // After the loop `k` is the offset just past the `parentNodeIndex`-th 0 bit.
            let byte = buffer[i]
            var k = 0
            for _ in 0 ..< parentNodeIndex - Int(self.rankLarge[i]) {
                // Leading ones of `byte << k` == distance from offset `k` to the next 0 bit.
                k = (~(byte << k)).leadingZeroBitCount &+ k &+ 1
            }
            // bit position -> node index: subtract the zeros before it and add 1.
            let start = (i << Self.uExp) &+ k &- parentNodeIndex &+ 1
            if self.rankLarge[i &+ 1] == parentNodeIndex {
                // The wanted 0 was the last 0 of word `i`, so the terminating 0 lives in a
                // later word. Skip words made only of 1 bits.
                var j = i &+ 1
                while j < buffer.count && buffer[j] == Unit.max {
                    j &+= 1
                }
                // No terminating 0 exists: the vector is truncated or the index is invalid.
                guard j < buffer.count else {
                    return start ..< start
                }
                // Count the 1 bits in front of the first 0 of word `j`.
                // Ex. 1110_0000 => [000]1_1111 => 3
                let byte2 = buffer[j]
                let a = (~byte2).leadingZeroBitCount % Self.unit
                return start ..< (j << Self.uExp) &+ a &- parentNodeIndex &+ 1
            } else {
                // The terminating 0 is in the same word: find the first 0 at or after `k`.
                // Ex. (k=1) 1011_1101 => 0111_1010 => 1000_0101 => 1 => 2
                let a = ((~(byte << k)).leadingZeroBitCount &+ k) % Self.unit
                return start ..< (i << Self.uExp) &+ a &- parentNodeIndex &+ 1
            }
        }
    }

    /// Finds the child of `parentNodeIndex` that is labelled with `char`.
    ///
    /// - Parameters:
    ///   - parentNodeIndex: The node whose children are inspected.
    ///   - char: The character ID to look for.
    /// - Returns: The node index of the matching child, or `nil` when there is none.
    @inlinable func searchCharNodeIndex(from parentNodeIndex: Int, char: UInt8) -> Int? {
        let children = self.childNodeIndices(from: parentNodeIndex)
        // A node without children cannot match anything.
        guard !children.isEmpty else {
            return nil
        }
        // Bucket of all node indices labelled `char` (sorted ascending).
        let bucketStart = char == .zero ? 0 : self.flatChar2nodeIndicesIndex[Int(char - 1)]
        let bucketEnd = self.flatChar2nodeIndicesIndex[Int(char)]
        // Lower bound: first node in the bucket that is not before the first child.
        var low = bucketStart
        var high = bucketEnd
        while low < high {
            let mid = (low + high) >> 1
            if self.flatChar2nodeIndices[mid] >= children.lowerBound {
                high = mid
            } else {
                low = mid + 1
            }
        }
        guard low < bucketEnd else {
            return nil
        }
        let candidate = self.flatChar2nodeIndices[low]
        return children.contains(candidate) ? candidate : nil
    }

    /// Walks the trie from the root (node `1`) along `chars`.
    ///
    /// - Parameter chars: The character IDs of the path.
    /// - Returns: The node index reached after consuming every character (`1` for an empty
    ///   path), or `nil` when some character has no matching child.
    @inlinable func searchNodeIndex(chars: [UInt8]) -> Int? {
        var current = 1
        for char in chars {
            guard let next = self.searchCharNodeIndex(from: current, char: char) else {
                return nil
            }
            current = next
        }
        return current
    }

    /// Collects descendants of `nodeIndex`.
    ///
    /// - Parameters:
    ///   - nodeIndex: The node whose descendants are collected (the node itself is not included).
    ///   - depth: The current recursion depth; callers normally leave the default.
    ///   - maxDepth: Recursion stops descending once `depth == maxDepth`.
    ///   - maxCount: A soft cap. It is only checked before descending into each child, so the
    ///     result can contain more than `maxCount` elements.
    /// - Returns: The direct children first, followed by the results of each child's subtree.
    @inlinable func prefixNodeIndices(nodeIndex: Int, depth: Int = 0, maxDepth: Int, maxCount: Int) -> [Int] {
        let children = self.childNodeIndices(from: nodeIndex)
        var collected = Array(children)
        if depth == maxDepth {
            return collected
        }
        // Iterate the range rather than `collected`, which is being appended to.
        for child in children {
            if collected.count > maxCount {
                break
            }
            collected.append(
                contentsOf: self.prefixNodeIndices(
                    nodeIndex: child,
                    depth: depth + 1,
                    maxDepth: maxDepth,
                    maxCount: maxCount - collected.count
                )
            )
        }
        return collected
    }

    /// Collects descendants of the node reached by `chars`.
    ///
    /// - Parameter chars: The character IDs of the prefix.
    /// - Parameter maxDepth: How many levels below the direct children are explored.
    /// - Parameter maxCount: A soft cap on the number of results (see the `nodeIndex` overload).
    /// - Returns: The descendant node indices, or an empty array when `chars` is not in the trie.
    ///   (The original comments associate these indices with entries of a `loudstxt3` file.)
    @inlinable package func prefixNodeIndices(chars: [UInt8], maxDepth: Int, maxCount: Int) -> [Int] {
        guard let nodeIndex = self.searchNodeIndex(chars: chars) else {
            return []
        }
        return self.prefixNodeIndices(nodeIndex: nodeIndex, maxDepth: maxDepth, maxCount: maxCount)
    }

    /// Returns the nodes visited while walking from the root along `chars`.
    ///
    /// - Parameter chars: The character IDs of the path.
    /// - Returns: The root (`1`) followed by one node index per matched character.
    /// - Note: The walk stops at the first character without a matching child, so the result
    ///   may describe only a prefix of `chars`.
    @inlinable func byfixNodeIndices(chars: [UInt8]) -> [Int] {
        var indices = [1]
        var current = 1
        for char in chars {
            guard let next = self.searchCharNodeIndex(from: current, char: char) else {
                break
            }
            indices.append(next)
            current = next
        }
        return indices
    }

    /// Lexicographic "less than" on character ID sequences; a proper prefix orders first.
    private static func lexLessThan(_ lhs: [UInt8], _ rhs: [UInt8]) -> Bool {
        lhs.lexicographicallyPrecedes(rhs)
    }

    /// Bulk variant of `byfixNodeIndices(chars:)` for many paths at once.
    ///
    /// The targets are sorted so that consecutive targets share as long a prefix as possible;
    /// the nodes of the shared prefix are kept on a stack and are neither searched nor
    /// reported again.
    ///
    /// - Parameter targets: The character ID paths to walk from the root.
    /// - Parameter depth: Only characters at positions below `depth.upperBound` are walked, and
    ///   a node is reported only when the position of its character is contained in `depth`.
    /// - Returns: The reported node indices, in the order they are discovered while processing
    ///   the sorted targets. The root is never included.
    /// - Note: A target is abandoned at its first character without a matching child.
    @inlinable func byfixNodeIndices(targets: [[UInt8]], depth: Range<Int>) -> [Int] {
        var sortedTargets = targets
        sortedTargets.sort(by: Self.lexLessThan)
        // Characters at or beyond this position are never looked at.
        let maxLength = Swift.max(depth.upperBound, 0)
        var indices: [Int] = []
        // Path of the previous target: `stack[i]` is the node reached by its `i`-th character.
        var stack: [(nodeIndex: Int, char: UInt8)] = []
        for chars in sortedTargets {
            for (i, char) in chars.prefix(maxLength).enumerated() {
                if i < stack.count {
                    if stack[i].char == char {
                        // Still on the path shared with the previous target.
                        continue
                    }
                    // Diverged from the previous target: forget its tail.
                    stack.removeSubrange(i...)
                }
                assert(i >= stack.count, "stack[\(i)] must not exist for logical reason.")
                // Continue from the deepest shared node, or from the root.
                guard let nodeIndex = self.searchCharNodeIndex(from: stack.last?.nodeIndex ?? 1, char: char) else {
                    // Nothing deeper can match for this target.
                    break
                }
                if depth.contains(i) {
                    indices.append(nodeIndex)
                }
                stack.append((nodeIndex: nodeIndex, char: char))
            }
        }
        return indices
    }
}