Files
AzooKeyKanaKanjiConverter/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
2025-06-29 15:01:20 +09:00

301 lines
13 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// LOUDS.swift
// Keyboard
//
// Created by ensan on 2020/09/30.
// Copyright © 2020 ensan. All rights reserved.
//
import Foundation
/// LOUDS
/// A trie stored as a LOUDS bit vector.
///
/// The tree shape is packed into 64-bit words that are read from the most significant bit.
/// Node `n` (1-based) owns the run of `1` bits between the `n`-th and the `(n + 1)`-th `0` bit;
/// every `1` in that run is one child. The search methods treat node `1` as the root.
package struct LOUDS: Sendable {
    private typealias Unit = UInt64
    /// Number of bits in one `Unit`.
    private static let unit = 64
    /// `log2(unit)`; shifting by this converts between bit positions and word indices.
    private static let uExp = 6

    /// The LOUDS bit vector.
    private let bits: [Unit]
    /// All node indices, grouped by char ID and ascending within each group.
    /// - seealso: flatChar2nodeIndicesIndex
    private let flatChar2nodeIndices: [Int]
    /// 256 running totals delimiting the groups of `flatChar2nodeIndices`: the node indices for `char` are
    /// `flatChar2nodeIndices[flatChar2nodeIndicesIndex[char - 1] ..< flatChar2nodeIndicesIndex[char]]`
    /// (with a lower bound of `0` for `char == 0`).
    private let flatChar2nodeIndicesIndex: [Int]
    /// `rankLarge[i]` is the number of `0` bits contained in `bits[0 ..< i]`.
    ///
    /// NOTE(review): `UInt32` with wrapping addition is used, which presumably relies on the bit vector
    /// never holding more than `UInt32.max` zeros. Confirm against the data format.
    private let rankLarge: [UInt32]

    /// Builds the lookup tables for a LOUDS bit vector.
    /// - Parameter bytes: The bit vector as 64-bit words.
    /// - Parameter nodeIndex2ID: The char ID of every node, indexed by node index.
    @inlinable init(bytes: [UInt64], nodeIndex2ID: [UInt8]) {
        self.bits = bytes
        // Counting pass: how many nodes carry each char ID.
        var flatChar2nodeIndicesIndex = [Int](repeating: 0, count: 256)
        flatChar2nodeIndicesIndex.withUnsafeMutableBufferPointer { buffer in
            for value in nodeIndex2ID {
                buffer[Int(value)] += 1
            }
            // Turn the counts into running totals so that entry `c` is the end offset of group `c`.
            for i in 1 ..< 256 {
                buffer[i] = buffer[i - 1] + buffer[i]
            }
        }
        // Placement pass: write every node index into the next free slot of its char's group.
        // `counts[c]` is the number of slots of group `c` that are already filled.
        var counts = [Int](repeating: 0, count: 256)
        self.flatChar2nodeIndices = counts.withUnsafeMutableBufferPointer { countsBuffer in
            var flatChar2nodeIndices = [Int](repeating: 0, count: nodeIndex2ID.count)
            for (i, value) in zip(nodeIndex2ID.indices, nodeIndex2ID) {
                let charID = Int(value)
                // Group 0 starts at offset 0; every other group starts where the previous one ends.
                let groupStart = charID == 0 ? 0 : flatChar2nodeIndicesIndex[charID - 1]
                flatChar2nodeIndices[groupStart + countsBuffer[charID]] = i
                countsBuffer[charID] += 1
            }
            return flatChar2nodeIndices
        }
        self.flatChar2nodeIndicesIndex = flatChar2nodeIndicesIndex

        // Prefix sums of the zero count of each word.
        var rankLarge: [UInt32] = .init(repeating: 0, count: bytes.count + 1)
        rankLarge.withUnsafeMutableBufferPointer { buffer in
            for (i, byte) in zip(bytes.indices, bytes) {
                buffer[i + 1] = buffer[i] &+ UInt32(Self.unit &- byte.nonzeroBitCount)
            }
        }
        self.rankLarge = rankLarge
    }

    /// Returns the node indices of the children of `parentNodeIndex`.
    ///
    /// The children are the `1` bits located between the `parentNodeIndex`-th `0` bit and the next `0` bit.
    /// - Parameter parentNodeIndex: The 1-based index of the parent node.
    /// - Returns: The half-open range of child node indices. The range is empty when the node has no
    ///   children, when `parentNodeIndex` is not positive, when the bit vector holds fewer than
    ///   `parentNodeIndex` zeros, or when no `0` bit closes the child run.
    @inlinable func childNodeIndices(from parentNodeIndex: Int) -> Range<Int> {
        // Zero and negative values would address memory before the start of `rankLarge` and `bits`.
        guard parentNodeIndex > 0 else {
            return 0 ..< 0
        }
        // Binary search for the first `left` such that `rankLarge[left] >= parentNodeIndex`.
        // One word holds at most 64 zeros, so no index below `parentNodeIndex / 64` can qualify.
        var left = parentNodeIndex >> Self.uExp
        var right = self.rankLarge.endIndex - 1
        while left <= right {
            let mid = (left + right) / 2
            if self.rankLarge[mid] >= parentNodeIndex {
                right = mid - 1
            } else {
                left = mid + 1
            }
        }
        // `left` ends at `endIndex` when there are too few zeros, and it starts beyond `endIndex`
        // (so the loop never runs) when `parentNodeIndex` is far out of range.
        guard left < self.rankLarge.endIndex else {
            return 0 ..< 0
        }
        // Word `i` contains the `parentNodeIndex`-th zero.
        let i = left - 1
        return self.bits.withUnsafeBufferPointer {(buffer: UnsafeBufferPointer<Unit>) -> Range<Int> in
            let byte = buffer[i]
            // Advance `k` zero by zero until it is the count of bits of this word
            // up to and including the `parentNodeIndex`-th zero.
            var k = 0
            for _ in 0 ..< parentNodeIndex - Int(self.rankLarge[i]) {
                k = (~(byte << k)).leadingZeroBitCount &+ k &+ 1
            }
            // Bit position just after that zero, converted to a node index by removing the zeros before it.
            let start = (i << Self.uExp) &+ k &- parentNodeIndex &+ 1
            if self.rankLarge[i &+ 1] == parentNodeIndex {
                // That zero was the last one in word `i`: the closing zero lives in a later word.
                // Skip the words that consist of ones only, without running past the end of the vector.
                var j = i &+ 1
                while j < buffer.endIndex, buffer[j] == Unit.max {
                    j &+= 1
                }
                guard j < buffer.endIndex else {
                    // No closing zero exists, so the child run is not delimited.
                    return 0 ..< 0
                }
                // Number of leading ones of word `j`.
                // Ex. 1110_0000 => [000]1_1111 => 3
                let byte2 = buffer[j]
                let a = (~byte2).leadingZeroBitCount % Self.unit
                let end = (j << Self.uExp) &+ a &- parentNodeIndex &+ 1
                return start ..< end
            } else {
                // The closing zero is in the same word: count the ones that follow bit `k`.
                // Ex. 1011_1101 => 0111_1010 => 1000_0101 => 1 => 2
                let a = ((~(byte << k)).leadingZeroBitCount &+ k) % Self.unit
                let end = (i << Self.uExp) &+ a &- parentNodeIndex &+ 1
                return start ..< end
            }
        }
    }

    /// Finds the child of `parentNodeIndex` that carries `char`.
    ///
    /// Instead of scanning the children, this binary-searches the ascending list of nodes
    /// that carry `char` for the first one that is not below the child range.
    /// - Parameter parentNodeIndex: The 1-based index of the parent node.
    /// - Parameter char: The char ID to look for.
    /// - Returns: The node index of the matching child, or `nil` when there is none.
    @inlinable func searchCharNodeIndex(from parentNodeIndex: Int, char: UInt8) -> Int? {
        let children = self.childNodeIndices(from: parentNodeIndex)
        // Group 0 starts at offset 0; every other group starts where the previous one ends.
        let groupStart = char == .zero ? 0 : self.flatChar2nodeIndicesIndex[Int(char - 1)]
        let groupEnd = self.flatChar2nodeIndicesIndex[Int(char)]
        let nodeIndices: ArraySlice<Int> = self.flatChar2nodeIndices[groupStart ..< groupEnd]

        // Lower bound: first position whose node index is `>= children.lowerBound`.
        var left = nodeIndices.startIndex
        var right = nodeIndices.endIndex
        while left < right {
            let mid = (left + right) >> 1
            if children.lowerBound <= nodeIndices[mid] {
                right = mid
            } else {
                left = mid + 1
            }
        }
        guard left < nodeIndices.endIndex, children.contains(nodeIndices[left]) else {
            return nil
        }
        return nodeIndices[left]
    }

    /// Walks down from the root following `chars` exactly.
    /// - Parameter chars: The sequence of char IDs to follow.
    /// - Returns: The index of the node reached after the last char (`1` for an empty sequence),
    ///   or `nil` when some char has no matching child.
    ///   NOTE(review): the original comment referenced `loudstxt3`; presumably the index addresses
    ///   an entry of that data. Confirm against the caller.
    @inlinable func searchNodeIndex(chars: [UInt8]) -> Int? {
        var index = 1
        for char in chars {
            guard let nodeIndex = self.searchCharNodeIndex(from: index, char: char) else {
                return nil
            }
            index = nodeIndex
        }
        return index
    }

    /// Collects the descendants of `nodeIndex` (the node itself is not included).
    /// - Parameter nodeIndex: The node whose descendants are collected.
    /// - Parameter depth: The current recursion depth.
    /// - Parameter maxDepth: The recursion stops descending once `depth` equals this value.
    /// - Parameter maxCount: A soft limit: it is checked before descending into each child,
    ///   so the result can contain more than `maxCount` elements.
    /// - Returns: The direct children followed by the descendants found below each of them.
    @inlinable func prefixNodeIndices(nodeIndex: Int, depth: Int = 0, maxDepth: Int, maxCount: Int) -> [Int] {
        var childNodeIndices = Array(self.childNodeIndices(from: nodeIndex))
        if depth == maxDepth {
            return childNodeIndices
        }
        // The loop iterates over the array as it was when the loop started, i.e. the direct children only;
        // elements appended inside the loop are not visited by it.
        for index in childNodeIndices {
            if childNodeIndices.count > maxCount {
                break
            }
            childNodeIndices.append(
                contentsOf: self.prefixNodeIndices(
                    nodeIndex: index,
                    depth: depth + 1,
                    maxDepth: maxDepth,
                    maxCount: maxCount - childNodeIndices.count
                )
            )
        }
        return childNodeIndices
    }

    /// Collects the nodes located below the node reached by `chars`.
    /// - Parameter chars: The sequence of char IDs identifying the starting node.
    /// - Parameter maxDepth: How many levels below the direct children are explored.
    /// - Parameter maxCount: A soft limit on the number of collected nodes.
    /// - Returns: The collected node indices, or an empty array when `chars` matches no node.
    @inlinable package func prefixNodeIndices(chars: [UInt8], maxDepth: Int, maxCount: Int) -> [Int] {
        guard let nodeIndex = self.searchNodeIndex(chars: chars) else {
            return []
        }
        return self.prefixNodeIndices(nodeIndex: nodeIndex, maxDepth: maxDepth, maxCount: maxCount)
    }

    /// Collects the nodes on the path that `chars` follows from the root.
    /// - Parameter chars: The sequence of char IDs to follow.
    /// - Returns: The root index `1` followed by the node reached after each matched char.
    ///   The walk stops at the first char that has no matching child.
    @inlinable func byfixNodeIndices(chars: [UInt8]) -> [Int] {
        var current = 1
        var indices = [current]
        for char in chars {
            guard let nodeIndex = self.searchCharNodeIndex(from: current, char: char) else {
                break
            }
            indices.append(nodeIndex)
            current = nodeIndex
        }
        return indices
    }

    /// Lexicographic "less than" for char ID sequences; a proper prefix sorts before the longer sequence.
    private static func lexLessThan(_ lhs: [UInt8], _ rhs: [UInt8]) -> Bool {
        let minCount = Swift.min(lhs.count, rhs.count)
        for i in 0..<minCount {
            let l = lhs[i]
            let r = rhs[i]
            if l != r {
                return l < r
            }
        }
        return lhs.count < rhs.count
    }

    /// Collects, for several targets at once, the nodes on the paths the targets follow from the root.
    /// - Parameter targets: The char ID sequences to follow.
    /// - Parameter depth: Only nodes matched at a target position contained in this range are returned.
    /// - Returns: The node indices in the order in which they were discovered.
    /// - Note: The targets are processed in lexicographic order so that consecutive targets
    ///   share as much of the already matched path as possible.
    @inlinable func byfixNodeIndices(targets: [[UInt8]], depth: Range<Int>) -> [Int] {
        var targets = targets
        targets.sort(by: Self.lexLessThan)
        var helper = MovingTowardPrefixSearchHelper(louds: self)
        for target in targets {
            _ = helper.update(target: target)
        }
        return helper.indicesInDepth(depth: depth)
    }

    /// Runs path lookups for a series of targets while reusing the path matched for the previous target.
    struct MovingTowardPrefixSearchHelper {
        init(louds: LOUDS) {
            self.louds = louds
        }
        let louds: LOUDS
        /// Every node discovered so far, with the position in the target at which it was matched.
        var indices: [(depth: Int, index: Int)] = []
        /// The currently matched path: `stack[i]` is the node matched at position `i` and its char.
        var stack: [(nodeIndex: Int, char: UInt8)] = []

        /// Returns the discovered node indices whose match position is contained in `depth`.
        func indicesInDepth(depth: Range<Int>) -> [Int] {
            return self.indices
                .lazy
                .filter { depth.contains($0.depth) }
                .map { $0.index }
        }

        /// Follows `target`, reusing the leading part that is already on the stack.
        /// - Parameter target: The sequence of `CharID`s to follow.
        /// - Returns: `updated` is `true` when at least one node was added to `indices`.
        ///   `availableMaxIndex` is the last position of `target` that was matched; it is also `0`
        ///   when nothing was matched, so the two cases cannot be told apart from this value alone.
        @inlinable mutating func update(target: [UInt8]) -> (updated: Bool, availableMaxIndex: Int) {
            var updated = false
            var availableMaxIndex = 0
            for (i, char) in target.enumerated() {
                if i < self.stack.count {
                    if self.stack[i].char == char {
                        // This position was already resolved by an earlier target.
                        availableMaxIndex = i
                        continue
                    }
                    // The paths diverge here: drop the stale tail of the stack.
                    self.stack = Array(self.stack[..<i])
                }
                assert(i >= self.stack.count, "stack[\(i)] must not exist for logical reason.")
                // Look the char up below the last node on the stack, or below the root when the stack is empty.
                if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) {
                    self.indices.append((i, nodeIndex))
                    updated = true
                    availableMaxIndex = i
                    self.stack.append((nodeIndex, char))
                } else {
                    // No such child, so no longer part of this target can match either.
                    break
                }
            }
            return (updated, availableMaxIndex)
        }
    }
}