[Experimental] ConvertGraphを実装し、その上での完全一致変換を実装 (#47)

* ConvertGraphを実装し、その上での完全一致変換を実装

* 名前空間を汚染していたので修正

* Implementation completed (without test)

* move directory to use default dictionary

* fix implementations to enable conversion

* add test cases

* Backward searchで発見された候補を明示的に削除

* fix tests

* simplify
This commit is contained in:
Miwa / Ensan
2024-02-24 23:21:44 +09:00
committed by GitHub
parent 52fc9ae4c2
commit 0795b8cf84
15 changed files with 706 additions and 327 deletions

View File

@@ -73,6 +73,6 @@ public struct DicdataElement: Equatable, Hashable, Sendable {
extension DicdataElement: CustomDebugStringConvertible {
public var debugDescription: String {
"(ruby: \(self.ruby), word: \(self.word), cid: (\(self.lcid), \(self.rcid)), mid: \(self.mid), value: \(self.baseValue)+\(self.adjust)=\(self.value())"
"(ruby: \(self.ruby), word: \(self.word), cid: (\(self.lcid), \(self.rcid)), mid: \(self.mid), value: \(self.baseValue)+\(self.adjust)=\(self.value()))"
}
}

View File

@@ -102,6 +102,10 @@ public final class DicdataStore {
}
}
/// Returns the ID stored for `character` in `charsID`, or `UInt8.max` when the character has no entry.
func character2charId(_ character: Character) -> UInt8 {
self.charsID[character, default: .max]
}
private func reloadMemory() {
self.loudses.removeValue(forKey: "memory")
self.importedLoudses.remove("memory")
@@ -143,7 +147,7 @@ public final class DicdataStore {
return Self.getPenalty(data: data) < -d
}
private func loadLOUDS(identifier: String) -> LOUDS? {
func loadLOUDS(identifier: String) -> LOUDS? {
if importedLoudses.contains(identifier) {
return self.loudses[identifier]
}
@@ -213,7 +217,7 @@ public final class DicdataStore {
var stringToInfo = inputData.getRangesWithTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
// MARK:
let stringSet = stringToInfo.keys.map {($0, $0.map {self.charsID[$0, default: .max]})}
let stringSet = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
// :
let group = [Character: [([Character], [UInt8])]].init(grouping: stringSet, by: {$0.0.first!})
@@ -318,7 +322,7 @@ public final class DicdataStore {
// MARK: indices
// :
let strings = string2penalty.keys.map {
(key: $0, charIDs: $0.map {self.charsID[$0, default: .max]})
(key: $0, charIDs: $0.map(self.character2charId))
}
let group = [Character: [(key: [Character], charIDs: [UInt8])]].init(grouping: strings, by: {$0.key.first!})
@@ -433,7 +437,7 @@ public final class DicdataStore {
} else if count == 2 {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map {self.charsID[$0, default: .max]}
let charIDs = key.map(self.character2charId)
// Keep at most 700 of the prefix-match indices
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: 5).prefix(700)
result.append(
@@ -451,7 +455,7 @@ public final class DicdataStore {
} else {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map {self.charsID[$0, default: .max]}
let charIDs = key.map(self.character2charId)
// Keep at most 700 of the prefix-match indices
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs).prefix(700)
result.append(

View File

@@ -127,24 +127,24 @@ extension LOUDS {
}
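/// Reads the loudstxt3 file for `identifier` and returns the parsed dictionary data for each of the given indices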
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [Int: [DicdataElement]] {
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
let binary: Data
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
} catch {
debug("getDataForLoudstxt3: \(error)")
return [:]
return []
}
let lc = binary[0..<2].toArray(of: UInt16.self)[0]
let header_endIndex: UInt32 = 2 + UInt32(lc) * UInt32(MemoryLayout<UInt32>.size)
let ui32array = binary[2..<header_endIndex].toArray(of: UInt32.self)
var result: [Int: [DicdataElement]] = [:]
var result: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
for index in indices {
let startIndex = Int(ui32array[index])
let endIndex = index == (lc - 1) ? binary.endIndex : Int(ui32array[index + 1])
result[index] = parseBinary(binary: binary[startIndex ..< endIndex])
result.append((index, parseBinary(binary: binary[startIndex ..< endIndex])))
}
return result
}