add more test cases

This commit is contained in:
Miwa / Ensan
2024-02-27 01:48:14 +09:00
parent 85a15051b4
commit cda10cc87e
3 changed files with 113 additions and 16 deletions

View File

@ -127,7 +127,7 @@ extension LOUDS {
}
/// Reads the dicdata entries at the requested indices and returns each group paired with its index.
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
static func getDataForLoudstxt3(_ identifier: String, indices: [(trueIndex: Int, keyIndex: Int)], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
let binary: Data
do {
let url = getLoudstxt3URL(identifier, option: option)
@ -141,10 +141,10 @@ extension LOUDS {
let header_endIndex: UInt32 = 2 + UInt32(lc) * UInt32(MemoryLayout<UInt32>.size)
let ui32array = binary[2..<header_endIndex].toArray(of: UInt32.self)
var result: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
for index in indices {
let startIndex = Int(ui32array[index])
let endIndex = index == (lc - 1) ? binary.endIndex : Int(ui32array[index + 1])
result.append((index, parseBinary(binary: binary[startIndex ..< endIndex])))
for (trueIndex, keyIndex) in indices {
let startIndex = Int(ui32array[keyIndex])
let endIndex = keyIndex == (lc - 1) ? binary.endIndex : Int(ui32array[keyIndex + 1])
result.append((trueIndex, parseBinary(binary: binary[startIndex ..< endIndex])))
}
return result
}

View File

@ -82,7 +82,7 @@ extension LOUDS {
extension DicdataStore {
func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> ConvertGraph {
let lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) } )
var stack: [Int] = Array(lookupGraph.nextIndices(for: lookupGraph.root))
var stack: [Int] = Array(lookupGraph.nextIndices(for: lookupGraph.root) + lookupGraph.structure.allowedNextIndex[0, default: []])
var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:]
while let graphNodeIndex = stack.popLast() {
let graphNode = lookupGraph.nodes[graphNodeIndex]
@ -109,6 +109,7 @@ extension DicdataStore {
}
graphNodeIndex2LatticeNodes[graphNodeIndex] = latticeNodes
stack.append(contentsOf: lookupGraph.nextIndices(for: graphNode))
stack.append(contentsOf: lookupGraph.structure.allowedNextIndex[graphNodeIndex, default: []])
}
return ConvertGraph.build(input: consume lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes)
}
@ -119,7 +120,8 @@ extension DicdataStore {
var data: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
for (key, value) in dict {
// FIXME: use local option
data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {$0 & 2047}, option: option))
// Pass both trueIndex and keyIndex; keyIndex is trueIndex masked (&) with split-1 = 2047.
data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {(trueIndex: $0, keyIndex: $0 & 2047)}, option: option))
}
return data
}
@ -130,17 +132,18 @@ final class LookupGraphTests: XCTestCase {
.withDefaultDictionary(requireJapanesePrediction: false, requireEnglishPrediction: false, keyboardLanguage: .ja_JP, learningType: .nothing, memoryDirectoryURL: URL(fileURLWithPath: ""), sharedContainerURL: URL(fileURLWithPath: ""), metadata: .init(appVersionString: "Test"))
}
func setup() -> (dicdataStore: DicdataStore, character2CharId: (Character) -> UInt8, louds_シ: LOUDS?) {
func setup() -> (dicdataStore: DicdataStore, character2CharId: (Character) -> UInt8) {
let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
let character2CharId: (Character) -> UInt8 = { dicdataStore.character2charId($0.toKatakana()) }
let louds = LOUDS.load("", option: requestOptions())
return (dicdataStore, character2CharId, louds)
return (dicdataStore, character2CharId)
}
func testByfixNodeIndices_しかい() throws {
let values = setup()
XCTAssertNotNil(values.louds_シ)
guard let louds = values.louds_シ else { return }
guard let louds = LOUDS.load("", option: requestOptions()) else {
XCTFail()
return
}
let correctGraph = CorrectGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
@ -168,12 +171,59 @@ final class LookupGraphTests: XCTestCase {
XCTAssertTrue(dicdata.contains {$0.word == "市外"})
XCTAssertTrue(dicdata.contains {$0.word == "市街"})
XCTAssertTrue(dicdata.contains {$0.word == "死骸"})
// all keys
XCTAssertEqual(
dicdata.mapSet {$0.ruby}.symmetricDifference(["", "シカ", "シカイ", "シガ", "シガイ"]),
[]
)
}
/// Checks the prefix lookup (`byfixNodeIndices`) for a four-character direct input.
///
/// The test builds CorrectGraph -> InputGraph -> LookupGraph, runs the lookup from the
/// first-level node matching the start character, loads the dicdata for the returned
/// LOUDS node indices, and then asserts that
/// 1. a known word is present for each expected reading, and
/// 2. the set of rubies found is exactly the expected set (nothing missing, nothing extra).
///
/// NOTE(review): several string literals (the dictionary identifier, the four input
/// characters, the start-node character and one expected word) appear empty in this view,
/// presumably stripped during extraction. They are reproduced unchanged — confirm against
/// the real file.
func testByfixNodeIndices_たいかく() throws {
    let values = setup()
    guard let louds = LOUDS.load("", option: requestOptions()) else {
        XCTFail("Failed to load the LOUDS dictionary required by this test")
        return
    }
    let correctGraph = CorrectGraph.build(input: [
        .init(character: "", inputStyle: .direct),
        .init(character: "", inputStyle: .direct),
        .init(character: "", inputStyle: .direct),
        .init(character: "", inputStyle: .direct),
    ])
    let inputGraph = InputGraph.build(input: correctGraph)
    let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
    // Abort as soon as the start node is missing. Falling back to an arbitrary index
    // would only run the lookup from the wrong node and bury the real failure under
    // follow-up assertion failures.
    let startNodeCandidate = lookupGraph.nextIndices(for: lookupGraph.root).first(where: { lookupGraph.nodes[$0].character == "" })
    let startNodeIndex = try XCTUnwrap(startNodeCandidate, "No first-level lookup node matches the start character")
    let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex)
    let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
    let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
    // One or more known words per expected reading; the groups below presumably line up
    // with the rubies listed in the final assertion, shortest reading first.
    // Shortest reading (literal empty in this view)
    XCTAssertTrue(dicdata.contains {$0.word == ""})
    // presumably reading タイ
    XCTAssertTrue(dicdata.contains {$0.word == "タイ"})
    XCTAssertTrue(dicdata.contains {$0.word == "他意"})
    // presumably reading タイカ
    XCTAssertTrue(dicdata.contains {$0.word == "対価"})
    // presumably reading タイガ
    XCTAssertTrue(dicdata.contains {$0.word == "大河"})
    // presumably reading タイカク
    XCTAssertTrue(dicdata.contains {$0.word == "体格"})
    // presumably reading タイガク
    XCTAssertTrue(dicdata.contains {$0.word == "退学"})
    // all keys: an empty symmetric difference means the rubies found equal the expected set
    XCTAssertEqual(
        dicdata.mapSet {$0.ruby}.symmetricDifference(["", "タイ", "タイカ", "タイガ", "タイカク", "タイガク"]),
        []
    )
}
func testByfixNodeIndices_sittai() throws {
let values = setup()
XCTAssertNotNil(values.louds_シ)
guard let louds = values.louds_シ else { return }
guard let louds = LOUDS.load("", option: requestOptions()) else {
XCTFail()
return
}
//
let correctGraph = CorrectGraph.build(input: [
.init(character: "s", inputStyle: .roman2kana),
@ -199,12 +249,19 @@ final class LookupGraphTests: XCTestCase {
XCTAssertTrue(dicdata.contains {$0.word == "叱咤"})
//
XCTAssertTrue(dicdata.contains {$0.word == "失態"})
// all keys
XCTAssertEqual(
dicdata.mapSet {$0.ruby}.symmetricDifference(["", "シッ", "シッタ", "シッタイ"]),
[]
)
}
func testByfixNodeIndices_sitsi() throws {
let values = setup()
XCTAssertNotNil(values.louds_シ)
guard let louds = values.louds_シ else { return }
guard let louds = LOUDS.load("", option: requestOptions()) else {
XCTFail()
return
}
// ts -> ta
let correctGraph = CorrectGraph.build(input: [
.init(character: "s", inputStyle: .roman2kana),
@ -231,5 +288,10 @@ final class LookupGraphTests: XCTestCase {
//
XCTAssertTrue(dicdata.contains {$0.word == "死体"})
XCTAssertTrue(dicdata.contains {$0.word == "肢体"})
// all keys
XCTAssertEqual(
dicdata.mapSet {$0.ruby}.symmetricDifference(["", "シツ", "シタ", "シタイ"]),
[]
)
}
}

View File

@ -46,6 +46,41 @@ final class ExperimentalConversionTests: XCTestCase {
.withDefaultDictionary(requireJapanesePrediction: false, requireEnglishPrediction: false, keyboardLanguage: .ja_JP, learningType: .nothing, memoryDirectoryURL: URL(fileURLWithPath: ""), sharedContainerURL: URL(fileURLWithPath: ""), metadata: .init(appVersionString: "Test"))
}
/// Builds the convert graph for the direct input "たいかく" and checks the rubies held by
/// the first graph node whose lattice contains the marker word.
///
/// The assertion compares a symmetric difference against the empty set, so it fails both
/// when an expected ruby is missing and when an unexpected one is present. If no node
/// contains the marker word the optional chain yields `nil`, which also fails.
///
/// NOTE(review): the marker-word literal and the first expected ruby appear empty in this
/// view, presumably stripped during extraction; they are reproduced unchanged — confirm
/// against the real file.
func testBuildConvertGraph_たいかく() throws {
    let store = DicdataStore(requestOptions: requestOptions())
    var composingText = ComposingText()
    composingText.insertAtCursorPosition("たいかく", inputStyle: .direct)
    let correctGraph = CorrectGraph.build(input: composingText.input)
    let inputGraph = InputGraph.build(input: consume correctGraph)
    let convertGraph = store.buildConvertGraph(inputGraph: inputGraph, option: requestOptions())
    // First graph node whose lattice nodes include the marker word.
    let markerNode = convertGraph.nodes.first { node in
        node.latticeNodes.contains(where: {$0.data.word == ""})
    }
    // Rubies that are either missing from, or extra to, the expected set.
    let rubyMismatch = markerNode?.latticeNodes
        .mapSet {$0.data.ruby}
        .symmetricDifference(["", "タイ", "タイカ", "タイガ", "タイカク", "タイガク"])
    XCTAssertEqual(rubyMismatch, [])
}
/// Runs the experimental conversion on the direct input "たいかく" and checks that both
/// voicing variants of the final reading are reachable in the joined result.
func testConversion_たいかく() throws {
    let converter = Kana2Kanji(dicdataStore: DicdataStore(requestOptions: requestOptions()))
    var composingText = ComposingText()
    composingText.insertAtCursorPosition("たいかく", inputStyle: .direct)
    let converted = converter._experimental_all(composingText, option: requestOptions())
    // Candidate for the reading exactly as typed (presumably タイカク).
    XCTAssertTrue(converted.joinedPrevs().contains("体格"))
    // Candidate for the voiced variant (presumably タイガク).
    XCTAssertTrue(converted.joinedPrevs().contains("退学"))
}
/// Runs the experimental conversion on the direct input "むらさき" and checks that the
/// joined result contains the expected word.
///
/// NOTE(review): the expected-word literal appears empty in this view, presumably stripped
/// during extraction; it is reproduced unchanged — confirm against the real file.
func testConversion_むらさき() throws {
    let converter = Kana2Kanji(dicdataStore: DicdataStore(requestOptions: requestOptions()))
    var composingText = ComposingText()
    composingText.insertAtCursorPosition("むらさき", inputStyle: .direct)
    let converted = converter._experimental_all(composingText, option: requestOptions())
    // The expected conversion must appear in the joined candidates.
    XCTAssertTrue(converted.joinedPrevs().contains(""))
}
func testConversion() throws {
let dicdataStore = DicdataStore(requestOptions: requestOptions())
let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore)