Files
AzooKeyKanaKanjiConverter/Sources/KanaKanjiConverterModule/Kana2Kanji/all_with_prefix_constraint.swift
Miwa 55ffe3c708 [Experimental] Zenzai (#92)
* experimental rinna integration

* Update impl

* update

* Bump swift-actions/setup-swift from 1 to 2

Bumps [swift-actions/setup-swift](https://github.com/swift-actions/setup-swift) from 1 to 2.
- [Release notes](https://github.com/swift-actions/setup-swift/releases)
- [Commits](https://github.com/swift-actions/setup-swift/compare/v1...v2)

---
updated-dependencies:
- dependency-name: swift-actions/setup-swift
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* change test

* change impl

* take gpt2 weight as option

* don't use async

* support status check

* enhance error

* avoid percent encode

* update

* GPT-2 based kana-kanji conversion is now perfectly workinggit statusgit status

* fix a bug

* Rename gpt2/llama -> zenz

* cleanup

* internal apiを綺麗にした

* cleanup experimental commands

* update

* partially support incremental input using cache

* fix names

* fix bug

* support roman2kana

* cleanup

* fix minor bugs

* improve logic

* fix minor bug

* fix minor bug

* fix minor bug

* optimize

* optimize performance

* Optimize cache hit

* cli: add anco session command

* fix cache hit bugs

* improve session commands

* maybe this will work better for incremental input environment

* speed up zenzai by using n_best alternatives

* update zenz context

* adding no_typo api

* add inference limit

* fix bug

* reset install_cli

* make package buildable -- but llama.cpp features just do not work at this point because metal is not preprocessed

* add proper availability checks

* change macOS minimum version

* fix several problems

* code cleanup

* enable ubuntu build

* fix build error

* fix ubuntu build

* fix borrowing

* update install_cli.sh

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-05-15 01:36:45 +09:00

101 lines
5.4 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import Foundation
import SwiftUtils
extension Kana2Kanji {
/// ,
/// - Parameters:
/// - inputData:
/// - N_best: N_best
/// - Returns:
///
/// ###
/// (0)
///
/// (1)
///
/// (2)(1)registerN_best
///
/// (3)(1)registerresultEOS
///
/// (4)
func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: String) -> (result: LatticeNode, nodes: Nodes) {
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)")
let count: Int = inputData.input.count
let result: LatticeNode = LatticeNode.EOSNode
let nodes: [[LatticeNode]] = (.zero ..< count).map {dicdataStore.getFrozenLOUDSDataInRange(inputData: inputData, from: $0)}
// inodes
for (i, nodeArray) in nodes.enumerated() {
// node
for node in nodeArray {
if node.prevs.isEmpty {
continue
}
if self.dicdataStore.shouldBeRemoved(data: node.data) {
continue
}
//
let wValue: PValue = node.data.value()
if i == 0 {
// values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else {
// values
node.values = node.prevs.map {$0.totalValue + wValue}
}
//
let nextIndex: Int = node.inputRange.endIndex
// count
if nextIndex == count {
for index in node.prevs.indices {
let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
let text = newnode.getCandidateData().data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word
if text.hasPrefix(constraint) {
result.prevs.append(newnode)
}
}
} else {
let candidates = node.getCandidateData().map {
$0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word
}
// nodenextnode
for nextnode in nodes[nextIndex] {
// node.registered.isEmpty
if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
continue
}
//
let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
// nodeprevnode
for (index, value) in node.values.enumerated() {
//
// common prefix
// AB ABC (OK)
// AB A (OK)
// AB AC (NG)
let text = candidates[index] + nextnode.data.word
if !text.hasPrefix(constraint) && !constraint.hasPrefix(text) {
continue
}
let newValue: PValue = ccValue + value
// index
let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
if lastindex == N_best {
continue
}
let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
//
if nextnode.prevs.count >= N_best {
nextnode.prevs.removeLast()
}
// removeinsert (insertO(N))
nextnode.prevs.insert(newnode, at: lastindex)
}
}
}
}
}
return (result: result, nodes: nodes)
}
}