mirror of
https://github.com/mii443/akaza.git
synced 2025-12-03 11:08:29 +00:00
implemented rx-sys
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,3 +10,4 @@ dist/
|
|||||||
*.so
|
*.so
|
||||||
perf.data
|
perf.data
|
||||||
callgrind.*
|
callgrind.*
|
||||||
|
target/
|
||||||
|
|||||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -13,3 +13,6 @@
|
|||||||
[submodule "pyakaza/pybind11"]
|
[submodule "pyakaza/pybind11"]
|
||||||
path = pyakaza/pybind11
|
path = pyakaza/pybind11
|
||||||
url = https://github.com/pybind/pybind11.git
|
url = https://github.com/pybind/pybind11.git
|
||||||
|
[submodule "rx-sys/rx"]
|
||||||
|
path = rx-sys/rx
|
||||||
|
url = git@github.com:yt76/rx.git
|
||||||
|
|||||||
9
bin/Cargo.lock → Cargo.lock
generated
9
bin/Cargo.lock → Cargo.lock
generated
@@ -2,13 +2,6 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "akaza-dump-systemlm-unigram"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"marisa-sys",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.0.78"
|
version = "1.0.78"
|
||||||
@@ -16,7 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
|
checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "marisa-sys"
|
name = "rx-sys"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
2
Cargo.toml
Normal file
2
Cargo.toml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
[workspace]
|
||||||
|
members = ["rx-sys"]
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
[workspace]
|
|
||||||
members = ["marisa-sys", "akaza-dump-systemlm-unigram"]
|
|
||||||
8
docs/akaza2-plan.md
Normal file
8
docs/akaza2-plan.md
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# Akaza2 の実装計画
|
||||||
|
|
||||||
|
Akaza2 では、Rust へのプログラミング言語の移行を主眼においている。
|
||||||
|
|
||||||
|
- Rust を使ったことないから面白そうであること
|
||||||
|
- C++ を利用したツールチェーンに python が絡んできてメンテナンスが難しすぎること
|
||||||
|
|
||||||
|
などの理由がある。
|
||||||
11
docs/rx.md
Normal file
11
docs/rx.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# 共通接頭辞検索ライブラリの選定
|
||||||
|
|
||||||
|
akaza1 では MARISA を採用していた。
|
||||||
|
これを akaza2 では rx に変更する。
|
||||||
|
|
||||||
|
- MARISA は C++ のライブラリであり、同梱するには大きいことから外部依存となってくること。
|
||||||
|
- MARISA は C++ なので、これをラップすると結構大きいバインディングライブラリになること。
|
||||||
|
- RX はシングルファイルの C ライブラリなので、ソースコードを git submodule で入れても大した大きさにならないこと。
|
||||||
|
- 最終的に rust に移植することも可能なぐらい小さいライブラリであること
|
||||||
|
|
||||||
|
などの理由から rx への移行を検討する。
|
||||||
7
rx-sys/Cargo.lock
generated
Normal file
7
rx-sys/Cargo.lock
generated
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rx-sys"
|
||||||
|
version = "0.1.0"
|
||||||
9
rx-sys/Cargo.toml
Normal file
9
rx-sys/Cargo.toml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
[package]
|
||||||
|
name = "rx-sys"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
cc = "1.0.78"
|
||||||
3
rx-sys/README.md
Normal file
3
rx-sys/README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
rx の rust bindings です。
|
||||||
|
|
||||||
|
src/bindings.rs は bindgen から生成したものをベースにしています。
|
||||||
8
rx-sys/build.rs
Normal file
8
rx-sys/build.rs
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
extern crate cc;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
cc::Build::new()
|
||||||
|
.file("rx/rx.c")
|
||||||
|
.include("rx")
|
||||||
|
.compile("rx");
|
||||||
|
}
|
||||||
1
rx-sys/rx
Submodule
1
rx-sys/rx
Submodule
Submodule rx-sys/rx added at 8744286dfe
285
rx-sys/src/bindings.rs
Normal file
285
rx-sys/src/bindings.rs
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
/* Based on generated code by rust-bindgen 0.63.0 from rx/rx.h */
|
||||||
|
|
||||||
|
use std::ffi::CString;
|
||||||
|
use std::ffi::CStr;
|
||||||
|
|
||||||
|
/// Search flag value with no bits set.
pub const RX_SEARCH_DEFAULT: u32 = 0;
/// Search flag bit 0. The unit test in this file passes the value 1 to get
/// every stored key that starts with the query.
pub const RX_SEARCH_PREDICTIVE: u32 = 1 << 0;
/// Search flag bit 1.
// NOTE(review): presumably limits traversal to one level — confirm against rx.h.
pub const RX_SEARCH_1LEVEL: u32 = 1 << 1;
/// Bit offset of the depth field inside the flags word.
pub const RX_SEARCH_DEPTH_SHIFT: u32 = 2;
/// Mask of the depth field: eight bits starting at `RX_SEARCH_DEPTH_SHIFT` (= 1020).
pub const RX_SEARCH_DEPTH_MASK: u32 = 0xff << RX_SEARCH_DEPTH_SHIFT;
|
||||||
|
|
||||||
|
pub struct RXBuilder {
|
||||||
|
rx: *mut rx_builder,
|
||||||
|
}
|
||||||
|
impl RXBuilder {
|
||||||
|
unsafe fn new() -> RXBuilder {
|
||||||
|
RXBuilder { rx: rx_builder_create() }
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn add(&self, str: String) {
|
||||||
|
let p= CString::new(str).expect("Cannot convert to CString");
|
||||||
|
rx_builder_add(self.rx, p.as_ptr());
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn get_size(&self) -> i32 {
|
||||||
|
return rx_builder_get_size(self.rx);
|
||||||
|
}
|
||||||
|
unsafe fn get_image(&self) -> *mut u8 {
|
||||||
|
return rx_builder_get_image(self.rx);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn set_bits(&self, bits: i32) {
|
||||||
|
rx_builder_set_bits(self.rx, bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn build(&self) -> i32 {
|
||||||
|
return rx_builder_build(self.rx);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn get_key_index(&self, str: String) -> i32 {
|
||||||
|
let p= CString::new(str).expect("Cannot convert to CString");
|
||||||
|
return rx_builder_get_key_index(self.rx, p.as_ptr());
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn dump(&self) {
|
||||||
|
rx_builder_dump(self.rx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Drop for RXBuilder {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
unsafe {
|
||||||
|
rx_builder_release(self.rx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// C-ABI callback signature accepted by `rx_search`.
///
/// Receives the opaque `cookie` pointer handed to the search call, a C string
/// and two integers (`len`, `id`), and returns an integer status.
pub type SearchCallback = unsafe extern "C" fn(
    cookie: *mut std::os::raw::c_void,
    s: *const std::os::raw::c_char,
    len: std::os::raw::c_int,
    id: std::os::raw::c_int,
) -> i32;
|
||||||
|
|
||||||
|
struct Rx {
|
||||||
|
rx: *mut rx,
|
||||||
|
}
|
||||||
|
impl Rx {
|
||||||
|
unsafe fn open(ptr: *mut u8) -> Rx {
|
||||||
|
Rx { rx: rx_open(ptr) }
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe extern "C" fn trampoline<F>(
|
||||||
|
cookie: *mut ::std::os::raw::c_void,
|
||||||
|
s: *const ::std::os::raw::c_char,
|
||||||
|
len: ::std::os::raw::c_int,
|
||||||
|
id: ::std::os::raw::c_int,
|
||||||
|
) -> i32
|
||||||
|
where
|
||||||
|
F: FnMut(
|
||||||
|
String,
|
||||||
|
::std::os::raw::c_int,
|
||||||
|
::std::os::raw::c_int) -> i32,
|
||||||
|
{
|
||||||
|
let cookie = &mut *(cookie as *mut F);
|
||||||
|
let cs = CStr::from_ptr(s);
|
||||||
|
cookie(cs.to_str().unwrap().to_string(), len, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn get_trampoline<F>(_closure: &F) -> SearchCallback
|
||||||
|
where
|
||||||
|
F: FnMut(String,
|
||||||
|
::std::os::raw::c_int,
|
||||||
|
::std::os::raw::c_int) -> i32,
|
||||||
|
{
|
||||||
|
Rx::trampoline::<F>
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn search<F>(&self, flags: i32, s: String, cbbb: F)
|
||||||
|
where F: FnMut(String, i32, i32) -> i32 {
|
||||||
|
let mut closure = cbbb;
|
||||||
|
let cb = Rx::get_trampoline(&closure);
|
||||||
|
|
||||||
|
let p= CString::new(s).unwrap();
|
||||||
|
rx_search(
|
||||||
|
self.rx,
|
||||||
|
flags,
|
||||||
|
p.as_ptr(),
|
||||||
|
Some(cb),
|
||||||
|
&mut closure as *mut _ as *mut ::std::os::raw::c_void,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO support RBX
|
||||||
|
|
||||||
|
// TODO make following parts, private.
|
||||||
|
|
||||||
|
/// Zero-sized placeholder for the C `rx` type; this file refers to it only
/// through raw pointers.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct rx {
    _unused: [u8; 0],
}
|
||||||
|
/// Zero-sized placeholder for the C `rx_builder` type; this file refers to it
/// only through raw pointers.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct rx_builder {
    _unused: [u8; 0],
}
|
||||||
|
/// Zero-sized placeholder for the C `rbx` type; this file refers to it only
/// through raw pointers.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct rbx {
    _unused: [u8; 0],
}
|
||||||
|
/// Zero-sized placeholder for the C `rbx_builder` type; this file refers to
/// it only through raw pointers.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct rbx_builder {
    _unused: [u8; 0],
}
|
||||||
|
extern "C" {
|
||||||
|
pub fn rx_builder_create() -> *mut rx_builder;
|
||||||
|
|
||||||
|
pub fn rx_builder_release(builder: *mut rx_builder);
|
||||||
|
|
||||||
|
pub fn rx_builder_add(builder: *mut rx_builder, word: *const ::std::os::raw::c_char);
|
||||||
|
|
||||||
|
pub fn rx_builder_build(builder: *mut rx_builder) -> ::std::os::raw::c_int;
|
||||||
|
|
||||||
|
pub fn rx_builder_get_image(builder: *mut rx_builder) -> *mut ::std::os::raw::c_uchar;
|
||||||
|
|
||||||
|
pub fn rx_builder_get_size(builder: *mut rx_builder) -> ::std::os::raw::c_int;
|
||||||
|
|
||||||
|
pub fn rx_builder_get_key_index(
|
||||||
|
builder: *mut rx_builder,
|
||||||
|
key: *const ::std::os::raw::c_char,
|
||||||
|
) -> ::std::os::raw::c_int;
|
||||||
|
|
||||||
|
pub fn rx_builder_set_bits(builder: *mut rx_builder, bits: ::std::os::raw::c_int);
|
||||||
|
|
||||||
|
pub fn rx_builder_dump(builder: *mut rx_builder);
|
||||||
|
|
||||||
|
pub fn rx_open(image: *const ::std::os::raw::c_uchar) -> *mut rx;
|
||||||
|
|
||||||
|
pub fn rx_close(r: *mut rx);
|
||||||
|
|
||||||
|
pub fn rx_search(
|
||||||
|
r: *const rx,
|
||||||
|
flags: ::std::os::raw::c_int,
|
||||||
|
s: *const ::std::os::raw::c_char,
|
||||||
|
cb: ::std::option::Option<
|
||||||
|
unsafe extern "C" fn(
|
||||||
|
cookie: *mut ::std::os::raw::c_void,
|
||||||
|
s: *const ::std::os::raw::c_char,
|
||||||
|
len: ::std::os::raw::c_int,
|
||||||
|
id: ::std::os::raw::c_int,
|
||||||
|
) -> ::std::os::raw::c_int,
|
||||||
|
>,
|
||||||
|
cookie: *mut ::std::os::raw::c_void,
|
||||||
|
);
|
||||||
|
|
||||||
|
pub fn rx_search_expand(
|
||||||
|
r: *const rx,
|
||||||
|
flags: ::std::os::raw::c_int,
|
||||||
|
s: *const ::std::os::raw::c_char,
|
||||||
|
cb: ::std::option::Option<
|
||||||
|
unsafe extern "C" fn(
|
||||||
|
cookie: *mut ::std::os::raw::c_void,
|
||||||
|
s: *const ::std::os::raw::c_char,
|
||||||
|
len: ::std::os::raw::c_int,
|
||||||
|
id: ::std::os::raw::c_int,
|
||||||
|
) -> ::std::os::raw::c_int,
|
||||||
|
>,
|
||||||
|
cookie: *mut ::std::os::raw::c_void,
|
||||||
|
cb_expand_chars: ::std::option::Option<
|
||||||
|
unsafe extern "C" fn(
|
||||||
|
expansion_data: *const ::std::os::raw::c_void,
|
||||||
|
s: ::std::os::raw::c_char,
|
||||||
|
) -> *const ::std::os::raw::c_char,
|
||||||
|
>,
|
||||||
|
expansion_data: *const ::std::os::raw::c_void,
|
||||||
|
);
|
||||||
|
|
||||||
|
pub fn rx_reverse(
|
||||||
|
r: *const rx,
|
||||||
|
n: ::std::os::raw::c_int,
|
||||||
|
buf: *mut ::std::os::raw::c_char,
|
||||||
|
len: ::std::os::raw::c_int,
|
||||||
|
) -> *mut ::std::os::raw::c_char;
|
||||||
|
|
||||||
|
pub fn rbx_builder_create() -> *mut rbx_builder;
|
||||||
|
|
||||||
|
pub fn rbx_builder_set_length_coding(
|
||||||
|
builder: *mut rbx_builder,
|
||||||
|
min: ::std::os::raw::c_int,
|
||||||
|
step: ::std::os::raw::c_int,
|
||||||
|
);
|
||||||
|
|
||||||
|
pub fn rbx_builder_push(
|
||||||
|
builder: *mut rbx_builder,
|
||||||
|
bytes: *const ::std::os::raw::c_char,
|
||||||
|
len: ::std::os::raw::c_int,
|
||||||
|
);
|
||||||
|
|
||||||
|
pub fn rbx_builder_build(builder: *mut rbx_builder) -> ::std::os::raw::c_int;
|
||||||
|
|
||||||
|
pub fn rbx_builder_get_image(builder: *mut rbx_builder) -> *mut ::std::os::raw::c_uchar;
|
||||||
|
|
||||||
|
pub fn rbx_builder_get_size(builder: *mut rbx_builder) -> ::std::os::raw::c_int;
|
||||||
|
|
||||||
|
pub fn rbx_builder_release(builder: *mut rbx_builder);
|
||||||
|
|
||||||
|
pub fn rbx_open(image: *const ::std::os::raw::c_uchar) -> *mut rbx;
|
||||||
|
|
||||||
|
pub fn rbx_close(r: *mut rbx);
|
||||||
|
|
||||||
|
pub fn rbx_get(
|
||||||
|
r: *mut rbx,
|
||||||
|
idx: ::std::os::raw::c_int,
|
||||||
|
len: *mut ::std::os::raw::c_int,
|
||||||
|
) -> *const ::std::os::raw::c_uchar;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// End-to-end check of the wrappers: build a small trie, query key indices,
/// then run an exact and a predictive search against the built image.
#[test]
fn test() {
    unsafe {
        let builder = RXBuilder::new();
        builder.set_bits(8);
        for word in ["apple", "ago", "abc", "quick"] {
            builder.add(word.to_string());
        }
        builder.build();

        assert_eq!(builder.get_size(), 39);
        builder.dump();

        // Key indices as reported by the builder; -1 means "not found".
        assert_eq!(builder.get_key_index("abc".to_string()), 0);
        assert_eq!(builder.get_key_index("apple".to_string()), 2);
        assert_eq!(builder.get_key_index("UNKNOWN".to_string()), -1);

        let trie = Rx::open(builder.get_image());

        // Default search for "abc": exactly one hit expected.
        let mut exact_hits = 0;
        trie.search(RX_SEARCH_DEFAULT as i32, String::from("abc"), |s, len, id| {
            println!("s={}, len={}, id={}", s, len, id);
            exact_hits += 1;
            /* returns non-zero value if you want to stop more traversal. */
            0
        });
        assert_eq!(exact_hits, 1);

        // Predictive search for "a": three hits expected.
        let mut prefix_hits = 0;
        trie.search(RX_SEARCH_PREDICTIVE as i32, String::from("a"), |s, len, id| {
            println!("s={}, len={}, id={}", s, len, id);
            prefix_hits += 1;
            /* returns non-zero value if you want to stop more traversal. */
            0
        });
        assert_eq!(prefix_hits, 3);
    }
}
|
||||||
7
rx-sys/src/lib.rs
Normal file
7
rx-sys/src/lib.rs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
#![allow(non_upper_case_globals)]
|
||||||
|
#![allow(non_camel_case_types)]
|
||||||
|
#![allow(non_snake_case)]
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
include!("bindings.rs");
|
||||||
|
|
||||||
Reference in New Issue
Block a user