implemented rx-sys

This commit is contained in:
Tokuhiro Matsuno
2022-12-25 00:17:50 +09:00
parent 5b6fcd95fa
commit a89e180efe
14 changed files with 346 additions and 10 deletions

1
.gitignore vendored
View File

@@ -10,3 +10,4 @@ dist/
*.so
perf.data
callgrind.*
target/

3
.gitmodules vendored
View File

@@ -13,3 +13,6 @@
[submodule "pyakaza/pybind11"]
path = pyakaza/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "rx-sys/rx"]
path = rx-sys/rx
url = git@github.com:yt76/rx.git

View File

@@ -2,13 +2,6 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "akaza-dump-systemlm-unigram"
version = "0.1.0"
dependencies = [
"marisa-sys",
]
[[package]]
name = "cc"
version = "1.0.78"
@@ -16,7 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
[[package]]
name = "marisa-sys"
name = "rx-sys"
version = "0.1.0"
dependencies = [
"cc",

2
Cargo.toml Normal file
View File

@@ -0,0 +1,2 @@
[workspace]
members = ["rx-sys"]

View File

@@ -1,2 +0,0 @@
[workspace]
members = ["marisa-sys", "akaza-dump-systemlm-unigram"]

8
docs/akaza2-plan.md Normal file
View File

@@ -0,0 +1,8 @@
# Akaza2 の実装計画
Akaza2 では、実装言語を Rust へ移行することを主眼においている。
- Rust を使ったことないから面白そうであること
- C++ を利用したツールチェーンに python が絡んできてメンテナンスが難しすぎること
などの理由がある。

11
docs/rx.md Normal file
View File

@@ -0,0 +1,11 @@
# 共通接頭辞検索ライブラリの選定
akaza1 では MARISA を採用していた。
これを akaza2 では rx に変更する。
- MARISA は C++ のライブラリであり、同梱するには大きいことから外部依存となってくること。
- MARISA は C++ なので、これをラップすると結構大きいバインディングライブラリになること。
- RX はシングルファイルの C ライブラリなので、ソースコードを git submodule で入れても大した大きさにならないこと。
- 最終的に rust に移植することも可能なぐらい小さいライブラリであること
などの理由から rx への移行を検討する。

7
rx-sys/Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "rx-sys"
version = "0.1.0"

9
rx-sys/Cargo.toml Normal file
View File

@@ -0,0 +1,9 @@
[package]
name = "rx-sys"
version = "0.1.0"
edition = "2021"
[dependencies]
[build-dependencies]
cc = "1.0.78"

3
rx-sys/README.md Normal file
View File

@@ -0,0 +1,3 @@
rx の Rust bindings です。
src/bindings.rs は bindgen から生成したものをベースにしています。

8
rx-sys/build.rs Normal file
View File

@@ -0,0 +1,8 @@
extern crate cc;
/// Build-script entry point: hands the bundled `rx/rx.c` to the `cc` crate,
/// with the `rx` directory on the include path, and compiles it under the
/// library name `rx`.
fn main() {
    let mut build = cc::Build::new();
    build.file("rx/rx.c");
    build.include("rx");
    build.compile("rx");
}

1
rx-sys/rx Submodule

Submodule rx-sys/rx added at 8744286dfe

285
rx-sys/src/bindings.rs Normal file
View File

@@ -0,0 +1,285 @@
/* Based on code generated by rust-bindgen 0.63.0 from rx/rx.h */
use std::ffi::CString;
use std::ffi::CStr;
// Search-related constants carried over from rx/rx.h (this file is based on
// bindgen output).
// NOTE(review): presumably these are the values for the `flags` argument of
// `rx_search` / `rx_search_expand`; the test in this file passes the raw
// values 0 and 1 — confirm against rx.h.
pub const RX_SEARCH_DEFAULT: u32 = 0;
pub const RX_SEARCH_PREDICTIVE: u32 = 1;
pub const RX_SEARCH_1LEVEL: u32 = 2;
pub const RX_SEARCH_DEPTH_SHIFT: u32 = 2;
// 1020 == 0xFF << 2, i.e. an 8-bit field positioned at RX_SEARCH_DEPTH_SHIFT.
pub const RX_SEARCH_DEPTH_MASK: u32 = 1020;
/// Owning wrapper around a raw `rx_builder` handle of the C library.
///
/// The handle is obtained from `rx_builder_create` in `new` and handed back
/// to `rx_builder_release` when the wrapper is dropped.
pub struct RXBuilder {
    rx: *mut rx_builder,
}

impl RXBuilder {
    /// Creates a wrapper around a fresh handle from `rx_builder_create`.
    unsafe fn new() -> RXBuilder {
        let handle = rx_builder_create();
        RXBuilder { rx: handle }
    }

    /// Passes `str` to `rx_builder_add` as a NUL-terminated C string.
    ///
    /// Panics if `str` contains an interior NUL byte.
    unsafe fn add(&self, str: String) {
        let word = CString::new(str).expect("Cannot convert to CString");
        rx_builder_add(self.rx, word.as_ptr());
    }

    /// Returns the value reported by `rx_builder_get_size`.
    unsafe fn get_size(&self) -> i32 {
        rx_builder_get_size(self.rx)
    }

    /// Returns the raw image pointer reported by `rx_builder_get_image`.
    unsafe fn get_image(&self) -> *mut u8 {
        rx_builder_get_image(self.rx)
    }

    /// Forwards `bits` to `rx_builder_set_bits`.
    unsafe fn set_bits(&self, bits: i32) {
        rx_builder_set_bits(self.rx, bits);
    }

    /// Runs `rx_builder_build` and returns its status code unchanged.
    unsafe fn build(&self) -> i32 {
        rx_builder_build(self.rx)
    }

    /// Looks up `str` through `rx_builder_get_key_index` and returns the
    /// result unchanged.
    ///
    /// Panics if `str` contains an interior NUL byte.
    unsafe fn get_key_index(&self, str: String) -> i32 {
        let key = CString::new(str).expect("Cannot convert to CString");
        rx_builder_get_key_index(self.rx, key.as_ptr())
    }

    /// Calls `rx_builder_dump` on the wrapped handle.
    unsafe fn dump(&self) {
        rx_builder_dump(self.rx);
    }
}

impl Drop for RXBuilder {
    /// Hands the wrapped handle back to `rx_builder_release`.
    fn drop(&mut self) {
        // SAFETY: `self.rx` was produced by `rx_builder_create` in `new`, the
        // only constructor in this file, and is released nowhere else.
        unsafe { rx_builder_release(self.rx) }
    }
}
// C-ABI callback type handed to `rx_search` as its `cb` argument.
// Arguments, in order, follow the extern declaration of `rx_search` in this
// file: `cookie` (opaque user pointer), `s` (C string), `len`, `id`.
pub type SearchCallback = unsafe extern "C" fn(
*mut ::std::os::raw::c_void,
*const ::std::os::raw::c_char,
::std::os::raw::c_int,
::std::os::raw::c_int
) -> i32;
struct Rx {
rx: *mut rx,
}
impl Rx {
unsafe fn open(ptr: *mut u8) -> Rx {
Rx { rx: rx_open(ptr) }
}
unsafe extern "C" fn trampoline<F>(
cookie: *mut ::std::os::raw::c_void,
s: *const ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
id: ::std::os::raw::c_int,
) -> i32
where
F: FnMut(
String,
::std::os::raw::c_int,
::std::os::raw::c_int) -> i32,
{
let cookie = &mut *(cookie as *mut F);
let cs = CStr::from_ptr(s);
cookie(cs.to_str().unwrap().to_string(), len, id)
}
fn get_trampoline<F>(_closure: &F) -> SearchCallback
where
F: FnMut(String,
::std::os::raw::c_int,
::std::os::raw::c_int) -> i32,
{
Rx::trampoline::<F>
}
unsafe fn search<F>(&self, flags: i32, s: String, cbbb: F)
where F: FnMut(String, i32, i32) -> i32 {
let mut closure = cbbb;
let cb = Rx::get_trampoline(&closure);
let p= CString::new(s).unwrap();
rx_search(
self.rx,
flags,
p.as_ptr(),
Some(cb),
&mut closure as *mut _ as *mut ::std::os::raw::c_void,
);
}
}
// TODO support RBX
// TODO make following parts, private.
// Opaque stand-ins for the C library's structs. Each one is a zero-sized
// `#[repr(C)]` type that this file only ever uses behind raw pointers in the
// `extern "C"` declarations below; its fields are never accessed from Rust.

// Handle type returned by `rx_open` and consumed by `rx_close` / `rx_search`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct rx {
_unused: [u8; 0],
}
// Handle type returned by `rx_builder_create`; wrapped by `RXBuilder`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct rx_builder {
_unused: [u8; 0],
}
// Handle type returned by `rbx_open` and consumed by `rbx_close` / `rbx_get`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct rbx {
_unused: [u8; 0],
}
// Handle type returned by `rbx_builder_create`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct rbx_builder {
_unused: [u8; 0],
}
// Raw declarations of the C functions of the rx library (per the header
// comment of this file, based on bindgen output for rx/rx.h). Every call is
// `unsafe`; pointer validity is the caller's responsibility.
extern "C" {
// --- rx builder API (wrapped by `RXBuilder` in this file) ---
pub fn rx_builder_create() -> *mut rx_builder;
pub fn rx_builder_release(builder: *mut rx_builder);
pub fn rx_builder_add(builder: *mut rx_builder, word: *const ::std::os::raw::c_char);
pub fn rx_builder_build(builder: *mut rx_builder) -> ::std::os::raw::c_int;
pub fn rx_builder_get_image(builder: *mut rx_builder) -> *mut ::std::os::raw::c_uchar;
pub fn rx_builder_get_size(builder: *mut rx_builder) -> ::std::os::raw::c_int;
// The test in this file observes -1 for a key that was never added.
pub fn rx_builder_get_key_index(
builder: *mut rx_builder,
key: *const ::std::os::raw::c_char,
) -> ::std::os::raw::c_int;
pub fn rx_builder_set_bits(builder: *mut rx_builder, bits: ::std::os::raw::c_int);
pub fn rx_builder_dump(builder: *mut rx_builder);
// --- rx reader API (`rx_open` / `rx_search` are wrapped by `Rx` in this file) ---
pub fn rx_open(image: *const ::std::os::raw::c_uchar) -> *mut rx;
pub fn rx_close(r: *mut rx);
// `cb` is invoked with the caller-supplied `cookie`; per the note in this
// file's test, a non-zero return from `cb` stops further traversal.
pub fn rx_search(
r: *const rx,
flags: ::std::os::raw::c_int,
s: *const ::std::os::raw::c_char,
cb: ::std::option::Option<
unsafe extern "C" fn(
cookie: *mut ::std::os::raw::c_void,
s: *const ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
id: ::std::os::raw::c_int,
) -> ::std::os::raw::c_int,
>,
cookie: *mut ::std::os::raw::c_void,
);
// Same shape as `rx_search`, plus a second callback and its data pointer.
// NOTE(review): semantics of `cb_expand_chars` are not visible here — see rx.h.
pub fn rx_search_expand(
r: *const rx,
flags: ::std::os::raw::c_int,
s: *const ::std::os::raw::c_char,
cb: ::std::option::Option<
unsafe extern "C" fn(
cookie: *mut ::std::os::raw::c_void,
s: *const ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
id: ::std::os::raw::c_int,
) -> ::std::os::raw::c_int,
>,
cookie: *mut ::std::os::raw::c_void,
cb_expand_chars: ::std::option::Option<
unsafe extern "C" fn(
expansion_data: *const ::std::os::raw::c_void,
s: ::std::os::raw::c_char,
) -> *const ::std::os::raw::c_char,
>,
expansion_data: *const ::std::os::raw::c_void,
);
// NOTE(review): presumably writes the key for index `n` into `buf` (capacity
// `len`) — confirm against rx.h.
pub fn rx_reverse(
r: *const rx,
n: ::std::os::raw::c_int,
buf: *mut ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
) -> *mut ::std::os::raw::c_char;
// --- rbx API (no Rust wrapper yet; see `TODO support RBX` in this file) ---
pub fn rbx_builder_create() -> *mut rbx_builder;
pub fn rbx_builder_set_length_coding(
builder: *mut rbx_builder,
min: ::std::os::raw::c_int,
step: ::std::os::raw::c_int,
);
pub fn rbx_builder_push(
builder: *mut rbx_builder,
bytes: *const ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
);
pub fn rbx_builder_build(builder: *mut rbx_builder) -> ::std::os::raw::c_int;
pub fn rbx_builder_get_image(builder: *mut rbx_builder) -> *mut ::std::os::raw::c_uchar;
pub fn rbx_builder_get_size(builder: *mut rbx_builder) -> ::std::os::raw::c_int;
pub fn rbx_builder_release(builder: *mut rbx_builder);
pub fn rbx_open(image: *const ::std::os::raw::c_uchar) -> *mut rbx;
pub fn rbx_close(r: *mut rbx);
pub fn rbx_get(
r: *mut rbx,
idx: ::std::os::raw::c_int,
len: *mut ::std::os::raw::c_int,
) -> *const ::std::os::raw::c_uchar;
}
/// End-to-end check of the wrappers: builds an image from four words, checks
/// its size and key indices, then runs two searches over the opened image.
#[test]
fn test() {
    unsafe {
        let builder = RXBuilder::new();
        builder.set_bits(8);
        for word in ["apple", "ago", "abc", "quick"] {
            builder.add(String::from(word));
        }
        builder.build();
        assert_eq!(builder.get_size(), 39);
        builder.dump();

        let abc_index = builder.get_key_index(String::from("abc"));
        assert_eq!(abc_index, 0);
        let apple_index = builder.get_key_index(String::from("apple"));
        assert_eq!(apple_index, 2);
        let missing_index = builder.get_key_index(String::from("UNKNOWN"));
        assert_eq!(missing_index, -1);

        let trie = Rx::open(builder.get_image());

        // flags = 0: searching "abc" reports exactly one hit.
        let mut exact_hits = 0;
        trie.search(0, String::from("abc"), |s, len, id| {
            println!("s={}, len={}, id={}", s, len, id);
            exact_hits += 1;
            /* returns non-zero value if you want to stop more traversal. */
            0
        });
        assert_eq!(exact_hits, 1);

        // flags = 1: searching "a" reports the three words starting with "a".
        let mut prefix_hits = 0;
        trie.search(1, String::from("a"), |s, len, id| {
            println!("s={}, len={}, id={}", s, len, id);
            prefix_hits += 1;
            /* returns non-zero value if you want to stop more traversal. */
            0
        });
        assert_eq!(prefix_hits, 3);
    }
}

7
rx-sys/src/lib.rs Normal file
View File

@@ -0,0 +1,7 @@
// Crate root: the whole crate is the textual contents of bindings.rs.
// The lint allowances below keep the bindgen-style C names from warning.
// NOTE(review): non_upper_case_globals / non_snake_case look like the usual
// bindgen preamble; nothing visible in bindings.rs obviously needs them.
#![allow(non_upper_case_globals)]
// bindings.rs declares lower-case C struct names such as `rx` and `rx_builder`.
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
// `Rx` and the wrapper methods are private and only exercised by the test.
#![allow(dead_code)]
include!("bindings.rs");