Skip to content

Commit

Permalink
prepare release
Browse files Browse the repository at this point in the history
  • Loading branch information
TianLiangZhou committed Jun 16, 2023
1 parent 183019c commit 50828d9
Show file tree
Hide file tree
Showing 10 changed files with 327 additions and 213 deletions.
394 changes: 239 additions & 155 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "ffi_pinyin"
version = "0.1.0"
version = "0.1.1"
authors = ["zhoutianliang <mfkgdyve@gmail.com>"]
edition = "2018"
edition = "2021"
build = "build.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -13,7 +13,7 @@ crate-type = ["cdylib"]
path = "src/lib.rs"

[build-dependencies]
cbindgen = "0.20.0"
cbindgen = "0.24.3"

[dependencies]
pinyin = "0.9.0"
pinyin = "0.10.0"
2 changes: 2 additions & 0 deletions examples/example.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
echo "音标未识别跳过: ", $py->plain("PHP永远滴神,rust永远的神", true, false, '-'), "\n";
echo "音标未识别不分隔: ", $py->plain("PHP永远滴神,rust永远的神", false, false, '-', true), "\n";

echo "URL slug:" . $py->slug("JavaScript使用FileReader读取本地文件内容"), "\n";

var_export($py->plainArray("PHP永远滴神,rust永远的神", false, false, true));
echo "\n";
var_export($py->toneArray("我的中国心,永恒之❤️", true, false));
Expand Down
3 changes: 2 additions & 1 deletion lib/ffi_pinyin.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ char *to_pinyin(const char *str,
int is_multi,
unsigned char separator,
int not_split_unknown_char,
Mode mode);
Mode mode,
int is_slug);

struct PinyinArray *to_pinyin_array(const char *str,
int is_ignore_unknown_char,
Expand Down
Binary file removed lib/libffi_pinyin.arm.dylib
Binary file not shown.
Binary file modified lib/libffi_pinyin.dll
Binary file not shown.
Binary file modified lib/libffi_pinyin.dylib
Binary file not shown.
Binary file modified lib/libffi_pinyin.so
Binary file not shown.
26 changes: 19 additions & 7 deletions src/Pinyin.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Pinyin
*/
private function __construct()
{
if (ini_get('ffi.enable') == false) {
if (!ini_get('ffi.enable')) {
throw new RuntimeException("请设置php.ini中的ffi.enable参数");
}
$this->ffi = $this->makeFFI();
Expand Down Expand Up @@ -84,6 +84,21 @@ private function __clone()

}

/**
 * Generate a URL slug from the given text.
 *
 * Delegates to toPinyin() in plain (tone-less) mode with the slug flag set.
 * In slug mode the native library lower-cases the result, turns spaces into
 * the separator, drops unrecognised characters that are not ASCII
 * alphanumerics and collapses doubled separators.
 *
 * @param string $str       Text to convert; an empty string yields "".
 * @param string $separator Single-byte separator placed between parts.
 * @return string The generated slug.
 * @throws \InvalidArgumentException If $separator is not exactly one byte long.
 */
public function slug(string $str, string $separator = '-'): string
{
    // Compare against '' explicitly: empty() also treats the string "0" as
    // empty, which would wrongly turn a valid input into an empty slug.
    if ($str === '') {
        return "";
    }
    // toPinyin() declares int flags, so pass ints rather than booleans.
    return $this->toPinyin($str, 0, 0, $separator, 0, self::Plain, 1);
}

/**
* 普通风格没有音调
*
Expand Down Expand Up @@ -261,14 +276,15 @@ public function letterArray(string $str, bool $isSkipUnknown = true, bool $isMul
* @param string $separator
* @param int $notSplitUnknownChar
* @param int $mode
* @param int $isSlug
* @return string
*/
private function toPinyin(string $str, int $isSkipUnknown, int $isMulti, string $separator, int $notSplitUnknownChar, int $mode): string
private function toPinyin(string $str, int $isSkipUnknown, int $isMulti, string $separator, int $notSplitUnknownChar, int $mode, int $isSlug = 0): string
{
if (strlen($separator) != 1) {
throw new \InvalidArgumentException("Separator only supports ascii characters");
}
$CData = $this->ffi->to_pinyin($str, $isSkipUnknown, $isMulti, ord($separator), $notSplitUnknownChar, $mode);
$CData = $this->ffi->to_pinyin($str, $isSkipUnknown, $isMulti, ord($separator), $notSplitUnknownChar, $mode, $isSlug);
$result = FFI::string($CData);
$this->ffi->free_pointer($CData);
return $result;
Expand Down Expand Up @@ -316,10 +332,6 @@ private function defaultLibraryPath(): string
$suffix = PHP_SHLIB_SUFFIX;
if (PHP_OS == 'Darwin') {
$suffix = 'dylib';
//mac m1 m2 arm64
if(php_uname('m') == 'arm64'){
$suffix= 'arm.dylib';
}
}
$filepath = __DIR__ . '/../lib/libffi_pinyin.' . $suffix;
if (file_exists($filepath)) {
Expand Down
107 changes: 61 additions & 46 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use pinyin::{Pinyin, PinyinMulti, ToPinyin, ToPinyinMulti};
use pinyin::{Pinyin, ToPinyin, ToPinyinMulti};
use std::ffi::{CStr, CString};
use std::os::raw::{c_char, c_int, c_schar, c_uchar};
use std::thread::spawn;
use std::os::raw::{c_char, c_int, c_uchar};
use std::{mem, ptr};

#[repr(C)]
Expand Down Expand Up @@ -89,37 +88,47 @@ fn to_convert(
separator: char,
not_split_unknown_char: bool,
mode: Mode,
is_slug: bool,
) -> String {
let chars = str.chars().collect::<Vec<char>>();
let mut unknown = String::new();
let mut vec: Vec<String> = Vec::new();
let sep = separator.to_string();
for word in str.to_pinyin().enumerate() {
match word.1 {
None => {
if is_ignore_unknown_char {
continue;
}
let word_char = chars.get(word.0).unwrap();
if not_split_unknown_char {
unknown.push(*word_char);
if word.0 == chars.len() - 1 {
vec.push(unknown.clone());
unknown.clear();
if is_slug {
if *word_char == ' ' {
unknown.push(separator);
continue;
}
if !word_char.is_ascii_alphanumeric() {
continue;
}
} else {
} else if !not_split_unknown_char {
vec.push(word_char.to_string());
continue;
}
unknown.push(*word_char);
}
Some(py) => {
if !is_ignore_unknown_char && not_split_unknown_char && unknown.len() > 0 {
vec.push(unknown.clone());
if unknown.len() > 0 {
vec.push(unknown.to_string());
unknown.clear();
}
vec.push(match_mode(py, mode));
}
}
}
vec.join(separator.encode_utf8(&mut [0; 4]))
if unknown.len() > 0 {
vec.push(unknown.to_string());
}
vec.join(sep.as_str())
.replace(&format!("{}{}", sep, sep), sep.as_str())
}

///
Expand All @@ -142,19 +151,15 @@ fn to_convert_multi(
continue;
}
let word_char = chars.get(word.0).unwrap();
if not_split_unknown_char {
unknown.push(*word_char);
if word.0 == chars.len() - 1 {
vec.push(unknown.clone());
unknown.clear();
}
} else {
if !not_split_unknown_char {
vec.push(word_char.to_string());
continue;
}
unknown.push(*word_char);
}
Some(multi) => {
if !is_ignore_unknown_char && not_split_unknown_char && unknown.len() > 0 {
vec.push(unknown.clone());
if unknown.len() > 0 {
vec.push(unknown.to_string());
unknown.clear();
}
vec.push(
Expand All @@ -167,6 +172,10 @@ fn to_convert_multi(
}
}
}
if unknown.len() > 0 {
vec.push(unknown.to_string());
unknown.clear();
}
vec.join(separator.encode_utf8(&mut [0; 4]))
}

Expand All @@ -188,19 +197,15 @@ fn to_convert_array(
continue;
}
let word_char = chars.get(word.0).unwrap();
if not_split_unknown_char {
unknown.push(*word_char);
if word.0 == chars.len() - 1 {
vec.push(PinyinStr::from_string(unknown.clone(), 0));
unknown.clear();
}
} else {
vec.push(PinyinStr::from_string(word_char.to_string(), 0))
if !not_split_unknown_char {
vec.push(PinyinStr::from_string(word_char.to_string(), 0));
continue;
}
unknown.push(*word_char);
}
Some(multi) => {
if !is_ignore_unknown_char && not_split_unknown_char && unknown.len() > 0 {
vec.push(PinyinStr::from_string(unknown.clone(), 0));
if unknown.len() > 0 {
vec.push(PinyinStr::from_string(unknown.to_string(), 0));
unknown.clear();
}
vec.push(PinyinStr::from_string(
Expand All @@ -222,26 +227,26 @@ fn to_convert_array(
continue;
}
let word_char = chars.get(word.0).unwrap();
if not_split_unknown_char {
unknown.push(*word_char);
if word.0 == chars.len() - 1 {
vec.push(PinyinStr::from_string(unknown.clone(), 0));
unknown.clear();
}
} else {
vec.push(PinyinStr::from_string(word_char.to_string(), 0))
if !not_split_unknown_char {
vec.push(PinyinStr::from_string(word_char.to_string(), 0));
continue;
}
unknown.push(*word_char);
}
Some(py) => {
if !is_ignore_unknown_char && not_split_unknown_char && unknown.len() > 0 {
vec.push(PinyinStr::from_string(unknown.clone(), 0));
if unknown.len() > 0 {
vec.push(PinyinStr::from_string(unknown.to_string(), 0));
unknown.clear();
}
vec.push(PinyinStr::from_string(match_mode(py, mode), 1))
}
}
}
}
if unknown.len() > 0 {
vec.push(PinyinStr::from_string(unknown.to_string(), 0));
unknown.clear();
}
vec
}

Expand All @@ -260,8 +265,9 @@ pub extern "C" fn to_pinyin(
separator: c_uchar,
not_split_unknown_char: c_int,
mode: Mode,
is_slug: c_int,
) -> *mut c_char {
let pinyin_str = if is_multi == 1 {
let mut pinyin_str = if is_multi == 1 {
to_convert_multi(
const_to_str(str),
is_ignore_unknown_char == 1,
Expand All @@ -276,8 +282,12 @@ pub extern "C" fn to_pinyin(
separator as char,
not_split_unknown_char == 1,
mode,
is_slug == 1,
)
};
if is_slug == 1 {
pinyin_str = pinyin_str.to_lowercase()
}
return CString::new(pinyin_str).unwrap().into_raw();
}

Expand Down Expand Up @@ -307,15 +317,15 @@ pub extern "C" fn free_pointer(ptr: *mut c_char) {
return;
}
// Here we reclaim ownership of the data the pointer points to, to free the memory properly.
CString::from_raw(ptr);
let _ = CString::from_raw(ptr);
}
}

#[no_mangle]
pub unsafe extern "C" fn free_array(array: *mut PinyinArray) {
if !array.is_null() {
Vec::from_raw_parts((*array).array, (*array).len, (*array).len);
Box::from_raw(array);
let _ = Box::from_raw(array);
}
}

Expand All @@ -325,15 +335,20 @@ fn const_to_str(str: *const c_char) -> &'static str {

#[cfg(test)]
mod tests {
use crate::{to_convert, to_convert_array, to_convert_multi, Mode};
use crate::{match_mode, to_convert, to_convert_array, to_convert_multi, Mode};
use pinyin::ToPinyin;
use std::ffi::CStr;
use std::os::raw::c_char;

#[test]
fn it_works() {
let str = "slug标题类型测试 test test通过";

let pinyin_str_slug = to_convert(str, false, '-', true, Mode::Plain, true);
println!("slug: {}", pinyin_str_slug);
let str = "测试中文汉字转拼音。😊,rust yyds加上不能识别的结尾。。。";

let pinyin_str = to_convert(str, false, '-', true, Mode::Plain);
let pinyin_str = to_convert(str, false, '-', true, Mode::Plain, false);
println!("plain: {}", pinyin_str);

let pinyin_str = to_convert_multi(str, true, '-', true, Mode::Tone);
Expand Down

0 comments on commit 50828d9

Please sign in to comment.