//下面这个laravel的东西 我改改代码 就可以算是有大用
从 https://github.com/overtrue/pinyin 改装的 改装后的内容见 附件从laravel改装过来
基于 CC-CEDICT 词典的中文转拼音工具,更准确的支持多音字的汉字转拼音解决方案。
安装
使用 Composer 安装:
composer require "overtrue/pinyin:~3.0"
使用
拼音数组
use Overtrue\Pinyin\Pinyin;
$pinyin = new Pinyin();
$pinyin->convert('带着希望去旅行,比到达终点更美好');
// ["dai", "zhe", "xi", "wang", "qu", "lv", "xing", "bi", "dao", "da", "zhong", "dian", "geng", "mei", "hao"]
$pinyin->convert('带着希望去旅行,比到达终点更美好', PINYIN_UNICODE);
// ["dài","zhe","xī","wàng","qù","lǚ","xíng","bǐ","dào","dá","zhōng","diǎn","gèng","měi","hǎo"]
$pinyin->convert('带着希望去旅行,比到达终点更美好', PINYIN_ASCII);
//["dai4","zhe","xi1","wang4","qu4","lv3","xing2","bi3","dao4","da2","zhong1","dian3","geng4","mei3","hao3"]
选项:
选项 | 描述 |
---|---|
PINYIN_NONE | 不带音调输出: mei hao |
PINYIN_ASCII | 带数字式音调: mei3 hao3 |
PINYIN_UNICODE | UNICODE 式音调:měi hǎo |
生成用于链接的拼音字符串
$pinyin->permalink('带着希望去旅行'); // dai-zhe-xi-wang-qu-lv-xing
$pinyin->permalink('带着希望去旅行', '.'); // dai.zhe.xi.wang.qu.lv.xing
获取首字符字符串
$pinyin->abbr('带着希望去旅行'); // dzxwqlx
$pinyin->abbr('带着希望去旅行', '-'); // d-z-x-w-q-l-x
翻译整段文字为拼音
将会保留中文字符:,。 ! ? : “ ” ‘ ’
并替换为对应的英文符号。
$pinyin->sentence('带着希望去旅行,比到达终点更美好!');
// dai zhe xi wang qu lv xing, bi dao da zhong dian geng mei hao!
$pinyin->sentence('带着希望去旅行,比到达终点更美好!', true);
// dài zhe xī wàng qù lǚ xíng, bǐ dào dá zhōng diǎn gèng měi hǎo!
翻译姓名
姓名的姓的读音有些与普通字不一样,比如 ‘单’ 常见的音为 dan
,而作为姓的时候读 shan
。
$pinyin->name('单某某'); // ['shan', 'mou', 'mou']
$pinyin->name('单某某', PINYIN_UNICODE); // ["shàn","mǒu","mǒu"]
在 Laravel 中使用
独立的包在这里:overtrue/laravel-pinyin
Contribution
欢迎提意见及完善补充词库 tools/patches/
来自 https://github.com/overtrue/pinyin
最简单、最准确的PHP中文转拼音的类 github项目地址:https://github.com/jifei/Pinyin
支持获取拼音以及拼音的缩写即首字母
支持utf-8、gbk等编码
能准确匹配6千多个常用汉字
单个汉字,一句话,中英文混合都完美支持
例子utf-8版include_once 'Pinyin.php';
echo Pinyin::getPinyin("早上好");//获取拼音
echo Pinyin::getShortPinyin("早上好");//获取拼音缩写
例子gbk版include_once 'Pinyin.php';
echo Pinyin::getPinyin("早上好",'gb2312');//获取拼音
echo Pinyin::getShortPinyin("早上好",'gb2312');//获取拼音缩写
来自 https://segmentfault.com/a/1190000002957586
//下面不太好用
PHP汉字转拼音更准确的解决方案
到目前为止这可能是最准确的一个解决方案了,大GitHub上各种找,没有一个好用的,大部分都只是汉字转拼音,所以包含多音字的结果基本都错误。当然也有基于词典的转换工具,不过还是解决不了词库不全造成的多音字问题(当然,完全解决是不太可能的,或者词库会无比的大)。比如康熙来了
大部分工具的试用结果是:kang xi lai liao.
基于上面的原因,我造了下面这个轮子。特点如下:
- 基于CC-CEDICT.词典,解决大部分词汇问题;
- 添加补充词典,解决CC-CEDICT不全或者不准确的问题;
- 添加词频表,根据使用频率再一步提高多音字的准确度;
Pinyin
基于CC-CEDICT词典的中文转拼音工具, 更准确的汉字转拼音解决方案。 CC-CEDICT.
use \Overtrue\Pinyin;
echo Pinyin::pinyin('带着希望去旅行,比到达终点更美好');
// dài zhe xī wàng qù lǔ xíng bǐ dào dá zhōng diǎn gèng měi hǎo
//多音字
// 了
Pinyin::pinyin('了然'); // liǎo rán
Pinyin::pinyin('来了'); // lái le
// 还
Pinyin::pinyin('还有'); // hái yǒu
Pinyin::pinyin('交还'); // jiāo huán
// 什
Pinyin::pinyin('什么'); // shén me
Pinyin::pinyin('什锦'); // shí jǐn
// 便
Pinyin::pinyin('便当'); // biàn dāng
Pinyin::pinyin('便宜'); // pián yí
// 剥
Pinyin::pinyin('剥皮'); // bāo pí
Pinyin::pinyin('剥皮器'); // bō pí qì
// 不
Pinyin::pinyin('赔不是'); // péi bú shi
Pinyin::pinyin('跑了和尚,跑不了庙'); // pǎo le hé shàng , pǎo bù liǎo miào
// 降
Pinyin::pinyin('降温'); // jiàng wēn
Pinyin::pinyin('投降'); // tóu xiáng
// 都
Pinyin::pinyin('首都'); // shǒu dū
Pinyin::pinyin('都什么年代了'); // dōu shén me nián dài le
// 乐
Pinyin::pinyin('快乐'); // kuài lè
Pinyin::pinyin('音乐'); // yīn yuè
// 长
Pinyin::pinyin('成长'); // chéng zhǎng
Pinyin::pinyin('长江'); // cháng jiāng
// 难
Pinyin::pinyin('难民'); // nàn mín
Pinyin::pinyin('难过'); // nán guò
...
安装
- 使用 Composer 安装:
composer require overtrue/pinyin >=1.4
或者在你的项目composer.json加入:
{
"require": {
"overtrue/pinyin": ">=1.4"
}
}
- 直接下载文件
src/Overtrue/Pinyin.php
引入到项目中。
使用
<?php
use \Overtrue\Pinyin;
//获取拼音
echo Pinyin::pinyin('带着希望去旅行,比到达终点更美好');
//或者: Overtrue\pinyin($string);
// dài zhe xī wàng qù lǔ xíng bǐ dào dá zhōng diǎn gèng měi hǎo
//获取首字母
echo Pinyin::letter('带着希望去旅行,比到达终点更美好');
// D Z X W Q L X B D D Z D G M H
设置
delimiter
分隔符,默认为一个空格 ‘ ‘;traditional
繁体accent
是否输出音调;letter
只输出首字母,或者直接使用Pinyin::letter($string)
;only_chinese
只保留$string中中文部分。全局设置:*
Pinyin::set('delimiter', '-');
- 临时设置:*
Pinyin::pinyin($word, $settings)
在调用的方法后传参
example:
Pinyin::set('delimiter', '-');//全局
echo Pinyin::pinyin('带着希望去旅行,比到达终点更美好');
// dài-zhe-xī-wàng-qù-lǔ-xíng-bǐ-dào-dá-zhōng-diǎn-gèng-měi-hǎo
$setting = [
'delimiter' => '-',
'accent' => false,
];
echo Pinyin::pinyin('带着希望去旅行,比到达终点更美好', $setting);//这里的setting只是临时修改,并非全局设置
// dai-zhe-xi-wang-qu-lu-xing-bi-dao-da-zhong-dian-geng-mei-hao
Pinyin::set('accent', false);
echo Pinyin::pinyin('带着希望去旅行,比到达终点更美好');
// dai zhe xi wang qu lu xing bi dao da zhong dian geng mei hao
Contribution
欢迎提意见及完善补充词库 src/Overtrue/data/additional.php
!
来自 http://overtrue.me/articles/2014/10/php-chinese-to-pinyin.html
php将汉字转成拼音
- /**
- * exchange
- * 把GB2312的汉字,转成ASIIC码
- * $str var 这个参数传得内容是要转的汉字,这里的汉字必须是gb2312,或者gbk
- */
- function exchange($str){
- $ret="";
- for($j=0;$j<160){
- $q=ord(substr($str,++$j,1));
- $p=$p*256+$q-65536;
- }
- $ret.=search_ping($p);
- }
- return $ret;
- }
- /**
- * search_ping
- * 把拼音和数字对应,并根据参数,找到拼音
- * $num int 汉字对应的数字
- */
- function search_ping($num){
- $d=array(
- array("a",-20319),
- array("ai",-20317),
- array("an",-20304),
- array("ang",-20295),
- array("ao",-20292),
- array("ba",-20283),
- array("bai",-20265),
- array("ban",-20257),
- array("bang",-20242),
- array("bao",-20230),
- array("bei",-20051),
- array("ben",-20036),
- array("beng",-20032),
- array("bi",-20026),
- array("bian",-20002),
- array("biao",-19990),
- array("bie",-19986),
- array("bin",-19982),
- array("bing",-19976),
- array("bo",-19805),
- array("bu",-19784),
- array("ca",-19775),
- array("cai",-19774),
- array("can",-19763),
- array("cang",-19756),
- array("cao",-19751),
- array("ce",-19746),
- array("ceng",-19741),
- array("cha",-19739),
- array("chai",-19728),
- array("chan",-19725),
- array("chang",-19715),
- array("chao",-19540),
- array("che",-19531),
- array("chen",-19525),
- array("cheng",-19515),
- array("chi",-19500),
- array("chong",-19484),
- array("chou",-19479),
- array("chu",-19467),
- array("chuai",-19289),
- array("chuan",-19288),
- array("chuang",-19281),
- array("chui",-19275),
- array("chun",-19270),
- array("chuo",-19263),
- array("ci",-19261),
- array("cong",-19249),
- array("cou",-19243),
- array("cu",-19242),
- array("cuan",-19238),
- array("cui",-19235),
- array("cun",-19227),
- array("cuo",-19224),
- array("da",-19218),
- array("dai",-19212),
- array("dan",-19038),
- array("dang",-19023),
- array("dao",-19018),
- array("de",-19006),
- array("deng",-19003),
- array("di",-18996),
- array("dian",-18977),
- array("diao",-18961),
- array("die",-18952),
- array("ding",-18783),
- array("diu",-18774),
- array("dong",-18773),
- array("dou",-18763),
- array("du",-18756),
- array("duan",-18741),
- array("dui",-18735),
- array("dun",-18731),
- array("duo",-18722),
- array("e",-18710),
- array("en",-18697),
- array("er",-18696),
- array("fa",-18526),
- array("fan",-18518),
- array("fang",-18501),
- array("fei",-18490),
- array("fen",-18478),
- array("feng",-18463),
- array("fo",-18448),
- array("fou",-18447),
- array("fu",-18446),
- array("ga",-18239),
- array("gai",-18237),
- array("gan",-18231),
- array("gang",-18220),
- array("gao",-18211),
- array("ge",-18201),
- array("gei",-18184),
- array("gen",-18183),
- array("geng",-18181),
- array("gong",-18012),
- array("gou",-17997),
- array("gu",-17988),
- array("gua",-17970),
- array("guai",-17964),
- array("guan",-17961),
- array("guang",-17950),
- array("gui",-17947),
- array("gun",-17931),
- array("guo",-17928),
- array("ha",-17922),
- array("hai",-17759),
- array("han",-17752),
- array("hang",-17733),
- array("hao",-17730),
- array("he",-17721),
- array("hei",-17703),
- array("hen",-17701),
- array("heng",-17697),
- array("hong",-17692),
- array("hou",-17683),
- array("hu",-17676),
- array("hua",-17496),
- array("huai",-17487),
- array("huan",-17482),
- array("huang",-17468),
- array("hui",-17454),
- array("hun",-17433),
- array("huo",-17427),
- array("ji",-17417),
- array("jia",-17202),
- array("jian",-17185),
- array("jiang",-16983),
- array("jiao",-16970),
- array("jie",-16942),
- array("jin",-16915),
- array("jing",-16733),
- array("jiong",-16708),
- array("jiu",-16706),
- array("ju",-16689),
- array("juan",-16664),
- array("jue",-16657),
- array("jun",-16647),
- array("ka",-16474),
- array("kai",-16470),
- array("kan",-16465),
- array("kang",-16459),
- array("kao",-16452),
- array("ke",-16448),
- array("ken",-16433),
- array("keng",-16429),
- array("kong",-16427),
- array("kou",-16423),
- array("ku",-16419),
- array("kua",-16412),
- array("kuai",-16407),
- array("kuan",-16403),
- array("kuang",-16401),
- array("kui",-16393),
- array("kun",-16220),
- array("kuo",-16216),
- array("la",-16212),
- array("lai",-16205),
- array("lan",-16202),
- array("lang",-16187),
- array("lao",-16180),
- array("le",-16171),
- array("lei",-16169),
- array("leng",-16158),
- array("li",-16155),
- array("lia",-15959),
- array("lian",-15958),
- array("liang",-15944),
- array("liao",-15933),
- array("lie",-15920),
- array("lin",-15915),
- array("ling",-15903),
- array("liu",-15889),
- array("long",-15878),
- array("lou",-15707),
- array("lu",-15701),
- array("lv",-15681),
- array("luan",-15667),
- array("lue",-15661),
- array("lun",-15659),
- array("luo",-15652),
- array("ma",-15640),
- array("mai",-15631),
- array("man",-15625),
- array("mang",-15454),
- array("mao",-15448),
- array("me",-15436),
- array("mei",-15435),
- array("men",-15419),
- array("meng",-15416),
- array("mi",-15408),
- array("mian",-15394),
- array("miao",-15385),
- array("mie",-15377),
- array("min",-15375),
- array("ming",-15369),
- array("miu",-15363),
- array("mo",-15362),
- array("mou",-15183),
- array("mu",-15180),
- array("na",-15165),
- array("nai",-15158),
- array("nan",-15153),
- array("nang",-15150),
- array("nao",-15149),
- array("ne",-15144),
- array("nei",-15143),
- array("nen",-15141),
- array("neng",-15140),
- array("ni",-15139),
- array("nian",-15128),
- array("niang",-15121),
- array("niao",-15119),
- array("nie",-15117),
- array("nin",-15110),
- array("ning",-15109),
- array("niu",-14941),
- array("nong",-14937),
- array("nu",-14933),
- array("nv",-14930),
- array("nuan",-14929),
- array("nue",-14928),
- array("nuo",-14926),
- array("o",-14922),
- array("ou",-14921),
- array("pa",-14914),
- array("pai",-14908),
- array("pan",-14902),
- array("pang",-14894),
- array("pao",-14889),
- array("pei",-14882),
- array("pen",-14873),
- array("peng",-14871),
- array("pi",-14857),
- array("pian",-14678),
- array("piao",-14674),
- array("pie",-14670),
- array("pin",-14668),
- array("ping",-14663),
- array("po",-14654),
- array("pu",-14645),
- array("qi",-14630),
- array("qia",-14594),
- array("qian",-14429),
- array("qiang",-14407),
- array("qiao",-14399),
- array("qie",-14384),
- array("qin",-14379),
- array("qing",-14368),
- array("qiong",-14355),
- array("qiu",-14353),
- array("qu",-14345),
- array("quan",-14170),
- array("que",-14159),
- array("qun",-14151),
- array("ran",-14149),
- array("rang",-14145),
- array("rao",-14140),
- array("re",-14137),
- array("ren",-14135),
- array("reng",-14125),
- array("ri",-14123),
- array("rong",-14122),
- array("rou",-14112),
- array("ru",-14109),
- array("ruan",-14099),
- array("rui",-14097),
- array("run",-14094),
- array("ruo",-14092),
- array("sa",-14090),
- array("sai",-14087),
- array("san",-14083),
- array("sang",-13917),
- array("sao",-13914),
- array("se",-13910),
- array("sen",-13907),
- array("seng",-13906),
- array("sha",-13905),
- array("shai",-13896),
- array("shan",-13894),
- array("shang",-13878),
- array("shao",-13870),
- array("she",-13859),
- array("shen",-13847),
- array("sheng",-13831),
- array("shi",-13658),
- array("shou",-13611),
- array("shu",-13601),
- array("shua",-13406),
- array("shuai",-13404),
- array("shuan",-13400),
- array("shuang",-13398),
- array("shui",-13395),
- array("shun",-13391),
- array("shuo",-13387),
- array("si",-13383),
- array("song",-13367),
- array("sou",-13359),
- array("su",-13356),
- array("suan",-13343),
- array("sui",-13340),
- array("sun",-13329),
- array("suo",-13326),
- array("ta",-13318),
- array("tai",-13147),
- array("tan",-13138),
- array("tang",-13120),
- array("tao",-13107),
- array("te",-13096),
- array("teng",-13095),
- array("ti",-13091),
- array("tian",-13076),
- array("tiao",-13068),
- array("tie",-13063),
- array("ting",-13060),
- array("tong",-12888),
- array("tou",-12875),
- array("tu",-12871),
- array("tuan",-12860),
- array("tui",-12858),
- array("tun",-12852),
- array("tuo",-12849),
- array("wa",-12838),
- array("wai",-12831),
- array("wan",-12829),
- array("wang",-12812),
- array("wei",-12802),
- array("wen",-12607),
- array("weng",-12597),
- array("wo",-12594),
- array("wu",-12585),
- array("xi",-12556),
- array("xia",-12359),
- array("xian",-12346),
- array("xiang",-12320),
- array("xiao",-12300),
- array("xie",-12120),
- array("xin",-12099),
- array("xing",-12089),
- array("xiong",-12074),
- array("xiu",-12067),
- array("xu",-12058),
- array("xuan",-12039),
- array("xue",-11867),
- array("xun",-11861),
- array("ya",-11847),
- array("yan",-11831),
- array("yang",-11798),
- array("yao",-11781),
- array("ye",-11604),
- array("yi",-11589),
- array("yin",-11536),
- array("ying",-11358),
- array("yo",-11340),
- array("yong",-11339),
- array("you",-11324),
- array("yu",-11303),
- array("yuan",-11097),
- array("yue",-11077),
- array("yun",-11067),
- array("za",-11055),
- array("zai",-11052),
- array("zan",-11045),
- array("zang",-11041),
- array("zao",-11038),
- array("ze",-11024),
- array("zei",-11020),
- array("zen",-11019),
- array("zeng",-11018),
- array("zha",-11014),
- array("zhai",-10838),
- array("zhan",-10832),
- array("zhang",-10815),
- array("zhao",-10800),
- array("zhe",-10790),
- array("zhen",-10780),
- array("zheng",-10764),
- array("zhi",-10587),
- array("zhong",-10544),
- array("zhou",-10533),
- array("zhu",-10519),
- array("zhua",-10331),
- array("zhuai",-10329),
- array("zhuan",-10328),
- array("zhuang",-10322),
- array("zhui",-10315),
- array("zhun",-10309),
- array("zhuo",-10307),
- array("zi",-10296),
- array("zong",-10281),
- array("zou",-10274),
- array("zu",-10270),
- array("zuan",-10262),
- array("zui",-10260),
- array("zun",-10256),
- array("zuo",-10254)
- );
echo exchange("张");
来自 http://blog.51yip.com/php/545.html