-
-
Save logue/67b0bcd942ad67a7ad09450b493b71f5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Converts Japanese kana text to romaji.
 *
 * Full-width katakana is first folded to hiragana; every hiragana character
 * (or contracted pair such as "しゃ", or geminated group such as "った") is
 * then looked up in a vowel-column table and spelled according to the
 * public $mode_* switches. Characters that are not kana, one of $symbol or
 * one of $number cannot be converted and are dropped from the result.
 */
class Kana2Roma
{
    /** @var string Character encoding handed to the mb_* functions. */
    public $charset = 'utf-8';

    public $mode_Krows = 'k';   // か, く, こ (k or c)
    public $mode_XArows = 'l';  // small ぁ-row and standalone っ (l or x)
    public $mode_TYrows = 'ch'; // ち + small や-row (ty, ch or cy)
    public $mode_SYrows = 'sh'; // し + small や-row (sy or sh)
    public $mode_JYrows = 'j';  // じ + small や-row (j, zy or jy)
    public $mode_Sstr = 'sh';   // し (s, sh or c)
    public $mode_Jstr = 'j';    // じ (j or z)
    public $mode_TUstr = 'ts';  // つ (t or ts)
    public $mode_FUstr = 'f';   // ふ (h or f)
    public $mode_TIstr = 'ch';  // ち (t or ch)
    public $mode_Nstr = 'n';    // ん (n or nn)

    /** @var bool Return one string (true) or a flat list of pieces (false). */
    public $strout = true;

    /** @var bool In list mode, split every romaji piece into single letters. */
    public $chop = false;

    /** @var string[] Vowels, indexed in a-i-u-e-o order. */
    public $vowel = ['a', 'i', 'u', 'e', 'o'];

    /** @var string[] Default consonant for each position of a $cols_H column. */
    public $child = ['a', 'k', 's', 't', 'n', 'h', 'm', 'y', 'r', 'w', 'g', 'z', 'd', 'b', 'p', 'x', 'y', 't'];

    /** @var string[] Symbols copied to the output unchanged. */
    public $symbol = ['!', '?', '-', "'", ','];

    /** @var string[] Digits copied to the output unchanged. */
    public $number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];

    /**
     * Hiragana grouped by vowel column. The position inside a column selects
     * the consonant from $child; '@' only pads positions that have no kana.
     *
     * @var array<string, string[]>
     */
    public $cols_H = [
        'A' => ['あ', 'か', 'さ', 'た', 'な', 'は', 'ま', 'や', 'ら', 'わ', 'が', 'ざ', 'だ', 'ば', 'ぱ', 'ぁ', 'ゃ'],
        'I' => ['い', 'き', 'し', 'ち', 'に', 'ひ', 'み', '@', 'り', '@', 'ぎ', 'じ', 'ぢ', 'び', 'ぴ', 'ぃ'],
        'U' => ['う', 'く', 'す', 'つ', 'ぬ', 'ふ', 'む', 'ゆ', 'る', 'ん', 'ぐ', 'ず', 'づ', 'ぶ', 'ぷ', 'ぅ', 'ゅ', 'っ'],
        'E' => ['え', 'け', 'せ', 'て', 'ね', 'へ', 'め', '@', 'れ', '@', 'げ', 'ぜ', 'で', 'べ', 'ぺ', 'ぇ'],
        'O' => ['お', 'こ', 'そ', 'と', 'の', 'ほ', 'も', 'よ', 'ろ', 'を', 'ご', 'ぞ', 'ど', 'ぼ', 'ぽ', 'ぉ', 'ょ'],
    ];

    /** @var null|string Fallback text converted when __invoke() gets no input. */
    public $const;

    /**
     * Splits the text into convertible slices and romanizes each of them.
     *
     * The former ": string" return type was removed: it raised a TypeError
     * both for empty input (null was returned) and whenever $strout was
     * false (an array was returned).
     *
     * @param null|string $txt text to convert; $const is used when null or ''
     *
     * @return string|string[] a string when $strout is true, otherwise a flat
     *                         list of romaji pieces; '' or [] for empty input
     */
    public function __invoke(?string $txt = null)
    {
        // Compare explicitly instead of empty(): the text '0' is valid input.
        if ($txt === null || $txt === '') {
            $txt = $this->const === null ? '' : (string) $this->const;
        }
        if ($txt === '') {
            return $this->strout ? '' : [];
        }

        // 'c' folds full-width katakana to hiragana so one table is enough.
        $txt = mb_convert_kana($txt, 'c', $this->charset);

        $out = [];
        foreach ($this->_TextSlice($txt) as $slice) {
            $roman = mb_strlen($slice, $this->charset) === 1
                ? $this->_baseOne($slice)
                : $this->_baseTwo($slice);
            $out[] = $this->stringChopper($roman);
        }

        return $this->strout ? implode('', $out) : $this->flatten($out);
    }

    /**
     * Optionally breaks one romaji piece into single letters.
     *
     * Splitting only happens when $chop is true and $strout is false;
     * otherwise the argument is returned untouched.
     *
     * @param mixed $str romaji for one slice (null when it was unconvertible)
     *
     * @return mixed list of letters when splitting, otherwise $str itself
     */
    public function stringChopper($str)
    {
        if (!$this->chop || $this->strout) {
            return $str;
        }

        $str = (string) $str; // null (unconvertible slice) yields no letters
        $letters = [];
        $length = mb_strlen($str, $this->charset);
        for ($n = 0; $n < $length; $n++) {
            $letters[] = mb_substr($str, $n, 1, $this->charset);
        }

        return $letters;
    }

    /**
     * Cuts the text into slices that are romanized as a unit.
     *
     * A slice is a single character, a character plus a following small
     * ゃ/ゅ/ょ, or っ plus the sound it doubles. っ is kept on its own when a
     * symbol or digit follows. When the sound after っ is itself a contracted
     * pair (e.g. "っしょ") all three characters form one slice, so that
     * "いっしょ" becomes "issho" rather than "isshiyo".
     *
     * @param string $txt text to split
     *
     * @return string[] slices in input order
     */
    public function _TextSlice(string $txt): array
    {
        $smallY = ['ゃ', 'ゅ', 'ょ'];
        $max = mb_strlen($txt, $this->charset);
        $slices = [];

        for ($i = 0; $i < $max; $i++) {
            $cur = mb_substr($txt, $i, 1, $this->charset);
            $nxt = mb_substr($txt, $i + 1, 1, $this->charset); // '' past the end

            if (\in_array($nxt, $smallY, true)) {
                $slices[] = $cur.$nxt;
                $i++;
                continue;
            }

            $doubles = $cur === 'っ'
                && !\in_array($nxt, $this->symbol, true)
                && !\in_array($nxt, $this->number, true);
            if (!$doubles) {
                $slices[] = $cur;
                continue;
            }

            $third = mb_substr($txt, $i + 2, 1, $this->charset);
            if ($nxt !== '' && \in_array($third, $smallY, true)) {
                $slices[] = $cur.$nxt.$third;
                $i += 2;
                continue;
            }

            // Also reached for a trailing っ, where $nxt is ''.
            $slices[] = $cur.$nxt;
            $i++;
        }

        return $slices;
    }

    /**
     * Romanizes a multi-character slice.
     *
     * Slices containing っ double the first letter of the sound that follows.
     * ち/し/じ + small や-row use the $mode_TYrows/$mode_SYrows/$mode_JYrows
     * spellings. Any other pair joins the first letter of the leading kana
     * with the romaji of the second character.
     *
     * @param string $str slice as produced by _TextSlice()
     *
     * @return null|string romaji; null only for a lone unconvertible character
     */
    public function _baseTwo(string $str)
    {
        $length = mb_strlen($str, $this->charset);

        if (mb_strpos($str, 'っ', 0, $this->charset) !== false) {
            if ($length === 1) {
                return $this->_baseOne($str);
            }
            $rest = mb_substr($str, 1, null, $this->charset);
            // A longer remainder is a contracted pair and needs pair handling.
            $sound = (string) ($length === 2 ? $this->_baseOne($rest) : $this->_baseTwo($rest));

            return mb_substr($sound, 0, 1, $this->charset).$sound;
        }

        $head = mb_substr($str, 0, 1, $this->charset);
        $tail = mb_substr($str, 1, 1, $this->charset);

        $prefixes = [
            'ち' => $this->mode_TYrows,
            'し' => $this->mode_SYrows,
            'じ' => $this->mode_JYrows,
        ];
        $vowelIndex = ['ゃ' => 0, 'ゅ' => 2, 'ょ' => 4];
        if ($length === 2 && isset($prefixes[$head], $vowelIndex[$tail])) {
            return $prefixes[$head].$this->vowel[$vowelIndex[$tail]];
        }

        $first = (string) $this->_baseOne($head);
        $second = (string) $this->_baseOne($tail);

        return mb_substr($first, 0, 1, $this->charset).$second;
    }

    /**
     * Romanizes a single character.
     *
     * The character is searched in each vowel column of $cols_H and handed
     * to the matching _Change_*_Rows() method; symbols and digits are
     * returned unchanged.
     *
     * @param mixed $str one character
     *
     * @return null|string romaji, or null when the character is unknown
     */
    public function _baseOne($str)
    {
        // '@' only pads $cols_H; a literal '@' in the input is not kana.
        if ($str === '@') {
            return null;
        }

        $handlers = [
            'A' => '_Change_A_Rows',
            'I' => '_Change_I_Rows',
            'U' => '_Change_U_Rows',
            'E' => '_Change_E_Rows',
            'O' => '_Change_O_Rows',
        ];
        foreach ($handlers as $column => $method) {
            $key = array_search($str, $this->cols_H[$column], true);
            if ($key !== false) {
                return $this->{$method}($key);
            }
        }

        if (\in_array($str, $this->symbol, true) || \in_array($str, $this->number, true)) {
            return $str;
        }

        return null;
    }

    /**
     * Romanizes a kana of the "a" column.
     *
     * @param mixed $key position of the kana inside $cols_H['A']
     *
     * @return string
     */
    public function _Change_A_Rows($key)
    {
        return $this->romanizeColumn($key, 0, [
            1 => $this->mode_Krows,   // か
            15 => $this->mode_XArows, // small ぁ
        ]);
    }

    /**
     * Romanizes a kana of the "i" column.
     *
     * @param mixed $key position of the kana inside $cols_H['I']
     *
     * @return string
     */
    public function _Change_I_Rows($key)
    {
        return $this->romanizeColumn($key, 1, [
            2 => $this->mode_Sstr,    // し
            3 => $this->mode_TIstr,   // ち
            11 => $this->mode_Jstr,   // じ
            15 => $this->mode_XArows, // small ぃ
        ]);
    }

    /**
     * Romanizes a kana of the "u" column, which also holds ん and っ.
     *
     * @param mixed $key position of the kana inside $cols_H['U']
     *
     * @return string
     */
    public function _Change_U_Rows($key)
    {
        if ($key === 9) { // ん carries no vowel
            return $this->mode_Nstr;
        }

        return $this->romanizeColumn($key, 2, [
            1 => $this->mode_Krows,                      // く
            3 => $this->mode_TUstr,                      // つ
            5 => $this->mode_FUstr,                      // ふ
            15 => $this->mode_XArows,                    // small ぅ
            17 => $this->mode_XArows.$this->mode_TUstr,  // standalone っ
        ]);
    }

    /**
     * Romanizes a kana of the "e" column.
     *
     * @param mixed $key position of the kana inside $cols_H['E']
     *
     * @return string
     */
    public function _Change_E_Rows($key)
    {
        return $this->romanizeColumn($key, 3, [
            15 => $this->mode_XArows, // small ぇ
        ]);
    }

    /**
     * Romanizes a kana of the "o" column.
     *
     * @param mixed $key position of the kana inside $cols_H['O']
     *
     * @return string
     */
    public function _Change_O_Rows($key)
    {
        return $this->romanizeColumn($key, 4, [
            1 => $this->mode_Krows,   // こ
            15 => $this->mode_XArows, // small ぉ
        ]);
    }

    /**
     * Flattens a nested list into a single-level list.
     *
     * Null entries (unconvertible characters) are skipped. The previous
     * array_shift() loop treated the first null as the end of the list and
     * silently discarded everything after it.
     *
     * @param array $array possibly nested list
     *
     * @return array flat list without null entries
     */
    public function flatten($array)
    {
        $flat = [];
        foreach ($array as $value) {
            if (\is_array($value)) {
                foreach ($this->flatten($value) as $inner) {
                    $flat[] = $inner;
                }
            } elseif ($value !== null) {
                $flat[] = $value;
            }
        }

        return $flat;
    }

    /**
     * Shared body of the _Change_*_Rows() methods.
     *
     * Position 0 is the bare vowel; positions listed in $overrides use the
     * given consonant spelling; every other position uses $child.
     *
     * @param mixed                $key        position inside the column
     * @param int                  $vowelIndex index into $vowel
     * @param array<int, string>   $overrides  consonant spelling per position
     *
     * @return string
     */
    private function romanizeColumn($key, int $vowelIndex, array $overrides)
    {
        $vowel = $this->vowel[$vowelIndex];
        if ($key === 0) {
            return $vowel;
        }
        if (\is_int($key) && array_key_exists($key, $overrides)) {
            return $overrides[$key].$vowel;
        }

        return $this->child[$key].$vowel;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Usage example: romanize a phrase that mixes katakana and hiragana.
$converter = new Kana2Roma();
$romaji = $converter('アキハバラあったかまつり');
echo $romaji;
// Output: akihabaraattakamatsuri
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment