Skip to content
This repository was archived by the owner on Dec 11, 2020. It is now read-only.

Commit 5b21829

Browse files
committed
Merge pull request #490 from fzaninotto/add_transliterator
Add transliterator to email and username
2 parents d82603a + eeaa79c commit 5b21829

27 files changed

+188
-817
lines changed

composer.json

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
"phpunit/phpunit": "~4.0",
1717
"squizlabs/php_codesniffer": "~1.5"
1818
},
19+
"suggest": {
20+
"ext-intl": "*"
21+
},
1922
"autoload": {
2023
"psr-4": {
2124
"Faker\\": "src/Faker/"

src/Faker/Provider/Internet.php

+131-8
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,140 @@ class Internet extends \Faker\Provider\Base
2929
'http://{{domainName}}/{{slug}}.html',
3030
'https://{{domainName}}/{{slug}}.html',
3131
);
32+
33+
/**
 * Replaces non-ASCII letters with an ASCII approximation using a static
 * lookup table.
 *
 * The pairs are applied by str_replace() in table order, and each pair
 * works on the result of the previous ones. Order therefore matters:
 * the Armenian digraph 'ու' must stay ahead of the single letter 'ո',
 * and 'ʼ' is first turned into "'" which a later pair then removes.
 *
 * Characters that are not in the table are returned unchanged.
 *
 * NOTE(review): in the copy this was revised from, many entries had an
 * empty string as key because the original character had been lost
 * (apparently polytonic Greek, Georgian and several Vietnamese letters
 * - confirm against the authoritative file). An empty search string can
 * never match, so those dead entries are left out here; restore the real
 * characters rather than re-adding empty keys.
 *
 * @param string $string UTF-8 encoded text
 *
 * @return string the text with every listed character replaced
 */
public static function toAscii($string)
{
    $transliterationTable = array(
        // Latin
        'IJ' => 'I','Ö' => 'O','Œ' => 'O','Ü' => 'U','ä' => 'a','æ' => 'a',
        'ij' => 'i','ö' => 'o','œ' => 'o','ü' => 'u','ß' => 's','ſ' => 's',
        'À' => 'A','Á' => 'A','Â' => 'A','Ã' => 'A','Ä' => 'A','Å' => 'A',
        'Æ' => 'A','Ā' => 'A','Ą' => 'A','Ă' => 'A','Ç' => 'C','Ć' => 'C',
        'Č' => 'C','Ĉ' => 'C','Ċ' => 'C','Ď' => 'D','Đ' => 'D','È' => 'E',
        'É' => 'E','Ê' => 'E','Ë' => 'E','Ē' => 'E','Ę' => 'E','Ě' => 'E',
        'Ĕ' => 'E','Ė' => 'E','Ĝ' => 'G','Ğ' => 'G','Ġ' => 'G','Ģ' => 'G',
        'Ĥ' => 'H','Ħ' => 'H','Ì' => 'I','Í' => 'I','Î' => 'I','Ï' => 'I',
        'Ī' => 'I','Ĩ' => 'I','Ĭ' => 'I','Į' => 'I','İ' => 'I','Ĵ' => 'J',
        // The four accented capital L's used to map to 'K'; their lowercase
        // counterparts below have always mapped to 'l'.
        'Ķ' => 'K','Ľ' => 'L','Ĺ' => 'L','Ļ' => 'L','Ŀ' => 'L','Ł' => 'L',
        'Ñ' => 'N','Ń' => 'N','Ň' => 'N','Ņ' => 'N','Ŋ' => 'N','Ò' => 'O',
        'Ó' => 'O','Ô' => 'O','Õ' => 'O','Ø' => 'O','Ō' => 'O','Ő' => 'O',
        'Ŏ' => 'O','Ŕ' => 'R','Ř' => 'R','Ŗ' => 'R','Ś' => 'S','Ş' => 'S',
        'Ŝ' => 'S','Ș' => 'S','Š' => 'S','Ť' => 'T','Ţ' => 'T','Ŧ' => 'T',
        'Ț' => 'T','Ù' => 'U','Ú' => 'U','Û' => 'U','Ū' => 'U','Ů' => 'U',
        'Ű' => 'U','Ŭ' => 'U','Ũ' => 'U','Ų' => 'U','Ŵ' => 'W','Ŷ' => 'Y',
        'Ÿ' => 'Y','Ý' => 'Y','Ź' => 'Z','Ż' => 'Z','Ž' => 'Z','à' => 'a',
        'á' => 'a','â' => 'a','ã' => 'a','ā' => 'a','ą' => 'a','ă' => 'a',
        'å' => 'a','ç' => 'c','ć' => 'c','č' => 'c','ĉ' => 'c','ċ' => 'c',
        'ď' => 'd','đ' => 'd','è' => 'e','é' => 'e','ê' => 'e','ë' => 'e',
        'ē' => 'e','ę' => 'e','ě' => 'e','ĕ' => 'e','ė' => 'e','ƒ' => 'f',
        'ĝ' => 'g','ğ' => 'g','ġ' => 'g','ģ' => 'g','ĥ' => 'h','ħ' => 'h',
        'ì' => 'i','í' => 'i','î' => 'i','ï' => 'i','ī' => 'i','ĩ' => 'i',
        'ĭ' => 'i','į' => 'i','ı' => 'i','ĵ' => 'j','ķ' => 'k','ĸ' => 'k',
        'ł' => 'l','ľ' => 'l','ĺ' => 'l','ļ' => 'l','ŀ' => 'l','ñ' => 'n',
        'ń' => 'n','ň' => 'n','ņ' => 'n','ʼn' => 'n','ŋ' => 'n','ò' => 'o',
        'ó' => 'o','ô' => 'o','õ' => 'o','ø' => 'o','ō' => 'o','ő' => 'o',
        'ŏ' => 'o','ŕ' => 'r','ř' => 'r','ŗ' => 'r','ś' => 's','š' => 's',
        'ť' => 't','ù' => 'u','ú' => 'u','û' => 'u','ū' => 'u','ů' => 'u',
        'ű' => 'u','ŭ' => 'u','ũ' => 'u','ų' => 'u','ŵ' => 'w','ÿ' => 'y',
        'ý' => 'y','ŷ' => 'y','ż' => 'z','ź' => 'z','ž' => 'z',
        // Greek, uppercase
        'Α' => 'A','Ά' => 'A','Β' => 'B','Γ' => 'G','Δ' => 'D','Ε' => 'E',
        'Έ' => 'E','Ζ' => 'Z','Η' => 'I','Ή' => 'I','Θ' => 'T','Ι' => 'I',
        'Ί' => 'I','Ϊ' => 'I','Ἷ' => 'I','Κ' => 'K','Λ' => 'L','Μ' => 'M',
        'Ν' => 'N','Ξ' => 'K','Ο' => 'O','Ό' => 'O','Π' => 'P','Ρ' => 'R',
        'Σ' => 'S','Τ' => 'T','Υ' => 'Y','Ύ' => 'Y','Ϋ' => 'Y','Φ' => 'F',
        'Χ' => 'X','Ψ' => 'P','Ω' => 'O','Ώ' => 'O',
        // Greek, lowercase
        'α' => 'a','ά' => 'a','β' => 'b','γ' => 'g','δ' => 'd','ε' => 'e',
        'έ' => 'e','ζ' => 'z','η' => 'i','ή' => 'i','θ' => 't','ι' => 'i',
        'ί' => 'i','ϊ' => 'i','ΐ' => 'i','κ' => 'k','λ' => 'l','μ' => 'm',
        'ν' => 'n','ξ' => 'k','ο' => 'o','ό' => 'o','π' => 'p','ρ' => 'r',
        'σ' => 's','ς' => 's','τ' => 't','υ' => 'y','ύ' => 'y','ϋ' => 'y',
        'ΰ' => 'y','φ' => 'f','χ' => 'x','ψ' => 'p','ω' => 'o','ώ' => 'o',
        // Cyrillic, uppercase
        'А' => 'A','Б' => 'B','В' => 'V','Г' => 'G','Д' => 'D','Е' => 'E',
        'Ё' => 'E','Ж' => 'Z','З' => 'Z','И' => 'I','Й' => 'I','К' => 'K',
        'Л' => 'L','М' => 'M','Н' => 'N','О' => 'O','П' => 'P','Р' => 'R',
        'С' => 'S','Т' => 'T','У' => 'U','Ф' => 'F','Х' => 'K','Ц' => 'T',
        'Ч' => 'C','Ш' => 'S','Щ' => 'S','Ы' => 'Y','Э' => 'E','Ю' => 'Y',
        'Я' => 'Y',
        // Cyrillic, lowercase. These used to produce uppercase Latin
        // letters; they now keep the case of the input like every other
        // script in this table.
        'а' => 'a','б' => 'b','в' => 'v','г' => 'g','д' => 'd','е' => 'e',
        'ё' => 'e','ж' => 'z','з' => 'z','и' => 'i','й' => 'i','к' => 'k',
        'л' => 'l','м' => 'm','н' => 'n','о' => 'o','п' => 'p','р' => 'r',
        'с' => 's','т' => 't','у' => 'u','ф' => 'f','х' => 'k','ц' => 't',
        'ч' => 'c','ш' => 's','щ' => 's','ы' => 'y','э' => 'e','ю' => 'y',
        'я' => 'y',
        // Miscellaneous additions, kept in their original relative order
        'ð' => 'd','Ð' => 'D','þ' => 't','Þ' => 'T',
        'ā' => 'a','ţ' => 't','ʼ' => "'", '̧' => '',
        'ʼ' => "'",'/' => '',
        'ế' => 'e','ơ' => 'o','ư' => 'u',
        'Ģ' => 'G','Š' => 'S','ļ' => 'l','ž' => 'z','Ē' => 'E','ņ' => 'n',
        'Č' => 'C','ș' => 's','ț' => 't','ş' => 's',
        // Apostrophes are dropped; this must come after the pairs above
        // that produce an apostrophe.
        "'" => '',
        // Armenian; the digraph 'ու' has to precede the single letter 'ո'
        'ու' => 'u','ա' => 'a','բ' => 'b','գ' => 'g','դ' => 'd',
        'ե' => 'e','զ' => 'z','է' => 'e','ը' => 'y','թ' => 't','ժ' => 'zh',
        'ի' => 'i','լ' => 'l','խ' => 'kh','ծ' => 'ts','կ' => 'k','հ' => 'h',
        'ձ' => 'dz','ղ' => 'gh','ճ' => 'ch','մ' => 'm','յ' => 'y','ն' => 'n',
        'շ' => 'sh','ո' => 'o','չ' => 'ch','պ' => 'p','ջ' => 'j','ռ' => 'r',
        'ս' => 's','վ' => 'v','տ' => 't','ր' => 'r','ց' => 'ts','փ' => 'p',
        'ք' => 'q','և' => 'ev','օ' => 'o','ֆ' => 'f',
    );

    return str_replace(array_keys($transliterationTable), array_values($transliterationTable), $string);
}
146+
147+
/**
 * Reduces a string to lowercase ASCII letters, digits, underscores and dots.
 *
 * The intl transliterator is used when its function is available; the
 * static lookup table in toAscii() is used otherwise, and also when the
 * intl call reports a failure. Whatever is still outside [A-Za-z0-9_.]
 * afterwards is removed.
 *
 * The result is always lowercased. The intl rule set already ends in
 * Lower(), but toAscii() keeps the case of its input, so without the
 * explicit lowercasing the output would depend on whether the extension
 * is installed.
 *
 * @param string $string UTF-8 encoded text
 *
 * @return string possibly empty
 */
private static function transliterate($string)
{
    $transString = false;

    if (function_exists('transliterator_transliterate')) {
        $transString = transliterator_transliterate("Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; Lower();", $string);
    }

    // transliterator_transliterate() returns false on failure; use the
    // lookup table instead of handing false to preg_replace().
    if ($transString === false) {
        $transString = static::toAscii($string);
    }

    // preg_replace() yields null when the subject is not valid UTF-8; the
    // cast turns that into an empty string so callers always get a string.
    $ascii = (string) preg_replace('/[^A-Za-z0-9_.]/u', '', $transString);

    // Lowercase only after stripping: at this point the string is pure
    // ASCII, so the byte-wise strtolower() cannot damage multibyte input.
    return strtolower($ascii);
}
157+
33158
/**
34159
* @example 'jdoe@acme.biz'
35160
*/
36161
public function email()
37162
{
38163
$format = static::randomElement(static::$emailFormats);
39-
40-
return preg_replace('/\s/u', '', $this->generator->parse($format));
164+
165+
return $this->generator->parse($format);
41166
}
42167

43168
/**
@@ -91,10 +216,10 @@ final public static function safeEmailDomain()
91216
public function userName()
92217
{
93218
$format = static::randomElement(static::$userNameFormats);
219+
$username = static::bothify($this->generator->parse($format));
94220

95-
return static::toLower(static::bothify($this->generator->parse($format)));
221+
return static::transliterate($username);
96222
}
97-
98223
/**
99224
* @example 'fY4èHdZv68'
100225
*/
@@ -119,11 +244,9 @@ public function domainName()
119244
public function domainWord()
120245
{
121246
$company = $this->generator->format('company');
122-
$companyElements = explode(' ', $company);
123-
$company = $companyElements[0];
124-
$company = preg_replace('/\W/u', '', $company);
247+
$companyElements = function_exists('mb_split') ? mb_split(' ', $company) : explode(' ', $company);
125248

126-
return static::toLower($company);
249+
return static::transliterate($companyElements[0]);
127250
}
128251

129252
/**

src/Faker/Provider/bg_BG/Internet.php

-23
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,4 @@ class Internet extends \Faker\Provider\Internet
66
{
77
protected static $freeEmailDomain = array('gmail.com', 'yahoo.com', 'hotmail.com', 'mail.bg', 'abv.bg', 'dir.bg');
88
protected static $tld = array('bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'com', 'biz', 'info', 'net', 'org');
9-
10-
/**
11-
* @example 'jdoe'
12-
*/
13-
public function userName()
14-
{
15-
$format = static::randomElement(static::$userNameFormats);
16-
17-
return static::bothify($this->generator->parse($format));
18-
}
19-
20-
/**
21-
* @example 'faber'
22-
*/
23-
public function domainWord()
24-
{
25-
$company = $this->generator->format('company');
26-
$companyElements = explode(' ', $company);
27-
$company = $companyElements[0];
28-
$company = preg_replace('/\W/u', '', $company);
29-
30-
return $company;
31-
}
329
}

src/Faker/Provider/cs_CZ/Internet.php

-25
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,6 @@
44

55
class Internet extends \Faker\Provider\Internet
66
{
7-
87
protected static $freeEmailDomain = array('gmail.com', 'yahoo.com', 'seznam.cz', 'atlas.cz', 'centrum.cz', 'email.cz', 'post.cz');
9-
108
protected static $tld = array('cz', 'cz', 'cz', 'cz', 'cz', 'cz', 'com', 'info', 'net', 'org');
11-
12-
/**
13-
* Converts czech characters to their ASCII representation
14-
*
15-
* @return string
16-
*/
17-
private function toAscii($string)
18-
{
19-
$from = array('Ě', 'ě', 'Š', 'š', 'Č', 'č', 'Ř', 'ř', 'Ž', 'ž', 'Ý', 'ý', 'Á', 'á', 'Í', 'í', 'É', 'é', 'Ó', 'ó', 'Ú', 'ú', 'Ů', 'ů', 'Ď', 'ď', 'Ť', 'ť', 'Ň', 'ň');
20-
$to = array('E', 'e', 'S', 's', 'C', 'c', 'R', 'r', 'Z', 'z', 'Y', 'y', 'A', 'a', 'I', 'i', 'E', 'e', 'O', 'o', 'U', 'u', 'U', 'u', 'D', 'd', 'T', 't', 'N', 'n');
21-
22-
return str_replace($from, $to, $string);
23-
}
24-
25-
public function email()
26-
{
27-
return $this->toAscii(parent::email());
28-
}
29-
30-
public function userName()
31-
{
32-
return $this->toAscii(parent::userName());
33-
}
349
}

src/Faker/Provider/da_DK/Internet.php

-38
Original file line numberDiff line numberDiff line change
@@ -27,42 +27,4 @@ class Internet extends \Faker\Provider\Internet
2727
protected static $tld = array(
2828
'com', 'com', 'com', 'biz', 'info', 'net', 'org', 'dk', 'dk', 'dk',
2929
);
30-
31-
/**
32-
* Converts Danish characters to their ASCII representation
33-
*
34-
* @return string
35-
*/
36-
private static function toAscii($string)
37-
{
38-
$from = array('æ', 'ø', 'å', 'Æ', 'Ø', 'Å');
39-
$to = array('ae', 'oe', 'aa', 'AE', 'OE', 'AA');
40-
41-
return str_replace($from, $to, $string);
42-
}
43-
44-
/**
45-
* @example 'jeppe'
46-
* @return string
47-
*/
48-
public function userName()
49-
{
50-
$format = static::randomElement(static::$userNameFormats);
51-
52-
return static::toLower(static::toAscii(static::bothify($this->generator->parse($format))));
53-
}
54-
55-
/**
56-
* @example 'jensen.dk'
57-
* @return string
58-
*/
59-
public function domainWord()
60-
{
61-
$company = $this->generator->format('company');
62-
$companyElements = explode(' ', $company);
63-
$company = $companyElements[0];
64-
$company = preg_replace('/\W/u', '', $company);
65-
66-
return static::toLower(static::toAscii($company));
67-
}
6830
}

src/Faker/Provider/de_AT/Internet.php

-36
Original file line numberDiff line numberDiff line change
@@ -6,40 +6,4 @@ class Internet extends \Faker\Provider\Internet
66
{
77
protected static $freeEmailDomain = array('aon.at', 'chello.at', 'gmail.com', 'gmx.at', 'univie.ac.at');
88
protected static $tld = array('at', 'co.at', 'com', 'net', 'org');
9-
10-
/**
11-
* Converts German characters (Umlaute) to their ASCII representation
12-
*
13-
* @return string
14-
*/
15-
private static function toAscii($string)
16-
{
17-
$from = array('ä', 'Ä', 'ü', 'Ü', 'ö', 'Ö', 'ß');
18-
$to = array('a', 'A', 'u', 'U', 'o', 'O', 'ss');
19-
20-
return str_replace($from, $to, $string);
21-
}
22-
23-
/**
24-
* @example 'jdoe'
25-
*/
26-
public function userName()
27-
{
28-
$format = static::randomElement(static::$userNameFormats);
29-
30-
return static::toLower(static::toAscii(static::bothify($this->generator->parse($format))));
31-
}
32-
33-
/**
34-
* @example 'faber'
35-
*/
36-
public function domainWord()
37-
{
38-
$company = $this->generator->format('company');
39-
$companyElements = explode(' ', $company);
40-
$company = $companyElements[0];
41-
$company = preg_replace('/\W/u', '', $company);
42-
43-
return static::toLower(static::toAscii($company));
44-
}
459
}

src/Faker/Provider/de_DE/Internet.php

-36
Original file line numberDiff line numberDiff line change
@@ -6,40 +6,4 @@ class Internet extends \Faker\Provider\Internet
66
{
77
protected static $freeEmailDomain = array('web.de', 'gmail.com', 'hotmail.de', 'yahoo.de', 'googlemail.com', 'aol.de', 'gmx.de');
88
protected static $tld = array('com', 'com', 'com', 'net', 'org', 'de', 'de', 'de');
9-
10-
/**
11-
* Converts German characters (Umlaute) to their ASCII representation
12-
*
13-
* @return string
14-
*/
15-
private static function toAscii($string)
16-
{
17-
$from = array('ä', 'Ä', 'ü', 'Ü', 'ö', 'Ö', 'ß');
18-
$to = array('a', 'A', 'u', 'U', 'o', 'O', 'ss');
19-
20-
return str_replace($from, $to, $string);
21-
}
22-
23-
/**
24-
* @example 'jdoe'
25-
*/
26-
public function userName()
27-
{
28-
$format = static::randomElement(static::$userNameFormats);
29-
30-
return static::toLower(static::toAscii(static::bothify($this->generator->parse($format))));
31-
}
32-
33-
/**
34-
* @example 'faber'
35-
*/
36-
public function domainWord()
37-
{
38-
$company = $this->generator->format('company');
39-
$companyElements = explode(' ', $company);
40-
$company = $companyElements[0];
41-
$company = preg_replace('/\W/u', '', $company);
42-
43-
return static::toLower(static::toAscii($company));
44-
}
459
}

0 commit comments

Comments
 (0)