diff --git a/.docker/php/Dockerfile b/.docker/php/Dockerfile new file mode 100644 index 0000000..e6ccba8 --- /dev/null +++ b/.docker/php/Dockerfile @@ -0,0 +1,22 @@ +# ---------------------- +# The FPM base container +# ---------------------- +FROM php:7.4-fpm-alpine AS dev + +RUN apk add --no-cache --virtual .build-deps \ + $PHPIZE_DEPS + +# Cleanup apk cache and temp files +RUN rm -rf /var/cache/apk/* /tmp/* + +# ---------------------- +# Composer install step +# ---------------------- + +# Get latest Composer +COPY --from=composer:latest /usr/bin/composer /usr/bin/composer + +# ---------------------- +# The FPM production container +# ---------------------- +FROM dev diff --git a/.docker/php/www.conf b/.docker/php/www.conf new file mode 100644 index 0000000..39bef0c --- /dev/null +++ b/.docker/php/www.conf @@ -0,0 +1,85 @@ +; Start a new pool named 'www'. +; the variable $pool can be used in any directive and will be replaced by the +; pool name ('www' here) +[www] + +; Unix user/group of processes +; Note: The user is mandatory. If the group is not set, the default user's group +; will be used. +user = www-data +group = www-data + +; The address on which to accept FastCGI requests. +; Valid syntaxes are: +; 'ip.add.re.ss:port' - to listen on a TCP socket to a specific IPv4 address on +; a specific port; +; '[ip:6:addr:ess]:port' - to listen on a TCP socket to a specific IPv6 address on +; a specific port; +; 'port' - to listen on a TCP socket to all addresses +; (IPv6 and IPv4-mapped) on a specific port; +; '/path/to/unix/socket' - to listen on a unix socket. +; Note: This value is mandatory. +listen = 9000 + +; Choose how the process manager will control the number of child processes. +; Possible Values: +; static - a fixed number (pm.max_children) of child processes; +; dynamic - the number of child processes are set dynamically based on the +; following directives. With this process management, there will be +; always at least 1 children. 
+; pm.max_children - the maximum number of children that can +; be alive at the same time. +; pm.start_servers - the number of children created on startup. +; pm.min_spare_servers - the minimum number of children in 'idle' +; state (waiting to process). If the number +; of 'idle' processes is less than this +; number then some children will be created. +; pm.max_spare_servers - the maximum number of children in 'idle' +; state (waiting to process). If the number +; of 'idle' processes is greater than this +; number then some children will be killed. +; ondemand - no children are created at startup. Children will be forked when +; new requests will connect. The following parameter are used: +; pm.max_children - the maximum number of children that +; can be alive at the same time. +; pm.process_idle_timeout - The number of seconds after which +; an idle process will be killed. +; Note: This value is mandatory. +pm = dynamic + +; The number of child processes to be created when pm is set to 'static' and the +; maximum number of child processes when pm is set to 'dynamic' or 'ondemand'. +; This value sets the limit on the number of simultaneous requests that will be +; served. Equivalent to the ApacheMaxClients directive with mpm_prefork. +; Equivalent to the PHP_FCGI_CHILDREN environment variable in the original PHP +; CGI. The below defaults are based on a server without much resources. Don't +; forget to tweak pm.* to fit your needs. +; Note: Used when pm is set to 'static', 'dynamic' or 'ondemand' +; Note: This value is mandatory. +pm.max_children = 5 + +; The number of child processes created on startup. +; Note: Used only when pm is set to 'dynamic' +; Default Value: min_spare_servers + (max_spare_servers - min_spare_servers) / 2 +pm.start_servers = 2 + +; The desired minimum number of idle server processes. 
+; Note: Used only when pm is set to 'dynamic'
+; Note: Mandatory when pm is set to 'dynamic'
+pm.min_spare_servers = 1
+
+; The desired maximum number of idle server processes.
+; Note: Used only when pm is set to 'dynamic'
+; Note: Mandatory when pm is set to 'dynamic'
+pm.max_spare_servers = 3
+
+; The number of seconds after which an idle process will be killed.
+; Note: Used only when pm is set to 'ondemand'
+; Default Value: 10s
+;pm.process_idle_timeout = 10s;
+
+; The number of requests each child process should execute before respawning.
+; This can be useful to work around memory leaks in 3rd party libraries. For
+; endless request processing specify '0'. Equivalent to PHP_FCGI_MAX_REQUESTS.
+; Default Value: 0
+;pm.max_requests = 500
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..6537ca4
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,15 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+
+[*.md]
+trim_trailing_whitespace = false
+
+[*.{yml,yaml}]
+indent_size = 2
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..d437b77
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,11 @@
+* text=auto
+
+/.docker export-ignore
+/.github export-ignore
+/test export-ignore
+/.gitattributes export-ignore
+/.gitignore export-ignore
+/CODE_OF_CONDUCT.md export-ignore
+/CONTRIBUTING.md export-ignore
+/docker-compose.yaml export-ignore
+/phpunit.xml.dist export-ignore
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..717fe2c
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,41 @@
+name: tests
+
+on: [ push, pull_request ]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: true
+      matrix:
+        os: [ ubuntu-latest ]
+        php: [ '7.4', '8.0', '8.1', '8.2' ] # quoted: a bare 8.0 is a YAML number and is read as 8
+        dependency-version: [ prefer-lowest, prefer-stable ]
+
+    name: P${{ matrix.php }} - ${{ matrix.dependency-version }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Cache dependencies
+        uses: actions/cache@v2
+        with:
+          path: ~/.composer/cache/files
+          key: dependencies-php-${{ matrix.php }}-composer-${{ hashFiles('composer.json') }}
+
+      - name: Setup PHP
+        uses: shivammathur/setup-php@v2
+        with:
+          php-version: ${{ matrix.php }}
+          extensions: mbstring
+          tools: composer:v2
+          coverage: none
+
+      - name: Install dependencies
+        run: |
+          composer install --no-interaction
+          composer update --${{ matrix.dependency-version }} --prefer-dist --no-interaction
+
+      - name: Execute tests
+        run: vendor/bin/phpunit --testdox
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..19b41b6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+/.idea/
+/vendor/
+.phpunit.result.cache
+composer.lock
+phpunit.xml
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c70174d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023, Anton Komarev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..a7ebbe2 --- /dev/null +++ b/README.md @@ -0,0 +1,80 @@ +# PHP Unicode + +

+Releases +Build +License +

+ +## Introduction + +Streamline the manipulation of Unicode characters (code points) and composite characters. + +## Installation + +Pull in the package through Composer. + +```shell +composer require cybercog/php-unicode +``` + +## Usage + +### Instantiate Unicode Character + +```php +$character = \Cog\Unicode\Character::ofChar('ÿ'); + +$character = \Cog\Unicode\Character::ofDecimal(255); + +$character = \Cog\Unicode\Character::ofHexadecimal('U+00FF'); + +$character = \Cog\Unicode\Character::ofHtmlEntity('&yuml;'); + +$character = \Cog\Unicode\Character::ofXmlEntity('&#xff;'); +``` + +### Represent Unicode Character in any format + +```php +$character = \Cog\Unicode\Character::ofChar('ÿ'); + +echo $character->toChar(); // (string) "ÿ" + +echo $character->toDecimal(); // (int) 255 + +echo $character->toHexadecimal(); // (string) "U+00FF" + +echo $character->toHtmlEntity(); // (string) "&yuml;" + +echo $character->toXmlEntity(); // (string) "&#xff;" +``` + +### Instantiate Unicode CompositeCharacter + +```php +$compositeCharacter = \Cog\Unicode\CompositeCharacter::ofChars('👨‍👩‍👧‍👦'); +``` + +### Represent Unicode CompositeCharacter in any format + +```php +$compositeCharacter = \Cog\Unicode\CompositeCharacter::ofChars('👨‍👩‍👧‍👦'); + +$compositeCharacter->toChars(); // (string) "👨‍👩‍👧‍👦" +``` + +## License + +- `PHP Unicode` package is open-sourced software licensed under the [MIT license](LICENSE) by [Anton Komarev]. + +## About CyberCog + +[CyberCog] is a Social Unity of enthusiasts. Researching the best solutions in product & software development is our passion. 
+ +- [Follow us on Twitter](https://twitter.com/cybercog) + +CyberCog + +[Anton Komarev]: https://komarev.com +[CyberCog]: https://cybercog.su diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..bf7fc33 --- /dev/null +++ b/composer.json @@ -0,0 +1,47 @@ +{ + "name": "cybercog/php-unicode", + "description": "PHP Unicode library", + "type": "library", + "license": "MIT", + "keywords": [ + "cog", + "unicode", + "code-point", + "character", + "composite-character", + "symbol", + "emoji", + "html-entity", + "xml-entity" + ], + "authors": [ + { + "name": "Anton Komarev", + "email": "anton@komarev.com", + "homepage": "https://komarev.com", + "role": "Developer" + } + ], + "autoload": { + "psr-4": { + "Cog\\Unicode\\": "src/" + } + }, + "autoload-dev": { + "psr-4": { + "Test\\Unit\\Cog\\Unicode\\": "test/" + } + }, + "require": { + "php": "^7.4|^8.0", + "ext-mbstring": "*" + }, + "require-dev": { + "phpunit/phpunit": "^9.0|^10.0" + }, + "config": { + "sort-packages": true + }, + "minimum-stability": "dev", + "prefer-stable" : true +} diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..e82d107 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,13 @@ +version: "3.9" +services: + app: + container_name: php-unicode-lib + image: php-unicode-lib + build: + context: ./ + dockerfile: ./.docker/php/Dockerfile + restart: unless-stopped + working_dir: /app + volumes: + - ./:/app + - ./.docker/php/www.conf:/usr/local/etc/php-fpm.d/www.conf:ro diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..6575766 --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,13 @@ + + + + + ./test/Unit + + + diff --git a/src/Character.php b/src/Character.php new file mode 100644 index 0000000..3f4ab00 --- /dev/null +++ b/src/Character.php @@ -0,0 +1,117 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+declare(strict_types=1);
+
+namespace Cog\Unicode;
+
+/**
+ * Immutable value object holding a single Unicode code point.
+ *
+ * Instances are created through the `of*` named constructors and rendered
+ * back through the matching `to*` methods.
+ */
+final class Character
+{
+    /** Encoding passed explicitly to every mbstring/HTML call instead of relying on ini defaults. */
+    private const ENCODING = 'UTF-8';
+
+    /** Code point value, always within 0x0000..0x10FFFF. */
+    private int $decimal;
+
+    /**
+     * @param int $decimal Code point value.
+     *
+     * @throws \OutOfRangeException When the value is outside 0x0000..0x10FFFF.
+     */
+    private function __construct(
+        int $decimal
+    ) {
+        // Validate before assigning so the property never holds an out-of-range value.
+        if ($decimal < 0x0000 || $decimal > 0x10FFFF) {
+            throw new \OutOfRangeException(
+                "Character code point value `$decimal` is out of range",
+            );
+        }
+
+        $this->decimal = $decimal;
+    }
+
+    /**
+     * Creates a character from its code point value.
+     *
+     * @throws \OutOfRangeException When the value is outside 0x0000..0x10FFFF.
+     */
+    public static function ofDecimal(
+        int $decimal
+    ): self {
+        return new self(
+            $decimal,
+        );
+    }
+
+    /**
+     * Creates a character from `U+XXXX` notation (at least four hex digits).
+     *
+     * @throws \InvalidArgumentException When the string is not in `U+XXXX` notation.
+     * @throws \OutOfRangeException When the value is outside 0x0000..0x10FFFF.
+     */
+    public static function ofHexadecimal(
+        string $hexadecimal
+    ): self {
+        // The `D` modifier anchors `$` at the very end, so a trailing newline is rejected.
+        if (preg_match('#^U\+([0-9A-Fa-f]{4,})$#D', $hexadecimal, $matches) !== 1) {
+            throw new \InvalidArgumentException(
+                "Invalid hexadecimal format `$hexadecimal`",
+            );
+        }
+
+        // Convert the digits only: since PHP 7.4 hexdec() raises a deprecation
+        // notice for any other character, such as the `U+` prefix.
+        $decimal = hexdec($matches[1]);
+
+        // hexdec() returns a float once the value no longer fits into an int.
+        if (!is_int($decimal)) {
+            throw new \OutOfRangeException(
+                "Character code point value `$hexadecimal` is out of range",
+            );
+        }
+
+        return new self(
+            $decimal,
+        );
+    }
+
+    /**
+     * Creates a character from a UTF-8 string holding exactly one code point.
+     *
+     * @throws \InvalidArgumentException When the string is not valid UTF-8 or
+     *                                   does not hold exactly one code point.
+     */
+    public static function ofChar(
+        string $char
+    ): self {
+        // mb_ord() returns false for malformed input, which would otherwise
+        // reach the int-typed constructor and fail with a TypeError.
+        if (!mb_check_encoding($char, self::ENCODING)) {
+            throw new \InvalidArgumentException(
+                'Cannot instantiate Character of char, it is not a valid UTF-8 string',
+            );
+        }
+
+        if (mb_strlen($char, self::ENCODING) !== 1) {
+            throw new \InvalidArgumentException(
+                "Cannot instantiate Character of char `$char`, length is not equal to 1",
+            );
+        }
+
+        return new self(
+            mb_ord($char, self::ENCODING),
+        );
+    }
+
+    /**
+     * Creates a character from an HTML5 entity, or from a literal character.
+     *
+     * @throws \InvalidArgumentException When the decoded string does not hold
+     *                                   exactly one code point.
+     */
+    public static function ofHtmlEntity(
+        string $htmlEntity
+    ): self {
+        return self::ofChar(
+            html_entity_decode(
+                $htmlEntity,
+                ENT_HTML5 | ENT_QUOTES | ENT_SUBSTITUTE,
+                self::ENCODING,
+            ),
+        );
+    }
+
+    /**
+     * Creates a character from an XML entity, or from a literal character.
+     *
+     * @throws \InvalidArgumentException When the decoded string does not hold
+     *                                   exactly one code point.
+     */
+    public static function ofXmlEntity(
+        string $xmlEntity
+    ): self {
+        return self::ofChar(
+            html_entity_decode(
+                $xmlEntity,
+                ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE,
+                self::ENCODING,
+            ),
+        );
+    }
+
+    /** Returns the code point value. */
+    public function toDecimal(): int
+    {
+        return $this->decimal;
+    }
+
+    /** Returns `U+XXXX` notation: uppercase hex, zero-padded to at least four digits. */
+    public function toHexadecimal(): string
+    {
+        return sprintf('U+%04X', $this->decimal);
+    }
+
+    /**
+     * Returns the character as a UTF-8 string.
+     *
+     * @throws \UnexpectedValueException When the code point has no UTF-8 representation.
+     */
+    public function toChar(): string
+    {
+        $char = mb_chr($this->decimal, self::ENCODING);
+
+        // mb_chr() returns false when the code point cannot be encoded (e.g. a
+        // surrogate); fail explicitly instead of with a return-type TypeError.
+        if ($char === false) {
+            throw new \UnexpectedValueException(
+                "Character code point value `{$this->decimal}` cannot be represented as a char",
+            );
+        }
+
+        return $char;
+    }
+
+    /**
+     * Returns the HTML5 named entity, or the character itself when htmlentities() has none.
+     *
+     * @throws \UnexpectedValueException When the code point has no UTF-8 representation.
+     */
+    public function toHtmlEntity(): string
+    {
+        return htmlentities(
+            $this->toChar(),
+            ENT_HTML5 | ENT_QUOTES | ENT_SUBSTITUTE,
+            self::ENCODING,
+        );
+    }
+
+    /** Returns a hexadecimal numeric character reference with lowercase digits, as produced by dechex(). */
+    public function toXmlEntity(): string
+    {
+        return '&#x' . dechex($this->decimal) . ';';
+    }
+}
diff --git a/src/CompositeCharacter.php b/src/CompositeCharacter.php
new file mode 100644
index 0000000..f85abfc
--- /dev/null
+++ b/src/CompositeCharacter.php
@@ -0,0 +1,61 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace Cog\Unicode;
+
+/**
+ * Immutable, non-empty sequence of code points, stored as Character objects.
+ */
+final class CompositeCharacter
+{
+    /** @var list<Character> */
+    private array $characterList;
+
+    /**
+     * @param list<Character> $characterList The composite character code point sequence.
+     */
+    private function __construct(
+        array $characterList
+    ) {
+        $this->characterList = $characterList;
+    }
+
+    /**
+     * Splits a UTF-8 string into code points and wraps each one in a Character.
+     *
+     * @throws \InvalidArgumentException When the string is empty or not valid UTF-8.
+     */
+    public static function ofChars(
+        string $chars
+    ): self {
+        if ($chars === '') {
+            throw new \InvalidArgumentException(
+                'CompositeCharacter cannot be built from empty chars string',
+            );
+        }
+
+        // With the `u` modifier an empty pattern splits between code points.
+        $charList = preg_split('//u', $chars, -1, PREG_SPLIT_NO_EMPTY);
+
+        // preg_split() returns false when the subject is not valid UTF-8.
+        if ($charList === false) {
+            throw new \InvalidArgumentException(
+                'CompositeCharacter cannot be built from chars string, it is not a valid UTF-8 string',
+            );
+        }
+
+        return new self(
+            array_map(
+                static fn (string $char): Character => Character::ofChar($char),
+                $charList,
+            ),
+        );
+    }
+
+    /**
+     * Concatenates the characters back into a UTF-8 string.
+     */
+    public function toChars(): string
+    {
+        return implode(
+            '',
+            array_map(
+                static fn (Character $character): string => $character->toChar(),
+                $this->characterList,
+            ),
+        );
+    }
+}
diff --git a/test/Unit/CharacterTest.php b/test/Unit/CharacterTest.php
new file mode 100644
index 0000000..f156d20
--- /dev/null
+++ b/test/Unit/CharacterTest.php
@@ -0,0 +1,283 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace Test\Unit\Cog\Unicode;
+
+use Cog\Unicode\Character;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Round-trip tests for Character: each named constructor must yield the same
+ * five representations, and invalid input must be rejected.
+ */
+final class CharacterTest extends TestCase
+{
+    /**
+     * A character built from a raw char exposes every representation.
+     *
+     * @dataProvider provideUnicodeMap
+     */
+    public function testItCanInstantiateOfCharacter(
+        string $char,
+        int $decimal,
+        string $hexadecimal,
+        string $htmlEntity,
+        string $xmlEntity
+    ): void {
+        self::assertAllRepresentations(
+            Character::ofChar($char),
+            $char,
+            $decimal,
+            $hexadecimal,
+            $htmlEntity,
+            $xmlEntity,
+        );
+    }
+
+    /**
+     * A character built from a decimal code point exposes every representation.
+     *
+     * @dataProvider provideUnicodeMap
+     */
+    public function testItCanInstantiateOfDecimal(
+        string $char,
+        int $decimal,
+        string $hexadecimal,
+        string $htmlEntity,
+        string $xmlEntity
+    ): void {
+        self::assertAllRepresentations(
+            Character::ofDecimal($decimal),
+            $char,
+            $decimal,
+            $hexadecimal,
+            $htmlEntity,
+            $xmlEntity,
+        );
+    }
+
+    /**
+     * A character built from `U+XXXX` notation exposes every representation.
+     *
+     * @dataProvider provideUnicodeMap
+     */
+    public function testItCanInstantiateOfHexadecimal(
+        string $char,
+        int $decimal,
+        string $hexadecimal,
+        string $htmlEntity,
+        string $xmlEntity
+    ): void {
+        self::assertAllRepresentations(
+            Character::ofHexadecimal($hexadecimal),
+            $char,
+            $decimal,
+            $hexadecimal,
+            $htmlEntity,
+            $xmlEntity,
+        );
+    }
+
+    /**
+     * A character built from an HTML entity exposes every representation.
+     *
+     * @dataProvider provideUnicodeMap
+     */
+    public function testItCanInstantiateOfHtmlEntity(
+        string $char,
+        int $decimal,
+        string $hexadecimal,
+        string $htmlEntity,
+        string $xmlEntity
+    ): void {
+        self::assertAllRepresentations(
+            Character::ofHtmlEntity($htmlEntity),
+            $char,
+            $decimal,
+            $hexadecimal,
+            $htmlEntity,
+            $xmlEntity,
+        );
+    }
+
+    /**
+     * A character built from an XML entity exposes every representation.
+     *
+     * @dataProvider provideUnicodeMap
+     */
+    public function testItCanInstantiateOfXmlEntity(
+        string $char,
+        int $decimal,
+        string $hexadecimal,
+        string $htmlEntity,
+        string $xmlEntity
+    ): void {
+        // The NULL row cannot be round-tripped through an XML entity.
+        if ($xmlEntity === '�') {
+            $this->markTestSkipped('XML does not have NULL value');
+        }
+
+        self::assertAllRepresentations(
+            Character::ofXmlEntity($xmlEntity),
+            $char,
+            $decimal,
+            $hexadecimal,
+            $htmlEntity,
+            $xmlEntity,
+        );
+    }
+
+    public function testItCannotInstantiateOfCharacterWithEmptyString(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        Character::ofChar('');
+    }
+
+    public function testItCannotInstantiateOfCharacterWithMoreThanOneCharacter(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        Character::ofChar('AA');
+    }
+
+    public function testItCannotInstantiateOfDecimalWithNegativeValue(): void
+    {
+        $this->expectException(\OutOfRangeException::class);
+
+        Character::ofDecimal(-1);
+    }
+
+    public function testItCannotInstantiateOfHexadecimalWithTooLowValue(): void
+    {
+        $this->expectException(\OutOfRangeException::class);
+
+        // 0xFFFFFFFE is the 32-bit two's-complement bit pattern of -2.
+        Character::ofHexadecimal('U+FFFFFFFE');
+    }
+
+    public function testItCannotInstantiateOfHexadecimalWithTooBigValue(): void
+    {
+        $this->expectException(\OutOfRangeException::class);
+
+        // One above the U+10FFFF maximum.
+        Character::ofHexadecimal('U+110000');
+    }
+
+    public function testItCannotInstantiateOfHtmlEntityWithEmptyString(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        Character::ofHtmlEntity('');
+    }
+
+    public function testItCannotInstantiateOfHtmlEntityWithMoreThanOneCharacter(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        Character::ofHtmlEntity('© ');
+    }
+
+    public function testItCannotInstantiateOfXmlEntityWithEmptyString(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        Character::ofXmlEntity('');
+    }
+
+    public function testItCannotInstantiateOfXmlEntityWithMoreThanOneCharacter(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        Character::ofXmlEntity('™˜');
+    }
+
+    /**
+     * Rows of [char, decimal, hexadecimal, htmlEntity, xmlEntity].
+     */
+    public static function provideUnicodeMap(): array
+    {
+        return [
+            ["\x00", 0, 'U+0000', "\x00", '�'],
+            ['τΏΏ', 1114111, 'U+10FFFF', 'τΏΏ', '􏿿'],
+            [' ', 32, 'U+0020', ' ', ' '],
+            ['A', 65, 'U+0041', 'A', 'A'],
+            ['Β ', 160, 'U+00A0', ' ', ' '],
+            ['ΓΏ', 255, 'U+00FF', 'ÿ', 'ÿ'],
+            ['Δ€', 256, 'U+0100', 'Ā', 'Ā'],
+            ['ΕΏ', 383, 'U+017F', 'ΕΏ', 'ſ'],
+            ['€', 8364, 'U+20AC', '€', '€'],
+            ['βš™', 9881, 'U+2699', 'βš™', '⚙'],
+            ['πŸ‘¨', 128104, 'U+1F468', 'πŸ‘¨', '👨'],
+            ['οΏ½', 65533, 'U+FFFD', 'οΏ½', '�'],
+        ];
+    }
+
+    /**
+     * Asserts, in a fixed order, that the character renders to each expected representation.
+     */
+    private static function assertAllRepresentations(
+        Character $actual,
+        string $char,
+        int $decimal,
+        string $hexadecimal,
+        string $htmlEntity,
+        string $xmlEntity
+    ): void {
+        self::assertSame($char, $actual->toChar());
+        self::assertSame($decimal, $actual->toDecimal());
+        self::assertSame($hexadecimal, $actual->toHexadecimal());
+        self::assertSame($htmlEntity, $actual->toHtmlEntity());
+        self::assertSame($xmlEntity, $actual->toXmlEntity());
+    }
+}
diff --git a/test/Unit/CompositeCharacterTest.php b/test/Unit/CompositeCharacterTest.php
new file mode 100644
index 0000000..8324db9
--- /dev/null
+++ b/test/Unit/CompositeCharacterTest.php
@@ -0,0 +1,75 @@
+
+ *
+ * For the full copyright and license information, please
view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace Test\Unit\Cog\Unicode;
+
+use Cog\Unicode\CompositeCharacter;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Round-trip tests for CompositeCharacter: whatever goes into ofChars() must
+ * come back unchanged from toChars().
+ */
+final class CompositeCharacterTest extends TestCase
+{
+    /**
+     * A string holding a single code point survives the round trip.
+     *
+     * @dataProvider provideUnicodeMapSimple
+     */
+    public function testItCanInstantiateOfCharactersWithSingleCharacter(
+        string $characters
+    ): void {
+        $roundTripped = CompositeCharacter::ofChars($characters)->toChars();
+
+        self::assertSame($characters, $roundTripped);
+    }
+
+    /**
+     * A string holding several code points survives the round trip.
+     *
+     * @dataProvider provideUnicodeMapComposite
+     */
+    public function testItCanInstantiateOfCharactersWithManyCharacters(
+        string $characters
+    ): void {
+        $roundTripped = CompositeCharacter::ofChars($characters)->toChars();
+
+        self::assertSame($characters, $roundTripped);
+    }
+
+    public function testItCannotInstantiateOfCharactersWithEmptyString(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        CompositeCharacter::ofChars('');
+    }
+
+    /**
+     * One-element rows, each a string expected to hold a single code point.
+     */
+    public static function provideUnicodeMapSimple(): array
+    {
+        return [
+            ["\x00"],
+            ['τΏΏ'],
+            [' '],
+            ['A'],
+            ['ΓΏ'],
+            ['Δ€'],
+            ['ΕΏ'],
+            ['βš™'],
+            ['οΏ½'],
+        ];
+    }
+
+    /**
+     * One-element rows, each a string made of several code points.
+     */
+    public static function provideUnicodeMapComposite(): array
+    {
+        return [
+            ['πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦'],
+        ];
+    }
+}