From e5391682c7252c847d7d4c580b5372a4e410f3bd Mon Sep 17 00:00:00 2001 From: Herwin Date: Tue, 14 Jan 2025 19:07:50 +0100 Subject: [PATCH 1/2] Implement KOI8-R encoding --- include/natalie.hpp | 1 + .../encoding/koi8r_encoding_object.hpp | 18 +++ include/natalie/encodings.hpp | 1 + spec/core/string/valid_encoding_spec.rb | 4 +- src/encoding/koi8r_encoding_object.cpp | 139 ++++++++++++++++++ src/natalie.cpp | 4 + src/regexp_object.cpp | 2 +- test/natalie/encoding_test.rb | 6 + 8 files changed, 171 insertions(+), 4 deletions(-) create mode 100644 include/natalie/encoding/koi8r_encoding_object.hpp create mode 100644 src/encoding/koi8r_encoding_object.cpp diff --git a/include/natalie.hpp b/include/natalie.hpp index d7fbacb18..65b67eb57 100644 --- a/include/natalie.hpp +++ b/include/natalie.hpp @@ -58,6 +58,7 @@ #include "natalie/encoding/iso88597_encoding_object.hpp" #include "natalie/encoding/iso88598_encoding_object.hpp" #include "natalie/encoding/iso88599_encoding_object.hpp" +#include "natalie/encoding/koi8r_encoding_object.hpp" #include "natalie/encoding/shiftjis_encoding_object.hpp" #include "natalie/encoding/us_ascii_encoding_object.hpp" #include "natalie/encoding/utf16be_encoding_object.hpp" diff --git a/include/natalie/encoding/koi8r_encoding_object.hpp b/include/natalie/encoding/koi8r_encoding_object.hpp new file mode 100644 index 000000000..f46b6064d --- /dev/null +++ b/include/natalie/encoding/koi8r_encoding_object.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +#include "natalie/encoding/single_byte_encoding_object.hpp" +#include "natalie/string_object.hpp" + +namespace Natalie { + +using namespace TM; + +class Koi8REncodingObject : public SingleByteEncodingObject { +public: + Koi8REncodingObject(); +}; + +} diff --git a/include/natalie/encodings.hpp b/include/natalie/encodings.hpp index 5561a6a3c..536008dc4 100644 --- a/include/natalie/encodings.hpp +++ b/include/natalie/encodings.hpp @@ -47,6 +47,7 @@ enum class Encoding : size_t { ISO_8859_14, ISO_8859_15, ISO_8859_16, + KOI8_R, Windows_1250, Windows_1251, Windows_1252, diff --git a/spec/core/string/valid_encoding_spec.rb b/spec/core/string/valid_encoding_spec.rb index 8237bc04b..93cf554c5 100644 --- a/spec/core/string/valid_encoding_spec.rb +++ b/spec/core/string/valid_encoding_spec.rb @@ -54,9 +54,7 @@ str.force_encoding('ISO-8859-14').valid_encoding?.should be_true str.force_encoding('ISO-8859-15').valid_encoding?.should be_true str.force_encoding('ISO-8859-16').valid_encoding?.should be_true - NATFIXME 'Implement KOI8-R encoding', exception: ArgumentError do - str.force_encoding('KOI8-R').valid_encoding?.should be_true - end + str.force_encoding('KOI8-R').valid_encoding?.should be_true NATFIXME 'Implement KOI8-U encoding', exception: ArgumentError do str.force_encoding('KOI8-U').valid_encoding?.should be_true end diff --git a/src/encoding/koi8r_encoding_object.cpp b/src/encoding/koi8r_encoding_object.cpp new file mode 100644 index 000000000..44dbd2995 --- /dev/null +++ b/src/encoding/koi8r_encoding_object.cpp @@ -0,0 +1,139 @@ +#include "natalie.hpp" + +namespace Natalie { + +static const long KOI8R[] = { + 0X2500, + 0X2502, + 0X250C, + 0X2510, + 0X2514, + 0X2518, + 0X251C, + 0X2524, + 0X252C, + 0X2534, + 0X253C, + 0X2580, + 0X2584, + 0X2588, + 0X258C, + 0X2590, + 0X2591, + 0X2592, + 0X2593, + 0X2320, + 0X25A0, + 0X2219, + 0X221A, + 0X2248, + 0X2264, + 0X2265, + 0XA0, + 0X2321, + 0XB0, + 0XB2, + 0XB7, + 0XF7, + 0X2550, + 0X2551, + 0X2552, + 0X451, + 0X2553, + 0X2554, + 0X2555, + 0X2556, + 0X2557, + 0X2558, + 0X2559, + 0X255A, + 0X255B, + 0X255C, + 0X255D, + 0X255E, + 0X255F, + 0X2560, + 0X2561, + 0X401, + 0X2562, + 0X2563, + 0X2564, + 0X2565, + 0X2566, + 0X2567, + 0X2568, + 0X2569, + 0X256A, + 0X256B, + 0X256C, + 0XA9, + 0X44E, + 0X430, + 0X431, + 0X446, + 0X434, + 0X435, + 0X444, + 0X433, + 0X445, + 0X438, + 0X439, + 0X43A, + 0X43B, + 0X43C, + 0X43D, + 0X43E, + 0X43F, + 0X44F, + 0X440, + 0X441, + 0X442, + 0X443, + 0X436, + 0X432, + 0X44C, + 0X44B, + 0X437, + 0X448, + 0X44D, + 0X449, + 0X447, + 0X44A, + 0X42E, + 0X410, + 0X411, + 0X426, + 0X414, + 0X415, + 0X424, + 0X413, + 0X425, + 0X418, + 0X419, + 0X41A, + 0X41B, + 0X41C, + 0X41D, + 0X41E, + 0X41F, + 0X42F, + 0X420, + 0X421, + 0X422, + 0X423, + 0X416, + 0X412, + 0X42C, + 0X42B, + 0X417, + 0X428, + 0X42D, + 0X429, + 0X427, + 0X42A, +}; + +Koi8REncodingObject::Koi8REncodingObject() + : SingleByteEncodingObject { Encoding::KOI8_R, { "KOI8-R", "CP878" }, KOI8R } { } + +} diff --git a/src/natalie.cpp b/src/natalie.cpp index f8262ef8a..b6f89b11b 100644 --- a/src/natalie.cpp +++ b/src/natalie.cpp @@ -338,6 +338,10 @@ Env *build_top_env() { Encoding->const_set("ISO_8859_16"_s, EncodingIso885916); Encoding->const_set("ISO8859_16"_s, EncodingIso885916); + Value EncodingKoi8R = new Koi8REncodingObject {}; + Encoding->const_set("KOI8_R"_s, EncodingKoi8R); + Encoding->const_set("CP878"_s, EncodingKoi8R); + Value EncodingWindows1250 = new Windows1250EncodingObject {}; Encoding->const_set("Windows_1250"_s, EncodingWindows1250); Encoding->const_set("WINDOWS_1250"_s, EncodingWindows1250); diff --git a/src/regexp_object.cpp b/src/regexp_object.cpp index cea0abc41..c0aa3c645 100644 --- a/src/regexp_object.cpp +++ b/src/regexp_object.cpp @@ -65,7 +65,7 @@ static const auto ruby_encoding_lookup = []() { // ONIG_ENCODING_EUC_CN has no local encoding map.put(ONIG_ENCODING_SHIFT_JIS, Encoding::SHIFT_JIS); // ONIG_ENCODING_WINDOWS_31J has no local encoding - // ONIG_ENCODING_KOI8_R has no local encoding + map.put(ONIG_ENCODING_KOI8_R, Encoding::KOI8_R); // ONIG_ENCODING_KOI8_U has no local encoding map.put(ONIG_ENCODING_WINDOWS_1250, Encoding::Windows_1250); map.put(ONIG_ENCODING_WINDOWS_1251, Encoding::Windows_1251); diff --git a/test/natalie/encoding_test.rb b/test/natalie/encoding_test.rb index 8df8b9878..e12993914 100644 --- a/test/natalie/encoding_test.rb +++ b/test/natalie/encoding_test.rb @@ -770,6 +770,12 @@ end end + describe 'KOI8-R' do + it 'can be used to draw a table' do + "\xA4\x80\xA7".dup.force_encoding('KOI8-R').encode('UTF-8').should == '╓─╖' + end + end + describe 'Windows-1250' do it 'can convert codepoints' do [ From 0bd4cd9a09bd4d0a0dd14c91a97b6f5e46734db9 Mon Sep 17 00:00:00 2001 From: Herwin Date: Tue, 14 Jan 2025 19:12:37 +0100 Subject: [PATCH 2/2] Implement KOI8-U encoding --- include/natalie.hpp | 1 + .../encoding/koi8u_encoding_object.hpp | 18 +++ include/natalie/encodings.hpp | 1 + spec/core/kernel/shared/sprintf_encoding.rb | 8 +- spec/core/string/valid_encoding_spec.rb | 4 +- src/encoding/koi8u_encoding_object.cpp | 139 ++++++++++++++++++ src/natalie.cpp | 2 + src/regexp_object.cpp | 2 +- test/natalie/encoding_test.rb | 10 ++ 9 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 include/natalie/encoding/koi8u_encoding_object.hpp create mode 100644 src/encoding/koi8u_encoding_object.cpp diff --git a/include/natalie.hpp b/include/natalie.hpp index 65b67eb57..c7b7d2716 100644 --- a/include/natalie.hpp +++ b/include/natalie.hpp @@ -59,6 +59,7 @@ #include "natalie/encoding/iso88598_encoding_object.hpp" #include "natalie/encoding/iso88599_encoding_object.hpp" #include "natalie/encoding/koi8r_encoding_object.hpp" +#include "natalie/encoding/koi8u_encoding_object.hpp" #include "natalie/encoding/shiftjis_encoding_object.hpp" #include "natalie/encoding/us_ascii_encoding_object.hpp" #include "natalie/encoding/utf16be_encoding_object.hpp" diff --git a/include/natalie/encoding/koi8u_encoding_object.hpp b/include/natalie/encoding/koi8u_encoding_object.hpp new file mode 100644 index 000000000..71b66d88d --- /dev/null +++ b/include/natalie/encoding/koi8u_encoding_object.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +#include "natalie/encoding/single_byte_encoding_object.hpp" +#include "natalie/string_object.hpp" + +namespace Natalie { + +using namespace TM; + +class Koi8UEncodingObject : public SingleByteEncodingObject { +public: + Koi8UEncodingObject(); +}; + +} diff --git a/include/natalie/encodings.hpp b/include/natalie/encodings.hpp index 536008dc4..8068b24c0 100644 --- a/include/natalie/encodings.hpp +++ b/include/natalie/encodings.hpp @@ -48,6 +48,7 @@ enum class Encoding : size_t { ISO_8859_15, ISO_8859_16, KOI8_R, + KOI8_U, Windows_1250, Windows_1251, Windows_1252, diff --git a/spec/core/kernel/shared/sprintf_encoding.rb b/spec/core/kernel/shared/sprintf_encoding.rb index 0e3ca76d4..5e3a64a8d 100644 --- a/spec/core/kernel/shared/sprintf_encoding.rb +++ b/spec/core/kernel/shared/sprintf_encoding.rb @@ -14,11 +14,9 @@ end it "returns a String in the same encoding as the format String if compatible" do - NATFIXME 'KOI8_U encoding not implemented', exception: NameError do - string = "%s".dup.force_encoding(Encoding::KOI8_U) - result = @method.call(string, "dogs") - result.encoding.should equal(Encoding::KOI8_U) - end + string = "%s".dup.force_encoding(Encoding::KOI8_U) + result = @method.call(string, "dogs") + result.encoding.should equal(Encoding::KOI8_U) end it "returns a String in the argument's encoding if format encoding is more restrictive" do diff --git a/spec/core/string/valid_encoding_spec.rb b/spec/core/string/valid_encoding_spec.rb index 93cf554c5..39e338082 100644 --- a/spec/core/string/valid_encoding_spec.rb +++ b/spec/core/string/valid_encoding_spec.rb @@ -55,9 +55,7 @@ str.force_encoding('ISO-8859-15').valid_encoding?.should be_true str.force_encoding('ISO-8859-16').valid_encoding?.should be_true str.force_encoding('KOI8-R').valid_encoding?.should be_true - NATFIXME 'Implement KOI8-U encoding', exception: ArgumentError do - str.force_encoding('KOI8-U').valid_encoding?.should be_true - end + str.force_encoding('KOI8-U').valid_encoding?.should be_true str.force_encoding('Shift_JIS').valid_encoding?.should be_false "\xD8\x00".dup.force_encoding('UTF-16BE').valid_encoding?.should be_false "\x00\xD8".dup.force_encoding('UTF-16LE').valid_encoding?.should be_false diff --git a/src/encoding/koi8u_encoding_object.cpp b/src/encoding/koi8u_encoding_object.cpp new file mode 100644 index 000000000..5bc1dc63a --- /dev/null +++ b/src/encoding/koi8u_encoding_object.cpp @@ -0,0 +1,139 @@ +#include "natalie.hpp" + +namespace Natalie { + +static const long KOI8U[] = { + 0X2500, + 0X2502, + 0X250C, + 0X2510, + 0X2514, + 0X2518, + 0X251C, + 0X2524, + 0X252C, + 0X2534, + 0X253C, + 0X2580, + 0X2584, + 0X2588, + 0X258C, + 0X2590, + 0X2591, + 0X2592, + 0X2593, + 0X2320, + 0X25A0, + 0X2219, + 0X221A, + 0X2248, + 0X2264, + 0X2265, + 0XA0, + 0X2321, + 0XB0, + 0XB2, + 0XB7, + 0XF7, + 0X2550, + 0X2551, + 0X2552, + 0X451, + 0X454, + 0X2554, + 0X456, + 0X457, + 0X2557, + 0X2558, + 0X2559, + 0X255A, + 0X255B, + 0X491, + 0X255D, + 0X255E, + 0X255F, + 0X2560, + 0X2561, + 0X401, + 0X404, + 0X2563, + 0X406, + 0X407, + 0X2566, + 0X2567, + 0X2568, + 0X2569, + 0X256A, + 0X490, + 0X256C, + 0XA9, + 0X44E, + 0X430, + 0X431, + 0X446, + 0X434, + 0X435, + 0X444, + 0X433, + 0X445, + 0X438, + 0X439, + 0X43A, + 0X43B, + 0X43C, + 0X43D, + 0X43E, + 0X43F, + 0X44F, + 0X440, + 0X441, + 0X442, + 0X443, + 0X436, + 0X432, + 0X44C, + 0X44B, + 0X437, + 0X448, + 0X44D, + 0X449, + 0X447, + 0X44A, + 0X42E, + 0X410, + 0X411, + 0X426, + 0X414, + 0X415, + 0X424, + 0X413, + 0X425, + 0X418, + 0X419, + 0X41A, + 0X41B, + 0X41C, + 0X41D, + 0X41E, + 0X41F, + 0X42F, + 0X420, + 0X421, + 0X422, + 0X423, + 0X416, + 0X412, + 0X42C, + 0X42B, + 0X417, + 0X428, + 0X42D, + 0X429, + 0X427, + 0X42A, +}; + +Koi8UEncodingObject::Koi8UEncodingObject() + : SingleByteEncodingObject { Encoding::KOI8_U, { "KOI8-U" }, KOI8U } { } + +} diff --git a/src/natalie.cpp b/src/natalie.cpp index b6f89b11b..75bc41144 100644 --- a/src/natalie.cpp +++ b/src/natalie.cpp @@ -341,6 +341,8 @@ Env *build_top_env() { Value EncodingKoi8R = new Koi8REncodingObject {}; Encoding->const_set("KOI8_R"_s, EncodingKoi8R); Encoding->const_set("CP878"_s, EncodingKoi8R); + Value EncodingKoi8U = new Koi8UEncodingObject {}; + Encoding->const_set("KOI8_U"_s, EncodingKoi8U); Value EncodingWindows1250 = new Windows1250EncodingObject {}; Encoding->const_set("Windows_1250"_s, EncodingWindows1250); diff --git a/src/regexp_object.cpp b/src/regexp_object.cpp index c0aa3c645..cde0defe8 100644 --- a/src/regexp_object.cpp +++ b/src/regexp_object.cpp @@ -66,7 +66,7 @@ static const auto ruby_encoding_lookup = []() { map.put(ONIG_ENCODING_SHIFT_JIS, Encoding::SHIFT_JIS); // ONIG_ENCODING_WINDOWS_31J has no local encoding map.put(ONIG_ENCODING_KOI8_R, Encoding::KOI8_R); - // ONIG_ENCODING_KOI8_U has no local encoding + map.put(ONIG_ENCODING_KOI8_U, Encoding::KOI8_U); map.put(ONIG_ENCODING_WINDOWS_1250, Encoding::Windows_1250); map.put(ONIG_ENCODING_WINDOWS_1251, Encoding::Windows_1251); map.put(ONIG_ENCODING_WINDOWS_1252, Encoding::Windows_1252); diff --git a/test/natalie/encoding_test.rb b/test/natalie/encoding_test.rb index e12993914..ca7b998eb 100644 --- a/test/natalie/encoding_test.rb +++ b/test/natalie/encoding_test.rb @@ -776,6 +776,16 @@ end end + describe 'KOI8-U' do + it 'can be used to draw a flipped double table with just 2 legs' do + "\xA9\xA0\xAC".dup.force_encoding('KOI8-U').encode('UTF-8').should == '╘═╛' + end + + it 'cannot be used to draw a regular table' do + "\xA4\x80\xA7".dup.force_encoding('KOI8-U').encode('UTF-8').should == 'є─ї' + end + end + describe 'Windows-1250' do it 'can convert codepoints' do [