Skip to content

Commit

Permalink
Merge pull request #2505 from herwinw/koi8
Browse files Browse the repository at this point in the history
Implement KOI8 Encodings
  • Loading branch information
herwinw committed Jan 15, 2025
2 parents 5f1b2f2 + 0bd4cd9 commit e9f11df
Show file tree
Hide file tree
Showing 11 changed files with 347 additions and 13 deletions.
2 changes: 2 additions & 0 deletions include/natalie.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
#include "natalie/encoding/iso88597_encoding_object.hpp"
#include "natalie/encoding/iso88598_encoding_object.hpp"
#include "natalie/encoding/iso88599_encoding_object.hpp"
#include "natalie/encoding/koi8r_encoding_object.hpp"
#include "natalie/encoding/koi8u_encoding_object.hpp"
#include "natalie/encoding/shiftjis_encoding_object.hpp"
#include "natalie/encoding/us_ascii_encoding_object.hpp"
#include "natalie/encoding/utf16be_encoding_object.hpp"
Expand Down
18 changes: 18 additions & 0 deletions include/natalie/encoding/koi8r_encoding_object.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include <assert.h>
#include <initializer_list>

#include "natalie/encoding/single_byte_encoding_object.hpp"
#include "natalie/string_object.hpp"

namespace Natalie {

using namespace TM;

// Encoding object for the KOI8-R character set.
// Publicly derives from SingleByteEncodingObject and declares only a default
// constructor; the constructor is defined out of line in
// src/encoding/koi8r_encoding_object.cpp.
class Koi8REncodingObject : public SingleByteEncodingObject {
public:
// Default constructor (defined out of line).
Koi8REncodingObject();
};

}
18 changes: 18 additions & 0 deletions include/natalie/encoding/koi8u_encoding_object.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include <assert.h>
#include <initializer_list>

#include "natalie/encoding/single_byte_encoding_object.hpp"
#include "natalie/string_object.hpp"

namespace Natalie {

using namespace TM;

// Encoding object for the KOI8-U character set.
// Publicly derives from SingleByteEncodingObject and declares only a default
// constructor; the constructor is defined out of line in
// src/encoding/koi8u_encoding_object.cpp.
class Koi8UEncodingObject : public SingleByteEncodingObject {
public:
// Default constructor (defined out of line).
Koi8UEncodingObject();
};

}
2 changes: 2 additions & 0 deletions include/natalie/encodings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ enum class Encoding : size_t {
ISO_8859_14,
ISO_8859_15,
ISO_8859_16,
KOI8_R,
KOI8_U,
Windows_1250,
Windows_1251,
Windows_1252,
Expand Down
8 changes: 3 additions & 5 deletions spec/core/kernel/shared/sprintf_encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@
end

it "returns a String in the same encoding as the format String if compatible" do
NATFIXME 'KOI8_U encoding not implemented', exception: NameError do
string = "%s".dup.force_encoding(Encoding::KOI8_U)
result = @method.call(string, "dogs")
result.encoding.should equal(Encoding::KOI8_U)
end
string = "%s".dup.force_encoding(Encoding::KOI8_U)
result = @method.call(string, "dogs")
result.encoding.should equal(Encoding::KOI8_U)
end

it "returns a String in the argument's encoding if format encoding is more restrictive" do
Expand Down
8 changes: 2 additions & 6 deletions spec/core/string/valid_encoding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,8 @@
str.force_encoding('ISO-8859-14').valid_encoding?.should be_true
str.force_encoding('ISO-8859-15').valid_encoding?.should be_true
str.force_encoding('ISO-8859-16').valid_encoding?.should be_true
NATFIXME 'Implement KOI8-R encoding', exception: ArgumentError do
str.force_encoding('KOI8-R').valid_encoding?.should be_true
end
NATFIXME 'Implement KOI8-U encoding', exception: ArgumentError do
str.force_encoding('KOI8-U').valid_encoding?.should be_true
end
str.force_encoding('KOI8-R').valid_encoding?.should be_true
str.force_encoding('KOI8-U').valid_encoding?.should be_true
str.force_encoding('Shift_JIS').valid_encoding?.should be_false
"\xD8\x00".dup.force_encoding('UTF-16BE').valid_encoding?.should be_false
"\x00\xD8".dup.force_encoding('UTF-16LE').valid_encoding?.should be_false
Expand Down
139 changes: 139 additions & 0 deletions src/encoding/koi8r_encoding_object.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "natalie.hpp"

namespace Natalie {

// Unicode code points for the upper half of the KOI8-R code page
// (128 entries; the values agree with the KOI8-R table in RFC 1489).
// Laid out eight entries per row. The trailing comment on each row is the
// byte value of the row's first entry, assuming entry i belongs to byte
// 0x80 + i.
// NOTE(review): the lookup itself lives in SingleByteEncodingObject, which is
// not visible from this file -- confirm the 0x80 offset there.
static const long KOI8R[] = {
    0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, // 0x80
    0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, // 0x88
    0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, // 0x90
    0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, // 0x98
    0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // 0xA0
    0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, // 0xA8
    0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // 0xB0
    0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, // 0xB8
    0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // 0xC0
    0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, // 0xC8
    0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // 0xD0
    0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, // 0xD8
    0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // 0xE0
    0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, // 0xE8
    0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // 0xF0
    0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A, // 0xF8
};

// Default constructor: forwards the Encoding::KOI8_R tag, the name list
// { "KOI8-R", "CP878" }, and the KOI8R table defined above to the
// SingleByteEncodingObject base constructor. The constructor body is empty.
Koi8REncodingObject::Koi8REncodingObject()
: SingleByteEncodingObject { Encoding::KOI8_R, { "KOI8-R", "CP878" }, KOI8R } { }

}
139 changes: 139 additions & 0 deletions src/encoding/koi8u_encoding_object.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "natalie.hpp"

namespace Natalie {

// Unicode code points for the upper half of the KOI8-U code page
// (128 entries; the values agree with the KOI8-U table in RFC 2319).
// Laid out eight entries per row. The trailing comment on each row is the
// byte value of the row's first entry, assuming entry i belongs to byte
// 0x80 + i.
// NOTE(review): the lookup itself lives in SingleByteEncodingObject, which is
// not visible from this file -- confirm the 0x80 offset there.
static const long KOI8U[] = {
    0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, // 0x80
    0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, // 0x88
    0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, // 0x90
    0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, // 0x98
    0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, // 0xA0
    0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, // 0xA8
    0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, // 0xB0
    0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, // 0xB8
    0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // 0xC0
    0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, // 0xC8
    0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // 0xD0
    0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, // 0xD8
    0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // 0xE0
    0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, // 0xE8
    0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // 0xF0
    0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A, // 0xF8
};

// Default constructor: forwards the Encoding::KOI8_U tag, the name list
// { "KOI8-U" }, and the KOI8U table defined above to the
// SingleByteEncodingObject base constructor. The constructor body is empty.
Koi8UEncodingObject::Koi8UEncodingObject()
: SingleByteEncodingObject { Encoding::KOI8_U, { "KOI8-U" }, KOI8U } { }

}
6 changes: 6 additions & 0 deletions src/natalie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,12 @@ Env *build_top_env() {
Encoding->const_set("ISO_8859_16"_s, EncodingIso885916);
Encoding->const_set("ISO8859_16"_s, EncodingIso885916);

Value EncodingKoi8R = new Koi8REncodingObject {};
Encoding->const_set("KOI8_R"_s, EncodingKoi8R);
Encoding->const_set("CP878"_s, EncodingKoi8R);
Value EncodingKoi8U = new Koi8UEncodingObject {};
Encoding->const_set("KOI8_U"_s, EncodingKoi8U);

Value EncodingWindows1250 = new Windows1250EncodingObject {};
Encoding->const_set("Windows_1250"_s, EncodingWindows1250);
Encoding->const_set("WINDOWS_1250"_s, EncodingWindows1250);
Expand Down
4 changes: 2 additions & 2 deletions src/regexp_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ static const auto ruby_encoding_lookup = []() {
// ONIG_ENCODING_EUC_CN has no local encoding
map.put(ONIG_ENCODING_SHIFT_JIS, Encoding::SHIFT_JIS);
// ONIG_ENCODING_WINDOWS_31J has no local encoding
// ONIG_ENCODING_KOI8_R has no local encoding
// ONIG_ENCODING_KOI8_U has no local encoding
map.put(ONIG_ENCODING_KOI8_R, Encoding::KOI8_R);
map.put(ONIG_ENCODING_KOI8_U, Encoding::KOI8_U);
map.put(ONIG_ENCODING_WINDOWS_1250, Encoding::Windows_1250);
map.put(ONIG_ENCODING_WINDOWS_1251, Encoding::Windows_1251);
map.put(ONIG_ENCODING_WINDOWS_1252, Encoding::Windows_1252);
Expand Down
Loading

0 comments on commit e9f11df

Please sign in to comment.