cloudaper · krystof-k · Feb 9, 2024 · Feb 9, 2024 · Feb 9, 2024 · Feb 9, 2024
diff --git a/README.md b/README.md
@@ -32,7 +32,7 @@ which is a thin wrapper around `CompactEncDet::DetectEncoding` and `MimeEncoding
 
 > ```ruby
 > file = File.read("unknown-encoding.txt")
-> result = CompactEncDet.detect_encoding(file, file.bytesize)
+> result = CompactEncDet.detect_encoding(file)
 > result.encoding
 > # => #<Encoding:Windows-1250>
 > result.bytes_consumed

diff --git a/ext/compact_enc_det/compact_enc_det.cc b/ext/compact_enc_det/compact_enc_det.cc
@@ -34,8 +34,8 @@ void Init_detect_encoding_result(VALUE rb_mCompactEncDet)
 // for the CompactEncDet::DetectEncoding C++ function
 static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
 {
-  VALUE ruby_text,
-      ruby_text_length,
+  VALUE text,
+      text_length,
       url_hint,
       http_charset_hint,
       meta_charset_hint,
@@ -45,9 +45,9 @@ static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
       ignore_7bit_mail_encodings;
 
   // Parse the Ruby arguments
-  rb_scan_args(argc, argv, "27",
-               &ruby_text,
-               &ruby_text_length,
+  rb_scan_args(argc, argv, "17",
+               &text,
+               &text_length,
                &url_hint,
                &http_charset_hint,
                &meta_charset_hint,
@@ -56,17 +56,18 @@ static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
                &corpus_type,
                &ignore_7bit_mail_encodings);
 
-  // Convert the Ruby values to C types
-  const char *text = StringValueCStr(ruby_text);
-  const int text_length = NUM2INT(ruby_text_length);
+  // Convert the Ruby arguments to C++ types; take the byte length from the String so embedded NUL bytes (e.g. UTF-16 input) are accepted
+  const char* c_text = StringValuePtr(text);
+  const int c_text_length = NIL_P(text_length) ? RSTRING_LENINT(text) : NUM2INT(text_length);
 
   // Declare the output variables
   int bytes_consumed;
   bool is_reliable;
 
   // Detect the encoding using CompactEncDet::DetectEncoding
   Encoding encoding = CompactEncDet::DetectEncoding(
-      text, text_length,
+      c_text,
+      c_text_length,
       NIL_P(url_hint) ? nullptr : StringValueCStr(url_hint),
       NIL_P(http_charset_hint) ? nullptr : StringValueCStr(http_charset_hint),
       NIL_P(meta_charset_hint) ? nullptr : StringValueCStr(meta_charset_hint),
@@ -76,11 +77,11 @@ static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
       NIL_P(ignore_7bit_mail_encodings) ? false : RTEST(ignore_7bit_mail_encodings),
       &bytes_consumed,
       &is_reliable);
-  
+
   // Convert the encoding enum to string using MimeEncodingName
   const char* encoding_mime_name = MimeEncodingName(encoding);
   VALUE rb_encoding_mime_name = rb_str_new_cstr(encoding_mime_name);
-  
+
   // Find the Ruby Encoding class
   VALUE rb_encoding = rb_funcall(rb_cEncoding, rb_intern("find"), 1, rb_encoding_mime_name);
 

diff --git a/test/compact_enc_det_test.rb b/test/compact_enc_det_test.rb
@@ -2,7 +2,16 @@
 require_relative "../lib/compact_enc_det"
 
 class CompactEncDetTest < Minitest::Test
-  def test_detect_encoding_known_english
+  def test_detect_encoding
+    text = File.read("test/fixtures/utf-8.txt")
+    result = CompactEncDet.detect_encoding(text)
+
+    assert_equal Encoding::UTF_8, result.encoding
+    assert_operator 0, :<, result.bytes_consumed
+    assert_equal true, result.is_reliable?
+  end
+
+  def test_detect_encoding_with_explicit_length
     text = File.read("test/fixtures/utf-8.txt")
     result = CompactEncDet.detect_encoding(text, text.bytesize)