Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transcode strings correctly between UTF-8 and UTF-16 #78

Merged
merged 6 commits into from
May 1, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions tightdb_jni/src/com_tightdb_Group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ JNIEXPORT jlong JNICALL Java_com_tightdb_Group_createNative___3B(
TR((env, " %d bytes.", byteArrayLength));
Group* pGroup = 0;
try {
pGroup = new Group(Group::BufferSpec(reinterpret_cast<char*>(buf), S(byteArrayLength)), true);
pGroup = new Group(BinaryData(reinterpret_cast<char*>(buf), S(byteArrayLength)), true);
}
catch (...) {
// FIXME: Different exception types mean different things. More
Expand Down Expand Up @@ -86,7 +86,7 @@ JNIEXPORT jlong JNICALL Java_com_tightdb_Group_createNative__Ljava_nio_ByteBuffe
// when the new-operator or the Group constructor fails.
Group* pGroup = 0;
try {
pGroup = new Group(Group::BufferSpec(bin.data(), bin.size()));
pGroup = new Group(BinaryData(bin.data(), bin.size()));
}
catch (...) {
// FIXME: Different exception types mean different things. More
Expand Down Expand Up @@ -176,20 +176,20 @@ JNIEXPORT jbyteArray JNICALL Java_com_tightdb_Group_nativeWriteToMem(
{
TR((env, "nativeWriteToMem(%x)\n", nativeGroupPtr));
try {
Group::BufferSpec buffer = G(nativeGroupPtr)->write_to_mem(); // FIXME: May throw at least std::bad_alloc
BinaryData buffer = G(nativeGroupPtr)->write_to_mem(); // FIXME: May throw at least std::bad_alloc
jbyteArray jArray = 0;
if (buffer.m_size <= MAX_JSIZE) {
jsize jlen = static_cast<jsize>(buffer.m_size);
if (buffer.size() <= MAX_JSIZE) {
jsize jlen = static_cast<jsize>(buffer.size());
jArray = env->NewByteArray(jlen);
if (jArray)
// Copy data to Byte[]
env->SetByteArrayRegion(jArray, 0, jlen, (const jbyte*)buffer.m_data);
env->SetByteArrayRegion(jArray, 0, jlen, reinterpret_cast<const jbyte*>(buffer.data()));
}
if (!jArray) {
ThrowException(env, IndexOutOfBounds, "Group too big to write.");
}
// FIXME: Deallocation must happen even if something fails above
free(const_cast<char*>(buffer.m_data)); // free native data.
free(const_cast<char*>(buffer.data())); // free native data.
return jArray;
} catch (std::exception& e) {
ThrowException(env, IOFailed, e.what());
Expand All @@ -200,9 +200,9 @@ JNIEXPORT jobject JNICALL Java_com_tightdb_Group_nativeWriteToByteBuffer(
JNIEnv* env, jobject, jlong nativeGroupPtr)
{
TR((env, "nativeWriteToByteBuffer(%x)\n", nativeGroupPtr));
Group::BufferSpec buffer = G(nativeGroupPtr)->write_to_mem(); // FIXME: May throw at least std::bad_alloc
if (buffer.m_size <= MAX_JLONG) {
return env->NewDirectByteBuffer(const_cast<char*>(buffer.m_data), static_cast<jlong>(buffer.m_size));
BinaryData buffer = G(nativeGroupPtr)->write_to_mem(); // FIXME: May throw at least std::bad_alloc
if (buffer.size() <= MAX_JLONG) {
return env->NewDirectByteBuffer(const_cast<char*>(buffer.data()), static_cast<jlong>(buffer.size()));
// Data is NOT copied in DirectByteBuffer - so we can't free it.
}
else {
Expand Down
142 changes: 140 additions & 2 deletions tightdb_jni/src/util.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,57 @@
#include <cassert>
#include <algorithm>
#include <stdexcept>

#include <tightdb/unique_ptr.hpp>
#include <tightdb/safe_int_ops.hpp>
#include <tightdb/assert.hpp>
#include <tightdb/utf8.hpp>

#include "util.hpp"
#include "com_tightdb_internal_util.hpp"


using namespace std;
using namespace tightdb;

namespace {

// This assumes that 'jchar' is an integral type with at least 16
// non-sign value bits, that is, an unsigned 16-bit integer, or any
// signed or unsigned integer with more than 16 bits.
struct JcharTraits {
static jchar to_int_type(jchar c) TIGHTDB_NOEXCEPT { return c; }
static jchar to_char_type(jchar i) TIGHTDB_NOEXCEPT { return i; }
};

struct JStringCharsAccessor {
JStringCharsAccessor(JNIEnv* e, jstring s):
m_env(e), m_string(s), m_data(e->GetStringChars(s,0)), m_size(get_size(e,s)) {}
~JStringCharsAccessor()
{
m_env->ReleaseStringChars(m_string, m_data);
}
const jchar* data() const TIGHTDB_NOEXCEPT { return m_data; }
size_t size() const TIGHTDB_NOEXCEPT { return m_size; }

private:
JNIEnv* const m_env;
const jstring m_string;
const jchar* const m_data;
const size_t m_size;

static size_t get_size(JNIEnv* e, jstring s)
{
size_t size;
if (int_cast_with_overflow_detect(e->GetStringLength(s), size))
throw runtime_error("String size overflow");
return size;
}
};

} // anonymous namespace



void ThrowException(JNIEnv* env, ExceptionKind exception, std::string classStr, std::string itemStr)
{
std::string message;
Expand Down Expand Up @@ -49,7 +97,7 @@ void ThrowException(JNIEnv* env, ExceptionKind exception, std::string classStr,
break;

default:
assert(0);
TIGHTDB_ASSERT(false);
return;
}
if (jExceptionClass != NULL)
Expand Down Expand Up @@ -113,3 +161,93 @@ bool GetBinaryData(JNIEnv* env, jobject jByteBuffer, tightdb::BinaryData& bin)
bin = BinaryData(data, S(size));
return true;
}


jstring to_jstring(JNIEnv* env, StringData str)
{
// For efficiency, if the incoming UTF-8 string is sufficiently
// small, we will attempt to store the UTF-16 output into a stack
// allocated buffer of static size. Otherwise we will have to
// dynamically allocate the output buffer after calculating its
// size.

const size_t stack_buf_size = 48;
jchar stack_buf[stack_buf_size];
UniquePtr<jchar[]> dyn_buf;

const char* in_begin = str.data();
const char* in_end = str.data() + str.size();
jchar* out_begin = stack_buf;
jchar* out_curr = stack_buf;
jchar* out_end = stack_buf + stack_buf_size;

typedef Utf8x16<jchar, JcharTraits> Xcode;

if (str.size() <= stack_buf_size) {
if (!Xcode::to_utf16(in_begin, in_end, out_curr, out_end)) goto bad_utf8;
if (in_begin == in_end) goto transcode_complete;
}

{
const char* in_begin2 = in_begin;
size_t size = Xcode::find_utf16_buf_size(in_begin2, in_end);
if (in_begin2 != in_end) goto bad_utf8;
if (int_add_with_overflow_detect(size, stack_buf_size))
throw runtime_error("String size overflow");
dyn_buf.reset(new jchar[size]);
out_curr = copy(out_begin, out_curr, dyn_buf.get());
out_begin = dyn_buf.get();
out_end = dyn_buf.get() + size;
if (!Xcode::to_utf16(in_begin, in_end, out_curr, out_end)) goto bad_utf8;
TIGHTDB_ASSERT(in_begin == in_end);
}

transcode_complete:
{
jsize out_size;
if (int_cast_with_overflow_detect(out_curr - out_begin, out_size))
throw runtime_error("String size overflow");

return env->NewString(out_begin, out_size);
}

bad_utf8:
throw runtime_error("Bad UTF-8 encoding");
}


JStringAccessor::JStringAccessor(JNIEnv* env, jstring str)
{
// For efficiency, if the incoming UTF-16 string is sufficiently
// small, we will choose an UTF-8 output buffer whose size (in
// bytes) is simply 4 times the number of 16-bit elements in the
// input. This is guaranteed to be enough. However, to avoid
// excessive over allocation, this is not done for larger input
// strings.

JStringCharsAccessor chars(env, str);

typedef Utf8x16<jchar, JcharTraits> Xcode;
size_t max_project_size = 48;
TIGHTDB_ASSERT(max_project_size <= numeric_limits<size_t>::max()/4);
size_t buf_size;
if (chars.size() <= max_project_size) {
buf_size = chars.size() * 4;
}
else {
const jchar* begin = chars.data();
const jchar* end = begin + chars.size();
buf_size = Xcode::find_utf8_buf_size(begin, end);
}
m_data.reset(new char[buf_size]);
{
const jchar* in_begin = chars.data();
const jchar* in_end = in_begin + chars.size();
char* out_begin = m_data.get();
char* out_end = m_data.get() + buf_size;
if (!Xcode::to_utf8(in_begin, in_end, out_begin, out_end))
throw runtime_error("Bad UTF-16 encoding");
TIGHTDB_ASSERT(in_begin == in_end);
m_size = out_begin - m_data.get();
}
}
Loading