Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EbmlString/UTFstring/EbmlUnicodeString: automatic memory management #178

Merged
merged 7 commits into from
Dec 26, 2023
14 changes: 6 additions & 8 deletions ebml/EbmlUnicodeString.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class EBML_DLL_API UTFstring {
UTFstring(const UTFstring &);
UTFstring(std::wstring const &);

virtual ~UTFstring();
virtual ~UTFstring() = default;
bool operator==(const UTFstring&) const;
inline bool operator!=(const UTFstring &cmp) const
{
Expand All @@ -42,19 +42,17 @@ class EBML_DLL_API UTFstring {
UTFstring & operator=(wchar_t);

/// Return length of string
std::size_t length() const {return _Length;}
std::size_t length() const {return WString.size();}

explicit operator const wchar_t*() const;
const wchar_t* c_str() const {return _Data;}
explicit operator const wchar_t*() const {return WString.c_str();};
const wchar_t* c_str() const {return WString.c_str();}
robUx4 marked this conversation as resolved.
Show resolved Hide resolved

const std::string & GetUTF8() const {return UTF8string;}
void SetUTF8(const std::string &);

private:
std::size_t _Length{0}; ///< length of the UCS string excluding the \0
wchar_t* _Data{nullptr}; ///< internal UCS representation
private:
std::wstring WString; ///< internal UCS representation
std::string UTF8string;
static bool wcscmp_internal(const wchar_t *str1, const wchar_t *str2);
void UpdateFromUTF8();
void UpdateFromUCS2();
};
Expand Down
35 changes: 13 additions & 22 deletions src/EbmlString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,9 @@ filepos_t EbmlString::RenderData(IOCallback & output, bool /* bForceRender */, b

if (Result < GetDefaultSize()) {
// pad the rest with 0
auto Pad = new (std::nothrow) binary[GetDefaultSize() - Result];
if (Pad == nullptr) {
return Result;
}
memset(Pad, 0x00, GetDefaultSize() - Result);
output.writeFully(Pad, GetDefaultSize() - Result);
std::string Pad(static_cast<std::string::size_type>(GetDefaultSize() - Result), static_cast<char>(0));
output.writeFully(Pad.c_str(), Pad.size());
Result = GetDefaultSize();
delete [] Pad;
}

return Result;
Expand Down Expand Up @@ -114,24 +109,20 @@ filepos_t EbmlString::ReadData(IOCallback & input, ScopeMode ReadFully)
return GetSize();

if (GetSize() == 0) {
Value = "";
SetValueIsSet();
Value.clear();

} else {
auto Buffer = (GetSize() + 1 < std::numeric_limits<std::size_t>::max()) ? new (std::nothrow) char[GetSize() + 1] : nullptr;
if (Buffer == nullptr) {
// unable to store the data, skip it
input.setFilePointer(GetSize(), seek_current);
} else {
input.readFully(Buffer, GetSize());
if (Buffer[GetSize()-1] != '\0') {
Buffer[GetSize()] = '\0';
}
Value = Buffer;
delete [] Buffer;
SetValueIsSet();
}
Value.resize(GetSize());
std::memset(&Value[0], 0, GetSize());
robUx4 marked this conversation as resolved.
Show resolved Hide resolved
input.readFully(&Value[0], GetSize());

auto PosNull = Value.find('\0');
if (PosNull != std::string::npos)
Value.resize(PosNull);
}

SetValueIsSet();

return GetSize();
}

Expand Down
124 changes: 41 additions & 83 deletions src/EbmlUnicodeString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*/

#include <cassert>
#include <cstddef>
#include <limits>

#include "ebml/EbmlUnicodeString.h"
Expand All @@ -16,6 +17,16 @@

namespace libebml {

namespace {

/// Number of wide characters in `s` that come before its first embedded
/// L'\0'. If `s` contains no null character, the whole size of `s` is
/// returned instead.
std::size_t lengthToFirstNulll(std::wstring const &s)
{
  const std::wstring::size_type firstNull = s.find(L'\0');

  // No embedded terminator: every character counts.
  if (firstNull == std::wstring::npos)
    return s.size();

  return firstNull;
}

}

// ===================== UTFstring class ===================

UTFstring::UTFstring(const wchar_t * _aBuf)
Expand All @@ -28,11 +39,6 @@ UTFstring::UTFstring(std::wstring const &_aBuf)
*this = _aBuf.c_str();
}

UTFstring::~UTFstring()
{
delete [] _Data;
}

UTFstring::UTFstring(const UTFstring & _aBuf)
{
*this = _aBuf.c_str();
Expand All @@ -44,49 +50,34 @@ UTFstring & UTFstring::operator=(const UTFstring & _aBuf)
return *this;
}

UTFstring::operator const wchar_t*() const {return _Data;}


UTFstring & UTFstring::operator=(const wchar_t * _aBuf)
{
delete [] _Data;
if (_aBuf == nullptr) {
_Data = new wchar_t[1];
_Data[0] = 0;
UpdateFromUCS2();
return *this;
}
if (_aBuf != nullptr)
WString = _aBuf;
else
WString.clear();

std::size_t aLen;
for (aLen=0; _aBuf[aLen] != 0; aLen++);
_Length = aLen;
_Data = new wchar_t[_Length+1];
for (aLen=0; _aBuf[aLen] != 0; aLen++) {
_Data[aLen] = _aBuf[aLen];
}
_Data[aLen] = 0;
UpdateFromUCS2();
return *this;
}

UTFstring & UTFstring::operator=(wchar_t _aChar)
{
delete [] _Data;
_Data = new wchar_t[2];
_Length = 1;
_Data[0] = _aChar;
_Data[1] = 0;
WString = _aChar;
UpdateFromUCS2();
return *this;
}

bool UTFstring::operator==(const UTFstring& _aStr) const
{
if ((_Data == nullptr) && (_aStr._Data == nullptr))
return true;
if ((_Data == nullptr) || (_aStr._Data == nullptr))
// Only compare up to the first 0 char in both strings.
auto LengthThis = lengthToFirstNulll(WString);
auto LengthOther = lengthToFirstNulll(_aStr.WString);

if (LengthThis != LengthOther)
return false;
return wcscmp_internal(_Data, _aStr._Data);

return std::memcmp(WString.c_str(), _aStr.WString.c_str(), LengthThis * sizeof(wchar_t)) == 0;
}

void UTFstring::SetUTF8(const std::string & _aStr)
Expand All @@ -103,62 +94,42 @@ void UTFstring::UpdateFromUTF8()
// Only convert up to the first \0 character if present.
auto Current = std::find(UTF8string.begin(), UTF8string.end(), '\0');

std::wstring Temp;
WString.clear();
try {
// Even though the function names hint at UCS2, the internal
// representation must actually be compatible with the C++
// library's implementation. Implementations with sizeof(wchar_t)
// == 4 are using UCS4.
if (sizeof(wchar_t) == 2)
::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(Temp));
::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(WString));
else
::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(Temp));
::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(WString));
} catch (::utf8::invalid_code_point &) {
} catch (::utf8::invalid_utf8 &) {
}

delete [] _Data;
_Length = Temp.length();
_Data = new wchar_t[_Length + 1];

std::memcpy(_Data, Temp.c_str(), sizeof(wchar_t) * (_Length + 1));
}

void UTFstring::UpdateFromUCS2()
{
UTF8string.clear();

if (!_Data)
return;

// Only convert up to the first \0 character if present.
std::size_t Current = 0;
while ((Current < _Length) && _Data[Current])
++Current;
auto Current = std::find(WString.begin(), WString.end(), L'\0');
robUx4 marked this conversation as resolved.
Show resolved Hide resolved

UTF8string.clear();

try {
// Even though the function is called UCS2, the internal
// representation must actually be compatible with the C++
// library's implementation. Implementations with sizeof(wchar_t)
// == 4 are using UCS4.
if (sizeof(wchar_t) == 2)
::utf8::utf16to8(_Data, _Data + Current, std::back_inserter(UTF8string));
::utf8::utf16to8(WString.begin(), Current, std::back_inserter(UTF8string));
else
::utf8::utf32to8(_Data, _Data + Current, std::back_inserter(UTF8string));
::utf8::utf32to8(WString.begin(), Current, std::back_inserter(UTF8string));
} catch (::utf8::invalid_code_point &) {
} catch (::utf8::invalid_utf16 &) {
}
}

/// Compare two null-terminated wide strings for equality.
/// Returns true when both strings hold the same characters up to and
/// including their terminating 0, false at the first position where they differ.
bool UTFstring::wcscmp_internal(const wchar_t *str1, const wchar_t *str2)
{
  const wchar_t *lhs = str1;
  const wchar_t *rhs = str2;

  // Walk both strings in lockstep for as long as the characters agree.
  for (; *lhs == *rhs; ++lhs, ++rhs) {
    // Both sides reached their terminator together: the strings match.
    if (*lhs == 0)
      return true;
  }

  // The loop ended on a mismatching pair of characters.
  return false;
}

// ===================== EbmlUnicodeString class ===================

EbmlUnicodeString::EbmlUnicodeString()
Expand Down Expand Up @@ -202,14 +173,9 @@ filepos_t EbmlUnicodeString::RenderData(IOCallback & output, bool /* bForceRende

if (Result < GetDefaultSize()) {
// pad the rest with 0
auto Pad = new (std::nothrow) binary[GetDefaultSize() - Result];
if (Pad != nullptr) {
memset(Pad, 0x00, GetDefaultSize() - Result);
output.writeFully(Pad, GetDefaultSize() - Result);

Result = GetDefaultSize();
delete [] Pad;
}
std::string Pad(static_cast<std::string::size_type>(GetDefaultSize() - Result), static_cast<char>(0));
output.writeFully(Pad.c_str(), Pad.size());
Result = GetDefaultSize();
}

return Result;
Expand Down Expand Up @@ -267,24 +233,16 @@ filepos_t EbmlUnicodeString::ReadData(IOCallback & input, ScopeMode ReadFully)

if (GetSize() == 0) {
Value = static_cast<UTFstring::value_type>(0);
SetValueIsSet();

} else {
auto Buffer = (GetSize() + 1 < std::numeric_limits<std::size_t>::max()) ? new (std::nothrow) char[GetSize()+1] : nullptr;
if (Buffer == nullptr) {
// impossible to read, skip it
input.setFilePointer(GetSize(), seek_current);
} else {
input.readFully(Buffer, GetSize());
if (Buffer[GetSize()-1] != 0) {
Buffer[GetSize()] = 0;
}

Value.SetUTF8(Buffer); // implicit conversion to std::string
delete [] Buffer;
SetValueIsSet();
}
std::string Buffer(static_cast<std::string::size_type>(GetSize()), static_cast<char>(0));
input.readFully(&Buffer[0], GetSize());

Value.SetUTF8(Buffer.c_str()); // Let conversion to std::string cut off at the first 0
}

SetValueIsSet();

return GetSize();
}

Expand Down