From 24a8210b3f95e42d14b98e3696fab98c749b7ac8 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Fri, 22 Dec 2023 16:44:57 +0100 Subject: [PATCH 1/7] EbmlString: use std::string for 0-padding instead of manual memory management --- src/EbmlString.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/EbmlString.cpp b/src/EbmlString.cpp index e29bf9d4..be0b0027 100644 --- a/src/EbmlString.cpp +++ b/src/EbmlString.cpp @@ -61,14 +61,9 @@ filepos_t EbmlString::RenderData(IOCallback & output, bool /* bForceRender */, b if (Result < GetDefaultSize()) { // pad the rest with 0 - auto Pad = new (std::nothrow) binary[GetDefaultSize() - Result]; - if (Pad == nullptr) { - return Result; - } - memset(Pad, 0x00, GetDefaultSize() - Result); - output.writeFully(Pad, GetDefaultSize() - Result); + std::string Pad(static_cast(GetDefaultSize() - Result), static_cast(0)); + output.writeFully(Pad.c_str(), Pad.size()); Result = GetDefaultSize(); - delete [] Pad; } return Result; From 684612c871082b6987ecad2fb410b630b8e60068 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Fri, 22 Dec 2023 17:02:26 +0100 Subject: [PATCH 2/7] EbmlUnicodeString: use std::string for 0-padding instead of manual memory management --- src/EbmlUnicodeString.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/EbmlUnicodeString.cpp b/src/EbmlUnicodeString.cpp index c3c3461c..0be95f89 100644 --- a/src/EbmlUnicodeString.cpp +++ b/src/EbmlUnicodeString.cpp @@ -202,14 +202,9 @@ filepos_t EbmlUnicodeString::RenderData(IOCallback & output, bool /* bForceRende if (Result < GetDefaultSize()) { // pad the rest with 0 - auto Pad = new (std::nothrow) binary[GetDefaultSize() - Result]; - if (Pad != nullptr) { - memset(Pad, 0x00, GetDefaultSize() - Result); - output.writeFully(Pad, GetDefaultSize() - Result); - - Result = GetDefaultSize(); - delete [] Pad; - } + std::string Pad(static_cast(GetDefaultSize() - Result), static_cast(0)); + output.writeFully(Pad.c_str(), Pad.size()); + Result = GetDefaultSize(); } return Result; From 58ccbaf9445382ae848df2d83afb2183189b3c9c Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Fri, 22 Dec 2023 16:55:00 +0100 Subject: [PATCH 3/7] UTFstring: store data in std::wstring instead of doing manual string management --- ebml/EbmlUnicodeString.h | 12 +++---- src/EbmlUnicodeString.cpp | 76 +++++++++++---------------------------- 2 files changed, 25 insertions(+), 63 deletions(-) diff --git a/ebml/EbmlUnicodeString.h b/ebml/EbmlUnicodeString.h index 8e601c2d..c37bd03f 100644 --- a/ebml/EbmlUnicodeString.h +++ b/ebml/EbmlUnicodeString.h @@ -42,19 +42,17 @@ class EBML_DLL_API UTFstring { UTFstring & operator=(wchar_t); /// Return length of string - std::size_t length() const {return _Length;} + std::size_t length() const {return WString.size();} - explicit operator const wchar_t*() const; - const wchar_t* c_str() const {return _Data;} + explicit operator const wchar_t*() const {return WString.c_str();}; + const wchar_t* c_str() const {return WString.c_str();} const std::string & GetUTF8() const {return UTF8string;} void SetUTF8(const std::string &); - private: - std::size_t _Length{0}; ///< length of the UCS string excluding the \0 - wchar_t* _Data{nullptr}; ///< internal UCS representation +private: + std::wstring WString; ///< internal UCS representation std::string UTF8string; - static bool wcscmp_internal(const wchar_t *str1, const wchar_t *str2); void UpdateFromUTF8(); void UpdateFromUCS2(); }; diff --git a/src/EbmlUnicodeString.cpp b/src/EbmlUnicodeString.cpp index 0be95f89..759ff30c 100644 --- a/src/EbmlUnicodeString.cpp +++ b/src/EbmlUnicodeString.cpp @@ -30,7 +30,6 @@ UTFstring::UTFstring(std::wstring const &_aBuf) UTFstring::~UTFstring() { - delete [] _Data; } UTFstring::UTFstring(const UTFstring & _aBuf) @@ -44,49 +43,34 @@ UTFstring & UTFstring::operator=(const UTFstring & _aBuf) return *this; } -UTFstring::operator const wchar_t*() const {return _Data;} - - UTFstring & UTFstring::operator=(const wchar_t * _aBuf) { - delete [] _Data; - if (_aBuf == nullptr) { - _Data = new wchar_t[1]; - _Data[0] = 0; - UpdateFromUCS2(); - return *this; - } + if (_aBuf != nullptr) + WString = _aBuf; + else + WString.clear(); - std::size_t aLen; - for (aLen=0; _aBuf[aLen] != 0; aLen++); - _Length = aLen; - _Data = new wchar_t[_Length+1]; - for (aLen=0; _aBuf[aLen] != 0; aLen++) { - _Data[aLen] = _aBuf[aLen]; - } - _Data[aLen] = 0; UpdateFromUCS2(); return *this; } UTFstring & UTFstring::operator=(wchar_t _aChar) { - delete [] _Data; - _Data = new wchar_t[2]; - _Length = 1; - _Data[0] = _aChar; - _Data[1] = 0; + WString = _aChar; UpdateFromUCS2(); return *this; } bool UTFstring::operator==(const UTFstring& _aStr) const { - if ((_Data == nullptr) && (_aStr._Data == nullptr)) - return true; - if ((_Data == nullptr) || (_aStr._Data == nullptr)) + // Only compare up to the first 0 char in both strings. + auto LengthThis = std::distance(WString.begin(), std::find(WString.begin(), WString.end(), L'\0')); + auto LengthOther = std::distance(_aStr.WString.begin(), std::find(_aStr.WString.begin(), _aStr.WString.end(), L'\0')); + + if (LengthThis != LengthOther) return false; - return wcscmp_internal(_Data, _aStr._Data); + + return std::memcmp(WString.c_str(), _aStr.WString.c_str(), LengthThis * sizeof(wchar_t)) == 0; } void UTFstring::SetUTF8(const std::string & _aStr) @@ -103,38 +87,27 @@ void UTFstring::UpdateFromUTF8() // Only convert up to the first \0 character if present. auto Current = std::find(UTF8string.begin(), UTF8string.end(), '\0'); - std::wstring Temp; + WString.clear(); try { // Even though the function names hint at UCS2, the internal // representation must actually be compatible with the C++ // library's implementation. Implementations with sizeof(wchar_t) // == 4 are using UCS4. if (sizeof(wchar_t) == 2) - ::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(Temp)); + ::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(WString)); else - ::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(Temp)); + ::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(WString)); } catch (::utf8::invalid_code_point &) { } catch (::utf8::invalid_utf8 &) { } - - delete [] _Data; - _Length = Temp.length(); - _Data = new wchar_t[_Length + 1]; - - std::memcpy(_Data, Temp.c_str(), sizeof(wchar_t) * (_Length + 1)); } void UTFstring::UpdateFromUCS2() { - UTF8string.clear(); - - if (!_Data) - return; - // Only convert up to the first \0 character if present. - std::size_t Current = 0; - while ((Current < _Length) && _Data[Current]) - ++Current; + auto Current = std::find(WString.begin(), WString.end(), L'\0'); + + UTF8string.clear(); try { // Even though the function is called UCS2, the internal @@ -142,23 +115,14 @@ void UTFstring::UpdateFromUCS2() // library's implementation. Implementations with sizeof(wchar_t) // == 4 are using UCS4. if (sizeof(wchar_t) == 2) - ::utf8::utf16to8(_Data, _Data + Current, std::back_inserter(UTF8string)); + ::utf8::utf16to8(WString.begin(), Current, std::back_inserter(UTF8string)); else - ::utf8::utf32to8(_Data, _Data + Current, std::back_inserter(UTF8string)); + ::utf8::utf32to8(WString.begin(), Current, std::back_inserter(UTF8string)); } catch (::utf8::invalid_code_point &) { } catch (::utf8::invalid_utf16 &) { } } -bool UTFstring::wcscmp_internal(const wchar_t *str1, const wchar_t *str2) -{ - std::size_t Index=0; - while (str1[Index] == str2[Index] && str1[Index] != 0) { - Index++; - } - return (str1[Index] == str2[Index]); -} - // ===================== EbmlUnicodeString class =================== EbmlUnicodeString::EbmlUnicodeString() From 33ed4437f5b8e08b187dea9899c2752c01017ddb Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Fri, 22 Dec 2023 17:50:15 +0100 Subject: [PATCH 4/7] EbmlUnicodeString: use std::string when reading instead of manual memory management --- src/EbmlUnicodeString.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/EbmlUnicodeString.cpp b/src/EbmlUnicodeString.cpp index 759ff30c..5a390c2d 100644 --- a/src/EbmlUnicodeString.cpp +++ b/src/EbmlUnicodeString.cpp @@ -226,24 +226,16 @@ filepos_t EbmlUnicodeString::ReadData(IOCallback & input, ScopeMode ReadFully) if (GetSize() == 0) { Value = static_cast(0); - SetValueIsSet(); + } else { - auto Buffer = (GetSize() + 1 < std::numeric_limits::max()) ? new (std::nothrow) char[GetSize()+1] : nullptr; - if (Buffer == nullptr) { - // impossible to read, skip it - input.setFilePointer(GetSize(), seek_current); - } else { - input.readFully(Buffer, GetSize()); - if (Buffer[GetSize()-1] != 0) { - Buffer[GetSize()] = 0; - } - - Value.SetUTF8(Buffer); // implicit conversion to std::string - delete [] Buffer; - SetValueIsSet(); - } + std::string Buffer(static_cast(GetSize()), static_cast(0)); + input.readFully(&Buffer[0], GetSize()); + + Value.SetUTF8(Buffer.c_str()); // Let conversion to std::string cut off at the first 0 } + SetValueIsSet(); + return GetSize(); } From 1a17e30049c6cdcd06da502e097980882b1c6ac8 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Fri, 22 Dec 2023 20:28:06 +0100 Subject: [PATCH 5/7] UTFstring: use defaulted destructor --- ebml/EbmlUnicodeString.h | 2 +- src/EbmlUnicodeString.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ebml/EbmlUnicodeString.h b/ebml/EbmlUnicodeString.h index c37bd03f..040fe1a9 100644 --- a/ebml/EbmlUnicodeString.h +++ b/ebml/EbmlUnicodeString.h @@ -31,7 +31,7 @@ class EBML_DLL_API UTFstring { UTFstring(const UTFstring &); UTFstring(std::wstring const &); - virtual ~UTFstring(); + virtual ~UTFstring() = default; bool operator==(const UTFstring&) const; inline bool operator!=(const UTFstring &cmp) const { diff --git a/src/EbmlUnicodeString.cpp b/src/EbmlUnicodeString.cpp index 5a390c2d..3fcacb7e 100644 --- a/src/EbmlUnicodeString.cpp +++ b/src/EbmlUnicodeString.cpp @@ -28,10 +28,6 @@ UTFstring::UTFstring(std::wstring const &_aBuf) *this = _aBuf.c_str(); } -UTFstring::~UTFstring() -{ -} - UTFstring::UTFstring(const UTFstring & _aBuf) { *this = _aBuf.c_str(); From 0fc0366ba55086b34315cb3cd5033cdf422c0b2b Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sat, 23 Dec 2023 09:29:08 +0100 Subject: [PATCH 6/7] =?UTF-8?q?UTFstring:=20use=20std::wstring.find()=20in?= =?UTF-8?q?stead=20of=20std::distance(=E2=80=A6,=20std::find())?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/EbmlUnicodeString.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/EbmlUnicodeString.cpp b/src/EbmlUnicodeString.cpp index 3fcacb7e..f9aca3b3 100644 --- a/src/EbmlUnicodeString.cpp +++ b/src/EbmlUnicodeString.cpp @@ -8,6 +8,7 @@ */ #include +#include #include #include "ebml/EbmlUnicodeString.h" @@ -16,6 +17,16 @@ namespace libebml { +namespace { + +std::size_t lengthToFirstNulll(std::wstring const &s) +{ + auto PosNull = s.find(L'\0'); + return PosNull != std::wstring::npos ? PosNull : s.size(); +} + +} + // ===================== UTFstring class =================== UTFstring::UTFstring(const wchar_t * _aBuf) @@ -60,8 +71,8 @@ UTFstring & UTFstring::operator=(wchar_t _aChar) bool UTFstring::operator==(const UTFstring& _aStr) const { // Only compare up to the first 0 char in both strings. - auto LengthThis = std::distance(WString.begin(), std::find(WString.begin(), WString.end(), L'\0')); - auto LengthOther = std::distance(_aStr.WString.begin(), std::find(_aStr.WString.begin(), _aStr.WString.end(), L'\0')); + auto LengthThis = lengthToFirstNulll(WString); + auto LengthOther = lengthToFirstNulll(_aStr.WString); if (LengthThis != LengthOther) return false; From 3aea4f642062e86f66f0bd7004364fb31f3a60e6 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sat, 23 Dec 2023 09:33:04 +0100 Subject: [PATCH 7/7] EbmlString::ReadFully: use automatic memory management/fewer allocations --- src/EbmlString.cpp | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/EbmlString.cpp b/src/EbmlString.cpp index be0b0027..7eebb10e 100644 --- a/src/EbmlString.cpp +++ b/src/EbmlString.cpp @@ -109,24 +109,20 @@ filepos_t EbmlString::ReadData(IOCallback & input, ScopeMode ReadFully) return GetSize(); if (GetSize() == 0) { - Value = ""; - SetValueIsSet(); + Value.clear(); + } else { - auto Buffer = (GetSize() + 1 < std::numeric_limits::max()) ? new (std::nothrow) char[GetSize() + 1] : nullptr; - if (Buffer == nullptr) { - // unable to store the data, skip it - input.setFilePointer(GetSize(), seek_current); - } else { - input.readFully(Buffer, GetSize()); - if (Buffer[GetSize()-1] != '\0') { - Buffer[GetSize()] = '\0'; - } - Value = Buffer; - delete [] Buffer; - SetValueIsSet(); - } + Value.resize(GetSize()); + std::memset(&Value[0], 0, GetSize()); + input.readFully(&Value[0], GetSize()); + + auto PosNull = Value.find('\0'); + if (PosNull != std::string::npos) + Value.resize(PosNull); } + SetValueIsSet(); + return GetSize(); }