Skip to content

Commit

Permalink
Fixed error with "extractURLParameter" function (read after buffer); …
Browse files Browse the repository at this point in the history
…improved performance; added support for zero bytes in URLs; renamed Chars_t type [#CLICKHOUSE-2]
  • Loading branch information
alexey-milovidov committed Nov 25, 2018
1 parent 244e64a commit 141e979
Show file tree
Hide file tree
Showing 56 changed files with 368 additions and 282 deletions.
2 changes: 1 addition & 1 deletion dbms/programs/client/Client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <Poco/File.h>
#include <Poco/Util/Application.h>
#include <common/readline_use.h>
#include <common/find_first_symbols.h>
#include <common/find_symbols.h>
#include <Common/ClickHouseRevision.h>
#include <Common/Stopwatch.h>
#include <Common/Exception.h>
Expand Down
2 changes: 1 addition & 1 deletion dbms/programs/odbc-bridge/validateODBCConnectionString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <cstring>
#include <algorithm>
#include <Poco/String.h>
#include <common/find_first_symbols.h>
#include <common/find_symbols.h>
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include "validateODBCConnectionString.h"
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,11 +437,11 @@ ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hin
auto res = ColumnArray::create(data->cloneEmpty());

const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
const ColumnString::Chars_t & src_chars = src_string.getChars();
const ColumnString::Chars & src_chars = src_string.getChars();
const Offsets & src_string_offsets = src_string.getOffsets();
const Offsets & src_offsets = getOffsets();

ColumnString::Chars_t & res_chars = typeid_cast<ColumnString &>(res->getData()).getChars();
ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res->getData()).getChars();
Offsets & res_string_offsets = typeid_cast<ColumnString &>(res->getData()).getOffsets();
Offsets & res_offsets = res->getOffsets();

Expand Down Expand Up @@ -781,11 +781,11 @@ ColumnPtr ColumnArray::replicateString(const Offsets & replicate_offsets) const
ColumnArray & res_ = static_cast<ColumnArray &>(*res);

const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
const ColumnString::Chars_t & src_chars = src_string.getChars();
const ColumnString::Chars & src_chars = src_string.getChars();
const Offsets & src_string_offsets = src_string.getOffsets();
const Offsets & src_offsets = getOffsets();

ColumnString::Chars_t & res_chars = typeid_cast<ColumnString &>(res_.getData()).getChars();
ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res_.getData()).getChars();
Offsets & res_string_offsets = typeid_cast<ColumnString &>(res_.getData()).getOffsets();
Offsets & res_offsets = res_.getOffsets();

Expand Down
6 changes: 3 additions & 3 deletions dbms/src/Columns/ColumnFixedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) con

auto res = ColumnFixedString::create(n);

Chars_t & res_chars = res->chars;
Chars & res_chars = res->chars;

res_chars.resize(n * limit);

Expand All @@ -274,7 +274,7 @@ ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, siz

auto res = ColumnFixedString::create(n);

Chars_t & res_chars = res->chars;
Chars & res_chars = res->chars;

res_chars.resize(n * limit);

Expand All @@ -296,7 +296,7 @@ ColumnPtr ColumnFixedString::replicate(const Offsets & offsets) const
if (0 == col_size)
return res;

Chars_t & res_chars = res->chars;
Chars & res_chars = res->chars;
res_chars.resize(n * offsets.back());

Offset curr_offset = 0;
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Columns/ColumnFixedString.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ class ColumnFixedString final : public COWPtrHelper<IColumn, ColumnFixedString>
public:
friend class COWPtrHelper<IColumn, ColumnFixedString>;

using Chars_t = PaddedPODArray<UInt8>;
using Chars = PaddedPODArray<UInt8>;

private:
/// Bytes of rows, laid in succession. The strings are stored without a trailing zero byte.
/** NOTE It is required that the offset and type of chars in the object be the same as that of `data in ColumnUInt8`.
* Used in `packFixed` function (AggregationCommon.h)
*/
Chars_t chars;
Chars chars;
/// The size of the rows.
const size_t n;

Expand Down Expand Up @@ -138,8 +138,8 @@ class ColumnFixedString final : public COWPtrHelper<IColumn, ColumnFixedString>

/// Specialized part of interface, not from IColumn.

Chars_t & getChars() { return chars; }
const Chars_t & getChars() const { return chars; }
Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; }

size_t getN() const { return n; }
};
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Columns/ColumnString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co

auto res = ColumnString::create();

Chars_t & res_chars = res->chars;
Chars & res_chars = res->chars;
Offsets & res_offsets = res->offsets;

filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);
Expand All @@ -126,7 +126,7 @@ ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const

auto res = ColumnString::create();

Chars_t & res_chars = res->chars;
Chars & res_chars = res->chars;
Offsets & res_offsets = res->offsets;

if (limit == size)
Expand Down Expand Up @@ -202,7 +202,7 @@ ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t l

auto res = ColumnString::create();

Chars_t & res_chars = res->chars;
Chars & res_chars = res->chars;
Offsets & res_offsets = res->offsets;

size_t new_chars_size = 0;
Expand Down Expand Up @@ -287,7 +287,7 @@ ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
if (0 == col_size)
return res;

Chars_t & res_chars = res->chars;
Chars & res_chars = res->chars;
Offsets & res_offsets = res->offsets;
res_chars.reserve(chars.size() / col_size * replicate_offsets.back());
res_offsets.reserve(replicate_offsets.back());
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Columns/ColumnString.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace DB
class ColumnString final : public COWPtrHelper<IColumn, ColumnString>
{
public:
using Chars_t = PaddedPODArray<UInt8>;
using Chars = PaddedPODArray<UInt8>;

private:
friend class COWPtrHelper<IColumn, ColumnString>;
Expand All @@ -29,7 +29,7 @@ class ColumnString final : public COWPtrHelper<IColumn, ColumnString>

/// Bytes of strings, placed contiguously.
/// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
Chars_t chars;
Chars chars;

size_t ALWAYS_INLINE offsetAt(size_t i) const { return i == 0 ? 0 : offsets[i - 1]; }

Expand Down Expand Up @@ -245,8 +245,8 @@ class ColumnString final : public COWPtrHelper<IColumn, ColumnString>
bool canBeInsideNullable() const override { return true; }


Chars_t & getChars() { return chars; }
const Chars_t & getChars() const { return chars; }
Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; }

Offsets & getOffsets() { return offsets; }
const Offsets & getOffsets() const { return offsets; }
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Common/parseAddress.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <Common/parseAddress.h>
#include <Common/Exception.h>
#include <IO/ReadHelpers.h>
#include <common/find_first_symbols.h>
#include <common/find_symbols.h>


namespace DB
Expand Down
16 changes: 8 additions & 8 deletions dbms/src/DataTypes/DataTypeFixedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void DataTypeFixedString::serializeBinary(const IColumn & column, size_t row_num

void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnFixedString::Chars_t & data = static_cast<ColumnFixedString &>(column).getChars();
ColumnFixedString::Chars & data = static_cast<ColumnFixedString &>(column).getChars();
size_t old_size = data.size();
data.resize(old_size + n);
try
Expand All @@ -81,7 +81,7 @@ void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr)

void DataTypeFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnFixedString::Chars_t & data = typeid_cast<const ColumnFixedString &>(column).getChars();
const ColumnFixedString::Chars & data = typeid_cast<const ColumnFixedString &>(column).getChars();

size_t size = data.size() / n;

Expand All @@ -95,7 +95,7 @@ void DataTypeFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffe

void DataTypeFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
{
ColumnFixedString::Chars_t & data = typeid_cast<ColumnFixedString &>(column).getChars();
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();

size_t initial_size = data.size();
size_t max_bytes = limit * n;
Expand Down Expand Up @@ -126,7 +126,7 @@ void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t ro
template <typename Reader>
static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader)
{
ColumnFixedString::Chars_t & data = typeid_cast<ColumnFixedString &>(column).getChars();
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
size_t prev_size = data.size();

try
Expand All @@ -152,7 +152,7 @@ static inline void read(const DataTypeFixedString & self, IColumn & column, Read

void DataTypeFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars_t & data) { readEscapedStringInto(data, istr); });
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); });
}


Expand All @@ -165,7 +165,7 @@ void DataTypeFixedString::serializeTextQuoted(const IColumn & column, size_t row

void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars_t & data) { readQuotedStringInto<true>(data, istr); });
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); });
}


Expand All @@ -178,7 +178,7 @@ void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_n

void DataTypeFixedString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars_t & data) { readJSONStringInto(data, istr); });
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); });
}


Expand All @@ -198,7 +198,7 @@ void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_nu

void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars_t & data) { readCSVStringInto(data, istr, csv); });
read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); });
}


Expand Down
18 changes: 9 additions & 9 deletions dbms/src/DataTypes/DataTypeString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void DataTypeString::serializeBinary(const IColumn & column, size_t row_num, Wri
void DataTypeString::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnString & column_string = static_cast<ColumnString &>(column);
ColumnString::Chars_t & data = column_string.getChars();
ColumnString::Chars & data = column_string.getChars();
ColumnString::Offsets & offsets = column_string.getOffsets();

UInt64 size;
Expand Down Expand Up @@ -81,7 +81,7 @@ void DataTypeString::deserializeBinary(IColumn & column, ReadBuffer & istr) cons
void DataTypeString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnString & column_string = typeid_cast<const ColumnString &>(column);
const ColumnString::Chars_t & data = column_string.getChars();
const ColumnString::Chars & data = column_string.getChars();
const ColumnString::Offsets & offsets = column_string.getOffsets();

size_t size = column.size();
Expand Down Expand Up @@ -111,7 +111,7 @@ void DataTypeString::serializeBinaryBulk(const IColumn & column, WriteBuffer & o


template <int UNROLL_TIMES>
static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
{
size_t offset = data.size();
for (size_t i = 0; i < limit; ++i)
Expand Down Expand Up @@ -174,7 +174,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, Column
void DataTypeString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const
{
ColumnString & column_string = typeid_cast<ColumnString &>(column);
ColumnString::Chars_t & data = column_string.getChars();
ColumnString::Chars & data = column_string.getChars();
ColumnString::Offsets & offsets = column_string.getOffsets();

double avg_chars_size = 1; /// By default reserve only for empty strings.
Expand Down Expand Up @@ -235,7 +235,7 @@ template <typename Reader>
static inline void read(IColumn & column, Reader && reader)
{
ColumnString & column_string = static_cast<ColumnString &>(column);
ColumnString::Chars_t & data = column_string.getChars();
ColumnString::Chars & data = column_string.getChars();
ColumnString::Offsets & offsets = column_string.getOffsets();

size_t old_chars_size = data.size();
Expand All @@ -258,7 +258,7 @@ static inline void read(IColumn & column, Reader && reader)

void DataTypeString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(column, [&](ColumnString::Chars_t & data) { readEscapedStringInto(data, istr); });
read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); });
}


Expand All @@ -270,7 +270,7 @@ void DataTypeString::serializeTextQuoted(const IColumn & column, size_t row_num,

void DataTypeString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(column, [&](ColumnString::Chars_t & data) { readQuotedStringInto<true>(data, istr); });
read(column, [&](ColumnString::Chars & data) { readQuotedStringInto<true>(data, istr); });
}


Expand All @@ -282,7 +282,7 @@ void DataTypeString::serializeTextJSON(const IColumn & column, size_t row_num, W

void DataTypeString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(column, [&](ColumnString::Chars_t & data) { readJSONStringInto(data, istr); });
read(column, [&](ColumnString::Chars & data) { readJSONStringInto(data, istr); });
}


Expand All @@ -300,7 +300,7 @@ void DataTypeString::serializeTextCSV(const IColumn & column, size_t row_num, Wr

void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
read(column, [&](ColumnString::Chars_t & data) { readCSVStringInto(data, istr, settings.csv); });
read(column, [&](ColumnString::Chars & data) { readCSVStringInto(data, istr, settings.csv); });
}


Expand Down
2 changes: 1 addition & 1 deletion dbms/src/DataTypes/tests/data_type_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ try

{
auto column = ColumnString::create();
ColumnString::Chars_t & data = column->getChars();
ColumnString::Chars & data = column->getChars();
ColumnString::Offsets & offsets = column->getOffsets();

data.resize(n * size);
Expand Down
6 changes: 3 additions & 3 deletions dbms/src/Functions/EmptyImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct EmptyImpl
/// If the function will return constant value for FixedString data type.
static constexpr auto is_fixed_to_constant = false;

static void vector(const ColumnString::Chars_t & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 1;
Expand All @@ -31,12 +31,12 @@ struct EmptyImpl
}

/// Only make sense if is_fixed_to_constant.
static void vector_fixed_to_constant(const ColumnString::Chars_t & /*data*/, size_t /*n*/, UInt8 & /*res*/)
static void vector_fixed_to_constant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/)
{
throw Exception("Logical error: 'vector_fixed_to_constant method' is called", ErrorCodes::LOGICAL_ERROR);
}

static void vector_fixed_to_vector(const ColumnString::Chars_t & data, size_t n, PaddedPODArray<UInt8> & res)
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
{
std::vector<char> empty_chars(n);
size_t size = data.size() / n;
Expand Down
Loading

0 comments on commit 141e979

Please sign in to comment.