Skip to content

Commit

Permalink
Merge pull request #70 from janstarke/feature/release/2.1.4
Browse files Browse the repository at this point in the history
Feature/release/2.1.4
  • Loading branch information
janstarke authored Nov 30, 2021
2 parents 4795379 + 814fc8c commit a10d034
Show file tree
Hide file tree
Showing 12 changed files with 46 additions and 40 deletions.
7 changes: 7 additions & 0 deletions src/librexgen/c/librexgen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ static size_t callback_wc_wrapper(char* dst, const size_t buffer_size) {
return wcstombs(dst, callback_buffer, buffer_size);
}

/**
 * Two-argument variant of c_regex_cb_mb().
 *
 * Forwards regex_str and cb unchanged to c_regex_cb_mb() and supplies a
 * null pointer as the third argument.
 *
 * NOTE(review): the third parameter of c_regex_cb_mb() is not visible
 * here; it is presumably an optional error handler -- confirm against
 * its declaration.
 * NOTE(review): identifiers containing a double underscore are reserved
 * for the implementation in C++; the name is kept because it is part of
 * the exported interface.
 *
 * @param regex_str regular expression text handed to c_regex_cb_mb()
 * @param cb        callback handed to c_regex_cb_mb()
 * @return the value returned by c_regex_cb_mb()
 */
EXPORT
c_regex_ptr __c_regex_cb_mb(
    const char* regex_str,
    callback_fp_mb cb) {
  // nullptr instead of the NULL macro: type-safe null pointer constant.
  return c_regex_cb_mb(regex_str, cb, nullptr);
}

EXPORT
c_regex_ptr c_regex_cb_mb(
const char* regex_str,
Expand Down
5 changes: 5 additions & 0 deletions src/librexgen/c/librexgen.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ c_regex_ptr c_regex_cb(
const char* regex_str,
callback_fp cb));

/*
 * Two-argument variant of c_regex_cb_mb(): takes only the regex string
 * and the callback. The implementation in librexgen.cpp forwards both
 * arguments to c_regex_cb_mb() and passes a null third argument.
 *
 * NOTE(review): the double underscore in the name is reserved for the
 * implementation in C++; kept because the symbol is exported.
 */
EXPORT
c_regex_ptr __c_regex_cb_mb(
const char* regex_str,
callback_fp_mb cb);

EXPORT
c_regex_ptr c_regex_cb_mb(
const char* regex_str,
Expand Down
44 changes: 20 additions & 24 deletions src/librexgen/iterator/classregexiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,15 @@ namespace rexgen {

template <class Iter>
ClassRegexIterator(Iter begin, Iter end)
: Iterator(), current(-1), characters() {
std::for_each(begin, end, [this](const wchar_t& ch) {characters.append_widechar(ch);});
: Iterator(), current(-1), characters_count(0), characters() {
std::string::size_type index = 0;
for (size_t n = 0; n < characters.length(); ++n) {

/*
* TODO(jasa):
* the call to character_length is very slow and should be removed
*/
lengths.push_back(characters.character_length(n));

std::for_each(begin, end, [this, &index](const wchar_t& ch) {
size_t mb_length = characters.append_widechar(ch);
lengths.push_back(mb_length);
indices.push_back(index);
index += characters.character_length(n);
}
characters_count = static_cast<size_t>(characters.length());
index += mb_length;
++characters_count;
});
state = usable;
}

Expand All @@ -62,16 +56,18 @@ namespace rexgen {
virtual void updateAttributes(IteratorState& /* iterState */) {}

inline void value(SimpleString *dst) const {
/**
* FIXME(jasa):
* this condition may be expensive and should be unnecessary
*/
if (current >= 0) {
const std::string::size_type &length = lengths[current];
const std::string::size_type &index = indices[current];

for (std::string::size_type n = 0; n < length; ++n) {
dst->push_back(characters[index + n]);
if (characters_count > 0) {
/**
* FIXME(jasa):
* this condition may be expensive and should be unnecessary
*/
if (current >= 0) {
const std::string::size_type &length = lengths[current];
const std::string::size_type &index = indices[current];

for (std::string::size_type n = 0; n < length; ++n) {
dst->push_back(characters[index + n]);
}
}
}
}
Expand Down Expand Up @@ -113,7 +109,7 @@ namespace rexgen {

/*
* we must use this because multibyte characters
* cannot be counted effectively
* cannot be counted efficiently
*/
int characters_count;
SimpleString characters;
Expand Down
4 changes: 2 additions & 2 deletions src/librexgen/librexgen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ std::shared_ptr<rexgen::Regex> parse_regex(const char* regex, const rexgen::Rexg

try {
auto result = driver.parse(regex);
/*

if (driver.hasInvalidGroupReferences()) {
driver.handleParserError("This regular expression has an invalid back reference");
return nullptr;
}
*/

return result;
} catch (SyntaxError &exc) {
driver.handleParserError(exc.getMessage());
Expand Down
2 changes: 1 addition & 1 deletion src/librexgen/lua/librexgen_lua.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ int rexgen_value(lua_State* L, c_iterator_ptr iter) {

EXPORT
int rexgen_parse_regex(lua_State* L) {
auto regex = c_regex_cb_mb(luaL_checklstring(L, 1, NULL), callback, parser_error);
auto regex = c_regex_cb_mb2(luaL_checklstring(L, 1, NULL), callback, parser_error);

if (regex == c_regex_none) {
lua_pushliteral(L, "parsing error");
Expand Down
9 changes: 3 additions & 6 deletions src/librexgen/parser/regex_lexer.l
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ CARRIAGERETURN \\r
CLASS_DIGIT \\d
CLASS_WORD \\w
CLASS_SPACE \\s
SPECIAL [ \t\r\n,;:=/%&<>-]
NORMAL [^ \t\r\n.,;:=/%&?<>-]
GROUPID \\[1-9]
STREAM \\0
ESCAPED \\[^xnurdw0-9]
Expand All @@ -75,7 +73,7 @@ MULTIBYTE_CHARACTER \xFE
{ANSICHAR} { return RexgenParser::make_T_ANY_CHAR(parseAnsiChar(yytext)); }
{UNICODECHAR} { return RexgenParser::make_T_ANY_CHAR(parseUnicodeChar(yytext)); }
{MULTIBYTE_CHARACTER} { return RexgenParser::make_T_ANY_CHAR(*wcontent_ptr++); }
{CLASS_DIGIT} { return RexgenParser::make_T_CLASS_DIGIT(); }
{CLASS_DIGIT} { return RexgenParser::make_T_CLASS_DIGIT(); }
{CLASS_WORD} { return RexgenParser::make_T_CLASS_WORD(); }
{CLASS_SPACE} { return RexgenParser::make_T_CLASS_SPACE(); }
{LINEFEED} { return RexgenParser::make_T_ANY_CHAR(btowc('\n')); }
Expand All @@ -87,6 +85,7 @@ MULTIBYTE_CHARACTER \xFE
{BEGIN_GROUP_WITH_OPTIONS} { return beginGroupWithOptions(driver); }
{BEGIN_GROUP} { return beginGroup(driver); }
{END_GROUP} { return RexgenParser::make_T_END_GROUP(); }
<IN_CLASS>"?" { return RexgenParser::make_T_ANY_CHAR(btowc(yytext[0])); }
"?" { return RexgenParser::make_T_OPTIONAL_QUANTIFIER(); }
"{" { BEGIN(IN_QUANTIFIER); return RexgenParser::make_T_BEGIN_QUANTIFIER(); }
"}" { BEGIN(INITIAL); return RexgenParser::make_T_END_QUANTIFIER(); }
Expand All @@ -95,8 +94,6 @@ MULTIBYTE_CHARACTER \xFE
<IN_QUANTIFIER>"," { return RexgenParser::make_T_COMMA(); }
<IN_CLASS>"-" { return RexgenParser::make_T_HYPHEN(); }
<IN_QUANTIFIER>{DIGIT}+ { return RexgenParser::make_T_NUMBER(atoi(yytext)); }
{NORMAL} { return RexgenParser::make_T_ANY_CHAR(btowc(yytext[0])); }
{SPECIAL} { return RexgenParser::make_T_ANY_CHAR(btowc(yytext[0])); }
. { }
. { return RexgenParser::make_T_ANY_CHAR(btowc(yytext[0])); }

%%
2 changes: 1 addition & 1 deletion src/librexgen/parser/rexgenparsingdriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ namespace rexgen {
bool invalids = false;
for (auto ref : groupRefs) {
for (auto gr : *(ref.second)) {
invalids |= (gr->getRegex().expired() == false);
invalids |= (gr->getRegex().expired() == true);
}
}
return invalids;
Expand Down
4 changes: 2 additions & 2 deletions src/librexgen/string/simplestring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ void SimpleString::toggle_case(size_t idx) {
}
}

SimpleString& SimpleString::append_widechar(const wchar_t &widechar) {
size_t SimpleString::append_widechar(const wchar_t &widechar) {
char buffer[MB_LEN_MAX];
int ch_size = std::wctomb(&buffer[0], widechar);

Expand All @@ -64,7 +64,7 @@ SimpleString& SimpleString::append_widechar(const wchar_t &widechar) {
}

this->append(&buffer[0], ch_size);
return *this;
return static_cast<size_t>(ch_size);
}

wchar_t SimpleString::widechar_at(size_t index) const {
Expand Down
2 changes: 1 addition & 1 deletion src/librexgen/string/simplestring.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class SimpleString : public std::string {
bool isupper(unsigned int n) const;

wchar_t widechar_at(size_t index) const;
SimpleString& append_widechar(const wchar_t& codepoint);
size_t append_widechar(const wchar_t& codepoint);
};

#endif /* __cplusplus */
Expand Down
2 changes: 1 addition & 1 deletion src/rexgen/rexgen.1
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ T}
.TE

.SH Examples
.IP "rexgen index.php?id=[1-5]"
.IP "rexgen index\\.php\\?id=[1-5]"
Would create the results
.nf
index.php?id=1
Expand Down
3 changes: 2 additions & 1 deletion src/unittest/testcases/test_simpleregex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ TEST_CASE("TestSimple2", "TestSimple2") {validateRegex("test[\\d]", 10);}
TEST_CASE("TestSimple3", "TestSimple3") {validateRegex("test[a\\d]", 11);}
TEST_CASE("TestSimple4", "TestSimple4") {validateRegex("test[\\da]", 11);}
TEST_CASE("TestSimple5", "TestSimple5") {validateRegex("test\\da", 10);}
TEST_CASE("TestSimple6", "TestSimple6") {validateRegex("a\\dtest", 10);}
TEST_CASE("TestSimple6", "TestSimple6") {validateRegex("a\\dtest", 10);}
TEST_CASE("TestSimple7", "TestSimple7") {validateRegex("index\\.php_id=[1-5]", 5);}
2 changes: 1 addition & 1 deletion src/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.1.3
2.1.4

0 comments on commit a10d034

Please sign in to comment.