Skip to content

Commit

Permalink
Input refactoring in progress.
Browse files Browse the repository at this point in the history
  • Loading branch information
ColinH committed Dec 16, 2023
1 parent 3fec7aa commit 708fc94
Show file tree
Hide file tree
Showing 106 changed files with 778 additions and 682 deletions.
1 change: 1 addition & 0 deletions doc/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
* Added rule [`try_catch_raise_nested`](Rule-Reference.md#try_catch_raise_nested-r-).
* Added rule [`try_catch_std_raise_nested`](Rule-Reference.md#try_catch_std_raise_nested-r-).
* Added rule [`try_catch_type_raise_nested`](Rule-Reference.md#try_catch_type_raise_nested-e-r-).
* Added rules for matching signed integers mirroring the existing ones for unsigned integers.
* Moved depth counter to adapter class `input_with_depth` in [contrib](Contrib-and-Examples#contrib).
* Changed default top-level `rewind_mode` to ~~`dontcare`~~ `optional`.
* Replaced `rewind_mode` values `dontcare` and `active` with new value `optional`.
Expand Down
20 changes: 15 additions & 5 deletions doc/Rule-Reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -1466,20 +1466,30 @@ Convenience wrappers for enumerated properties that return a value instead of an

These rules are available in multiple versions,

* in namespace `tao::pegtl::uint8` for 8-bit integer values,
* in namespace `tao::pegtl::int8` for signed 8-bit integer values,
* in namespace `tao::pegtl::uint8` for unsigned 8-bit integer values,
* in namespace `tao::pegtl::int16_be` for big-endian signed 16-bit integer values,
* in namespace `tao::pegtl::int16_le` for little-endian signed 16-bit integer values,
* in namespace `tao::pegtl::int32_be` for big-endian signed 32-bit integer values,
* in namespace `tao::pegtl::int32_le` for little-endian signed 32-bit integer values,
* in namespace `tao::pegtl::int64_be` for big-endian signed 64-bit integer values,
* in namespace `tao::pegtl::int64_le` for little-endian signed 64-bit integer values,
* in namespace `tao::pegtl::uint16_be` for big-endian unsigned 16-bit integer values,
* in namespace `tao::pegtl::uint16_le` for little-endian unsigned 16-bit integer values,
* in namespace `tao::pegtl::uint32_be` for big-endian unsigned 32-bit integer values,
* in namespace `tao::pegtl::uint32_le` for little-endian unsigned 32-bit integer values,
* in namespace `tao::pegtl::uint64_be` for big-endian unsigned 64-bit integer values, and
* in namespace `tao::pegtl::uint64_le` for little-endian 64-bit integer values.
* in namespace `tao::pegtl::uint64_le` for little-endian 64-bit integer values,

The binary rules need to be manually included from their corresponding headers in the `contrib` section.
however please note that the masked rules are available only for unsigned integers.

These rules read one or more bytes from the input to form (and match) an 8, 16, 32 or 64-bit value, respectively, and corresponding template parameters are given as either `std::uint8_t`, `std::uint16_t`, `std::uint32_t` or `std::uint64_t`.
The binary rules need to be manually included from their corresponding headers.

These rules read one or more bytes from the input to form (and match) an 8, 16, 32 or 64-bit value, respectively, and corresponding template parameters are given as either `std::int8_t`, `std::uint8_t`, `std::int16_t`, `std::uint16_t`, `std::int32_t`, `std::uint32_t`, `std::int64_t` or `std::uint64_t`.

In the following descriptions, the parameter N is the size of a single value in bytes, i.e. either 1, 2, 4 or 8.
The term *input value* indicates a correspondingly sized integer value read from successive bytes of the input.
The term *input value* indicates a correspondingly sized integer value read from the input.
For inputs of values of size 1 like `char` or `std::byte` all integer rules can be used; for inputs of values of size greater than 1 like `int` or `long` only integer rules of matching size are possible.

Binary rules do not rely on other rules.

Expand Down
2 changes: 1 addition & 1 deletion include/tao/pegtl/ascii.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include "config.hpp"

#include "internal/peeks.hpp"
#include "internal/peek_integer.hpp"
#include "internal/result_on_found.hpp"
#include "internal/rules.hpp"

Expand Down
7 changes: 3 additions & 4 deletions include/tao/pegtl/contrib/http.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@
#include "../ascii.hpp"
#include "../config.hpp"
#include "../nothing.hpp"
#include "../remove_first_state.hpp"
#include "../rules.hpp"
#include "../utf8.hpp"

#include "abnf.hpp"
#include "forward.hpp"
#include "remove_first_state.hpp"
#include "uri.hpp"

namespace TAO_PEGTL_NAMESPACE::http
Expand Down Expand Up @@ -167,7 +166,7 @@ namespace TAO_PEGTL_NAMESPACE::http
}
break;
}
in.bump_in_this_line( i );
in.template consume< internal::eol_exclude_tag >( i );
return i > 0;
}
};
Expand All @@ -193,7 +192,7 @@ namespace TAO_PEGTL_NAMESPACE::http
[[nodiscard]] static bool match( ParseInput& in, const std::size_t size, States&&... /*unused*/ )
{
if( in.size( size ) >= size ) {
in.bump( size );
in.template consume< internal::eol_unknown_tag >( size );
return true;
}
return false;
Expand Down
10 changes: 5 additions & 5 deletions include/tao/pegtl/contrib/unescape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace TAO_PEGTL_NAMESPACE::unescape
{
// Utility functions for the unescape actions.

[[nodiscard]] inline bool utf8_append_utf32( std::string& string, const unsigned utf32 )
[[nodiscard]] inline bool utf8_append_utf32( std::string& string, const char32_t utf32 )
{
if( utf32 <= 0x7f ) {
string += static_cast< char >( utf32 & 0xff );
Expand Down Expand Up @@ -150,7 +150,7 @@ namespace TAO_PEGTL_NAMESPACE::unescape
static void apply( const ActionInput& in, std::string& s )
{
assert( !in.empty() ); // First character MUST be present, usually 'u' or 'U'.
if( !utf8_append_utf32( s, unhex_string< unsigned >( in.begin() + 1, in.end() ) ) ) {
if( !utf8_append_utf32( s, unhex_string< char32_t >( in.begin() + 1, in.end() ) ) ) {
throw parse_error( "invalid escaped unicode code point", in );
}
}
Expand All @@ -159,7 +159,7 @@ namespace TAO_PEGTL_NAMESPACE::unescape
static bool apply( const ActionInput& in, std::string& s )
{
assert( !in.empty() ); // First character MUST be present, usually 'u' or 'U'.
return utf8_append_utf32( s, unhex_string< unsigned >( in.begin() + 1, in.end() ) );
return utf8_append_utf32( s, unhex_string< char32_t >( in.begin() + 1, in.end() ) );
}
#endif
};
Expand Down Expand Up @@ -189,9 +189,9 @@ namespace TAO_PEGTL_NAMESPACE::unescape
{
assert( ( ( in.size() + 1 ) % 6 ) == 0 ); // Expects multiple "\\u1234", starting with the first "u".
for( const char* b = in.begin() + 1; b < in.end(); b += 6 ) {
const auto c = unhex_string< unsigned >( b, b + 4 );
const auto c = unhex_string< char32_t >( b, b + 4 );
if( ( 0xd800 <= c ) && ( c <= 0xdbff ) && ( b + 6 < in.end() ) ) {
const auto d = unhex_string< unsigned >( b + 6, b + 10 );
const auto d = unhex_string< char32_t >( b + 6, b + 10 );
if( ( 0xdc00 <= d ) && ( d <= 0xdfff ) ) {
b += 6;
(void)utf8_append_utf32( s, ( ( ( c & 0x03ff ) << 10 ) | ( d & 0x03ff ) ) + 0x10000 );
Expand Down
45 changes: 45 additions & 0 deletions include/tao/pegtl/enums.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) 2022-2023 Dr. Colin Hirsch and Daniel Frey
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)

#ifndef TAO_PEGTL_ENUMS_HPP
#define TAO_PEGTL_ENUMS_HPP

#include "config.hpp"

#include "internal/endian.hpp"
#include "internal/peek_endian.hpp"
#include "internal/result_on_found.hpp"
#include "internal/rules.hpp"

namespace TAO_PEGTL_NAMESPACE
{
// Rule wrappers whose template arguments are values of a type deduced via
// `auto`; all further values must have the same type (`decltype( E )` or
// `decltype( Lo )`). Judging by the header name these are meant for
// enumeration values -- TODO confirm whether other types are supported.
//
// Every rule here derives from the same-named template in `internal` and
// passes `internal::peek_endian< T, internal::big_endian >` as the peek
// policy, where T is the deduced value type.
namespace enums_be
{
// clang-format off
// `not_one` and `not_range` instantiate the internal rule with
// `result_on_found::failure`, while `one` and `range` use
// `result_on_found::success`; presumably the former fail where the latter
// succeed -- verify against internal::one / internal::range.
template< auto E, decltype( E )... Es > struct not_one : internal::one< internal::result_on_found::failure, internal::peek_endian< decltype( E ), internal::big_endian >, E, Es... > {};
template< auto Lo, decltype( Lo ) Hi > struct not_range : internal::range< internal::result_on_found::failure, internal::peek_endian< decltype( Lo ), internal::big_endian >, Lo, Hi > {};
template< auto E, decltype( E )... Es > struct one : internal::one< internal::result_on_found::success, internal::peek_endian< decltype( E ), internal::big_endian >, E, Es... > {};
template< auto Lo, decltype( Lo ) Hi > struct range : internal::range< internal::result_on_found::success, internal::peek_endian< decltype( Lo ), internal::big_endian >, Lo, Hi > {};
// `ranges` forwards the complete value list unchanged to internal::ranges.
template< auto E, decltype( E )... Es > struct ranges : internal::ranges< internal::peek_endian< decltype( E ), internal::big_endian >, E, Es... > {};
// `string` expands to an internal::seq of single-value `one` rules, one per
// template argument, in the order given (E first, then each of Es).
template< auto E, decltype( E )... Es > struct string : internal::seq< internal::one< internal::result_on_found::success, internal::peek_endian< decltype( E ), internal::big_endian >, E >, internal::one< internal::result_on_found::success, internal::peek_endian< decltype( E ), internal::big_endian >, Es >... > {};
// clang-format on

} // namespace enums_be

// Rule wrappers whose template arguments are values of a type deduced via
// `auto`; all further values must have the same type (`decltype( E )` or
// `decltype( Lo )`). Judging by the header name these are meant for
// enumeration values -- TODO confirm whether other types are supported.
//
// Every rule here derives from the same-named template in `internal` and
// passes `internal::peek_endian< T, internal::little_endian >` as the peek
// policy, where T is the deduced value type.
namespace enums_le
{
// clang-format off
// `not_one` and `not_range` instantiate the internal rule with
// `result_on_found::failure`, while `one` and `range` use
// `result_on_found::success`; presumably the former fail where the latter
// succeed -- verify against internal::one / internal::range.
template< auto E, decltype( E )... Es > struct not_one : internal::one< internal::result_on_found::failure, internal::peek_endian< decltype( E ), internal::little_endian >, E, Es... > {};
template< auto Lo, decltype( Lo ) Hi > struct not_range : internal::range< internal::result_on_found::failure, internal::peek_endian< decltype( Lo ), internal::little_endian >, Lo, Hi > {};
template< auto E, decltype( E )... Es > struct one : internal::one< internal::result_on_found::success, internal::peek_endian< decltype( E ), internal::little_endian >, E, Es... > {};
template< auto Lo, decltype( Lo ) Hi > struct range : internal::range< internal::result_on_found::success, internal::peek_endian< decltype( Lo ), internal::little_endian >, Lo, Hi > {};
// `ranges` forwards the complete value list unchanged to internal::ranges.
template< auto E, decltype( E )... Es > struct ranges : internal::ranges< internal::peek_endian< decltype( E ), internal::little_endian >, E, Es... > {};
// `string` expands to an internal::seq of single-value `one` rules, one per
// template argument, in the order given (E first, then each of Es).
template< auto E, decltype( E )... Es > struct string : internal::seq< internal::one< internal::result_on_found::success, internal::peek_endian< decltype( E ), internal::little_endian >, E >, internal::one< internal::result_on_found::success, internal::peek_endian< decltype( E ), internal::little_endian >, Es >... > {};
// clang-format on

} // namespace enums_le

} // namespace TAO_PEGTL_NAMESPACE

#endif
28 changes: 25 additions & 3 deletions include/tao/pegtl/inputs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ namespace TAO_PEGTL_NAMESPACE
using argv_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::argv_input_with_source > >; // TODO: Add input_with_start?
template< typename Container >
using copy_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_with_source< std::string, internal::copy_input< Container > > > >;
using file_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::file_input_with_source > >;
using file_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::file_input_with_source< file_input > > >;
using read_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::file_input_with_source< read_input > > >;
template< typename Data >
using view_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_with_source< std::string, internal::view_input< Data > > > >; // TODO: Add input_with_start?

Expand All @@ -37,6 +38,10 @@ namespace TAO_PEGTL_NAMESPACE
template< typename Eol >
using text_file_input = internal::input_with_fakes< internal::input_with_peeks< internal::text_input< Eol, internal::file_input > > >;
template< typename Eol >
using lazy_read_input = internal::input_with_fakes< internal::input_with_peeks< internal::lazy_input< Eol, internal::read_input > > >;
template< typename Eol >
using text_read_input = internal::input_with_fakes< internal::input_with_peeks< internal::text_input< Eol, internal::read_input > > >;
template< typename Eol >
using lazy_view_input = internal::input_with_fakes< internal::input_with_peeks< internal::lazy_input< Eol, internal::view_input< char > > > >; // TODO: Add input_with_start?
template< typename Eol >
using text_view_input = internal::input_with_fakes< internal::input_with_peeks< internal::text_input< Eol, internal::view_input< char > > > >; // TODO: Add input_with_start?
Expand All @@ -46,14 +51,31 @@ namespace TAO_PEGTL_NAMESPACE
template< typename Eol >
using text_copy_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::text_input_with_source< Eol, std::string, internal::copy_input< std::string > > > >;
template< typename Eol >
using lazy_file_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::lazy_file_input_with_source< Eol > > >;
using lazy_file_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_double_path< internal::lazy_input_with_source< Eol, std::filesystem::path, internal::file_input > > > >;
template< typename Eol >
using text_file_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_double_path< internal::text_input_with_source< Eol, std::filesystem::path, internal::file_input > > > >;
template< typename Eol >
using text_file_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::text_file_input_with_source< Eol > > >;
using lazy_read_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_double_path< internal::lazy_input_with_source< Eol, std::filesystem::path, internal::read_input > > > >;
template< typename Eol >
using text_read_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_double_path< internal::text_input_with_source< Eol, std::filesystem::path, internal::read_input > > > >;
template< typename Eol >
using lazy_view_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::lazy_input_with_source< Eol, std::string, internal::view_input< char > > > >; // TODO: Add input_with_start?
template< typename Eol >
using text_view_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::text_input_with_source< Eol, std::string, internal::view_input< char > > > >; // TODO: Add input_with_start?

#if defined( TAO_PEGTL_MMAP_AVAILABLE )
using mmap_input = internal::input_with_fakes< internal::input_with_peeks< internal::mmap_input< char > > >;
using mmap_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::file_input_with_source< internal::mmap_input< char > > > >;
template< typename Eol >
using lazy_mmap_input = internal::input_with_fakes< internal::input_with_peeks< internal::lazy_input< Eol, internal::mmap_input< char > > > >;
template< typename Eol >
using text_mmap_input = internal::input_with_fakes< internal::input_with_peeks< internal::text_input< Eol, internal::mmap_input< char > > > >;
template< typename Eol >
using lazy_mmap_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_double_path< internal::lazy_input_with_source< Eol, std::filesystem::path, internal::mmap_input< char > > > > >;
template< typename Eol >
using text_mmap_input_with_source = internal::input_with_fakes< internal::input_with_peeks< internal::input_double_path< internal::text_input_with_source< Eol, std::filesystem::path, internal::mmap_input< char > > > > >;
#endif

} // namespace TAO_PEGTL_NAMESPACE

#endif
50 changes: 50 additions & 0 deletions include/tao/pegtl/int16.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright (c) 2018-2023 Dr. Colin Hirsch and Daniel Frey
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)

#ifndef TAO_PEGTL_INT16_HPP
#define TAO_PEGTL_INT16_HPP

#include "config.hpp"

#include "internal/peek_integer.hpp"
#include "internal/result_on_found.hpp"
#include "internal/rules.hpp"

namespace TAO_PEGTL_NAMESPACE
{
// Public rules for `std::int16_t` values. Every rule derives from the
// same-named template in `internal`, always with `internal::peek_int16_be`
// as the peek policy (big-endian by its name -- confirm in peek_integer.hpp).
// NOTE(review): `std::int16_t` is used here without a direct `<cstdint>`
// include; presumably one of the internal headers provides it -- verify.
namespace int16_be
{
// clang-format off
// `any` takes no value arguments; `many` forwards Count to internal::many.
struct any : internal::any< internal::peek_int16_be > {};
template< unsigned Count > struct many : internal::many< Count, internal::peek_int16_be > {};

// `not_one` and `not_range` use `result_on_found::failure`, `one` and `range`
// use `result_on_found::success`; presumably the former fail where the latter
// succeed -- verify against internal::one / internal::range.
template< std::int16_t... Cs > struct not_one : internal::one< internal::result_on_found::failure, internal::peek_int16_be, Cs... > {};
template< std::int16_t Lo, std::int16_t Hi > struct not_range : internal::range< internal::result_on_found::failure, internal::peek_int16_be, Lo, Hi > {};
template< std::int16_t... Cs > struct one : internal::one< internal::result_on_found::success, internal::peek_int16_be, Cs... > {};
template< std::int16_t Lo, std::int16_t Hi > struct range : internal::range< internal::result_on_found::success, internal::peek_int16_be, Lo, Hi > {};
// `ranges` forwards the complete value list unchanged to internal::ranges.
template< std::int16_t... Cs > struct ranges : internal::ranges< internal::peek_int16_be, Cs... > {};
// `string` expands to an internal::seq of single-value `one` rules, one per
// value in Cs, in the order given.
template< std::int16_t... Cs > struct string : internal::seq< internal::one< internal::result_on_found::success, internal::peek_int16_be, Cs >... > {};
// clang-format on

} // namespace int16_be

// Public rules for `std::int16_t` values. Every rule derives from the
// same-named template in `internal`, always with `internal::peek_int16_le`
// as the peek policy (little-endian by its name -- confirm in
// peek_integer.hpp).
// NOTE(review): `std::int16_t` is used here without a direct `<cstdint>`
// include; presumably one of the internal headers provides it -- verify.
namespace int16_le
{
// clang-format off
// `any` takes no value arguments; `many` forwards Count to internal::many.
struct any : internal::any< internal::peek_int16_le > {};
template< unsigned Count > struct many : internal::many< Count, internal::peek_int16_le > {};

// `not_one` and `not_range` use `result_on_found::failure`, `one` and `range`
// use `result_on_found::success`; presumably the former fail where the latter
// succeed -- verify against internal::one / internal::range.
template< std::int16_t... Cs > struct not_one : internal::one< internal::result_on_found::failure, internal::peek_int16_le, Cs... > {};
template< std::int16_t Lo, std::int16_t Hi > struct not_range : internal::range< internal::result_on_found::failure, internal::peek_int16_le, Lo, Hi > {};
template< std::int16_t... Cs > struct one : internal::one< internal::result_on_found::success, internal::peek_int16_le, Cs... > {};
template< std::int16_t Lo, std::int16_t Hi > struct range : internal::range< internal::result_on_found::success, internal::peek_int16_le, Lo, Hi > {};
// `ranges` forwards the complete value list unchanged to internal::ranges.
template< std::int16_t... Cs > struct ranges : internal::ranges< internal::peek_int16_le, Cs... > {};
// `string` expands to an internal::seq of single-value `one` rules, one per
// value in Cs, in the order given.
template< std::int16_t... Cs > struct string : internal::seq< internal::one< internal::result_on_found::success, internal::peek_int16_le, Cs >... > {};
// clang-format on

} // namespace int16_le

} // namespace TAO_PEGTL_NAMESPACE

#endif
Loading

0 comments on commit 708fc94

Please sign in to comment.