Skip to content
This repository has been archived by the owner on Jun 30, 2021. It is now read-only.

[RFC] ragel support #99

Closed
NathanFrench opened this issue Jun 25, 2018 · 0 comments
Closed

[RFC] ragel support #99

NathanFrench opened this issue Jun 25, 2018 · 0 comments

Comments

@NathanFrench
Copy link
Collaborator

When testing ragel, I found it to be a pleasure to work with. It also generates a much faster FSM than the hand-written one we currently use. I used the following prototype code to test. It's about a tenth of what we currently have, but it's just for testing.

%%{
    # Prototype HTTP request grammar: a request line, zero or more header
    # lines, then a terminating blank line. Token boundaries are reported to
    # the host program through the hook_*_run() calls in the actions below.
    machine evhtp_parser;

    # Actions. `fpc` is Ragel's pointer to the current input character and
    # `_mark` is a host variable holding the start of the token being
    # scanned; the hooks receive the span [_mark, fpc). `parser` and `hooks`
    # must be in scope wherever `write exec` is expanded.
    action  _mark    { _mark = fpc;                                         }
    action  _meth    {                                                      }
    action  _uri     { hook_uri_run(parser, hooks, _mark, fpc - _mark);     }
    action  _versn   {                                                      }
    action  _start_k { _mark = fpc;                                         }
    action  _start_v { _mark = fpc;                                         }
    action  _hkey    { hook_hdr_key_run(parser, hooks, _mark, fpc - _mark); }
    action  _hval    { hook_hdr_val_run(parser, hooks, _mark, fpc - _mark); }
    action  _done    {
        hook_on_hdrs_complete_run(parser, hooks);
        hook_on_msg_complete_run(parser, hooks);

        /* Stop the execute loop once the blank line ending the headers is seen. */
        fbreak;
    }

    # Character classes used to build the URI and header rules.
    _CRLF  = ("\r\n" | "\n");    # a bare "\n" is accepted as a line ending too
    _CTL   = (cntrl  | 127);
    _SAFE  = ("$" | "-" | "_" | ".");
    _EXTRA = ("!" | "*" | "'" | "(" | ")" | ",");
    _RESVD = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
    _USAFE = (_CTL | " " | "#" | "%" | "<" | ">" | "\"");
    _NATNL = any -- (alpha | digit | _RESVD | _EXTRA | _SAFE | _USAFE);
    _URESV = (alpha | digit | _SAFE | _EXTRA | _NATNL);
    _ESC   = ("%" xdigit xdigit);
    _UCHAR = (_URESV | _ESC);
    _PCHAR = (_UCHAR | ":" | "@" | "&" | "=" | "+");
    _SPECL = ("("    | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");

    # Building blocks of the request target.
    _token    = (ascii -- (_CTL | _SPECL));
    _scheme   = ("http" | "https");
    _abs_uri  = (_scheme ":" (_UCHAR | _RESVD)*);
    _path     = (_PCHAR+ ("/" _PCHAR*)*);
    _query    = (_UCHAR | _RESVD)*;
    _param    = (_PCHAR | "/")*;
    _params   = (_param (";" _param)*);
    _rel_path = (_path? (";" _params)?) ("?" _query) ?;
    _abs_path = ("/" + _rel_path);
    _vernum   = ( "1." ("0" | "1") );
    _fragment = (_UCHAR | _RESVD)*;

    # Request target: "*", an absolute http(s) URI, or an absolute path.
    # The whole target is handed to hook_uri_run() when it is left.
    _req_uri  = (
          "*"
        | _abs_uri
        | _abs_path
    ) >_mark %_uri;

    # Request method. The three named methods record themselves in
    # parser->method; any other run of 1..20 upper/digit/_SAFE characters is
    # accepted without setting parser->method.
    _method   =
    (
          "GET"  %{ parser->method = htp_method_GET;    }
        | "HEAD" %{ parser->method = htp_method_HEAD;   }
        | "POST" %{ parser->method = htp_method_POST;   }
        | ( upper | digit | _SAFE ){1,20}
    ) >_mark %_meth;

    _http_ver = ( "HTTP/" _vernum ) >_mark %_versn;
    _req_line = ( _method " " _req_uri ("#" _fragment){0,1} " " _http_ver _CRLF );

    # Header line: name, ":", optional spaces, value, line ending. The `:>`
    # makes the line ending terminate the otherwise unbounded `any*` value.
    _fld_name = ( _token -- ":" )+ >_start_k %_hkey;
    _fld_val  = any* >_start_v %_hval;
    _msg_hdr  = _fld_name ":" " "* _fld_val :> _CRLF;
    _request  = _req_line ( _msg_hdr )* ( _CRLF );

    # _done runs on the transition into the final state, i.e. on the last
    # character of the blank line that ends the header block.
    main := (_request) @_done;
}%%

%% write data;

/**
 * Reset a parser to the machine's initial state.
 *
 * Everything in *parser is zeroed except the `data` member, which is carried
 * over unchanged. The requested type and the initial state value produced by
 * Ragel's `write init` are then stored.
 *
 * @param parser parser to (re)initialise
 * @param type   value stored in parser->type
 */
void
evhtp_parser_init(struct evhtp_parser * parser, enum evhtp_parser_type type) {
    /* Grab the `data` member up front so it survives the memset below. */
    void * const saved_data = parser->data;
    int          cs         = 0; /* name required by the generated init code */

    %% write init;

    memset(parser, 0, sizeof *parser);

    parser->state = cs;
    parser->type  = type;
    parser->data  = saved_data;
}

size_t evhtp_parser_execute(struct evhtp_parser * parser, const char * data, size_t len) {
    int cs = parser->state;
    const char * p = data;
    const char * pe = data + len;
    const char * _mark = 0;

    %% write exec;

    return p - data;
}
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Projects
None yet
Development

No branches or pull requests

1 participant