From 9c704a9eb91331fe385863d3113261f4cbc04b1c Mon Sep 17 00:00:00 2001 From: Max Horn Date: Mon, 9 Apr 2018 23:59:37 +0200 Subject: [PATCH] scanner+io: handle line continuation in io.c ... and not in the scanner. This is much simpler, and also ensures uniform treatment of line continuations everywhere. Several new test cases are added to demonstrate this. This leads to one change in behavior: line continuations inside of triple quoted strings are now handled, while before they would just insert a backslash followed by a newline into the string. This change is intentional. A test case is adjusted accordingly. --- src/io.c | 32 +++++++++- src/io.h | 3 +- src/scanner.c | 92 +++++++--------------------- tst/testinstall/linecontinuation.tst | 15 ++++- 4 files changed, 69 insertions(+), 73 deletions(-) diff --git a/src/io.c b/src/io.c index 27468578771..dd9ac0742fa 100644 --- a/src/io.c +++ b/src/io.c @@ -195,14 +195,44 @@ Char GET_NEXT_CHAR(void) } else STATE(In)++; + + // handle line continuation, i.e., backslash followed by new line or CRLF; + // and also the end of a line in general + while (*STATE(In) == '\\' || *STATE(In) == 0) { + if (!*STATE(In)) + GetLine(); + else if (STATE(In)[1] == '\n') + STATE(In) += 2; + else if (STATE(In)[1] == '\r') + STATE(In) += (STATE(In)[2] == '\n') ? 3 : 2; + else + break; + } + + return *STATE(In); +} + +// GET_NEXT_CHAR_NO_LC is like GET_NEXT_CHAR, but does not handle +// line continuations. This is used when skipping to the end of the +// current line, when handling comment lines. +static Char GET_NEXT_CHAR_NO_LC(void) +{ + if (STATE(In) == &IO()->Pushback) { + STATE(In) = IO()->RealIn; + } + else + STATE(In)++; + if (!*STATE(In)) GetLine(); + return *STATE(In); } Char PEEK_NEXT_CHAR(void) { assert(IS_CHAR_PUSHBACK_EMPTY()); + // store the current character IO()->Pushback = *STATE(In); @@ -224,7 +254,7 @@ void IGNORE_REST_OF_LINE(void) { Char c = *STATE(In); while (c != '\n' && c != '\r' && c != '\377') - c = GET_NEXT_CHAR(); + c = GET_NEXT_CHAR_NO_LC(); } diff --git a/src/io.h b/src/io.h index 636c796fa09..03840004f0e 100644 --- a/src/io.h +++ b/src/io.h @@ -25,7 +25,8 @@ extern Char GET_NEXT_CHAR(void); extern Char PEEK_NEXT_CHAR(void); extern Char PEEK_CURR_CHAR(void); -// skip the rest of the current line +// skip the rest of the current line, ignoring line continuations +// (used to handle comments) extern void IGNORE_REST_OF_LINE(void); /**************************************************************************** diff --git a/src/scanner.c b/src/scanner.c index 737f738e9b9..be23eea839e 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -191,7 +191,7 @@ void Match ( */ static void GetIdent(void) { - Int i, fetch; + Int i; Int isQuoted; /* initially it could be a keyword */ @@ -201,24 +201,13 @@ static void GetIdent(void) Char c = PEEK_CURR_CHAR(); for ( i=0; IsIdent(c) || IsDigit(c) || c=='\\'; i++ ) { - fetch = 1; /* handle escape sequences */ /* we ignore '\ newline' by decrementing i, except at the very start of the identifier, when we cannot do that so we recurse instead */ if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' && i == 0 ) { GetSymbol(); return; } - else if ( c == '\r' ) { - c = GET_NEXT_CHAR(); - if ( c == '\n' ) { - if (i == 0) { GetSymbol(); return; } - else i--; - } - else {STATE(Value)[i] = '\r'; fetch = 0;} - } - else if ( c == '\n' && i < SAFE_VALUE_SIZE-1 ) i--; - else if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n'; + if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n'; else if ( c == 't' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\t'; else if ( c == 'r' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\r'; else if ( c == 'b' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\b'; @@ -234,7 +223,7 @@ static void GetIdent(void) } /* read the next character */ - if (fetch) c = GET_NEXT_CHAR(); + c = GET_NEXT_CHAR(); } @@ -327,35 +316,24 @@ static void GetIdent(void) ** exponent digit. ** */ -static Char GetCleanedChar( UInt *wasEscaped ) { - Char c = GET_NEXT_CHAR(); - *wasEscaped = 0; - if (c == '\\') { - c = GET_NEXT_CHAR(); - if ( c == '\n') - return GetCleanedChar(wasEscaped); - else if ( c == '\r' ) { - if ( PEEK_NEXT_CHAR() == '\n' ) { - GET_NEXT_CHAR(); // skip the \n - return GetCleanedChar(wasEscaped); - } - else { +static Char GetCleanedChar(UInt * wasEscaped) +{ + Char c = GET_NEXT_CHAR(); + *wasEscaped = 0; + if (c == '\\') { + c = GET_NEXT_CHAR(); *wasEscaped = 1; - return '\r'; - } - } - else { - *wasEscaped = 1; - if ( c == 'n') return '\n'; - else if ( c == 't') return '\t'; - else if ( c == 'r') return '\r'; - else if ( c == 'b') return '\b'; - else if ( c == '>') return '\01'; - else if ( c == '<') return '\02'; - else if ( c == 'c') return '\03'; + switch (c) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'b': return '\b'; + case '>': return '\01'; + case '<': return '\02'; + case 'c': return '\03'; + } } - } - return c; + return c; } @@ -712,30 +690,17 @@ static Char GetEscapedChar(void) */ static void GetStr(void) { - Int i = 0, fetch; + Int i = 0; Char c = PEEK_CURR_CHAR(); /* read all characters into 'Value' */ for ( i = 0; i < SAFE_VALUE_SIZE-1 && c != '"' && c != '\n' && c != '\377'; i++ ) { - fetch = 1; /* handle escape sequences */ if ( c == '\\' ) { c = GET_NEXT_CHAR(); - /* if next is another '\\' followed by '\n' it must be ignored */ - while ( c == '\\' && PEEK_NEXT_CHAR() == '\n' ) { - c = GET_NEXT_CHAR(); // skip '\\' - c = GET_NEXT_CHAR(); // skip '\n' - } - if ( c == '\n' ) i--; - else if ( c == '\r' ) { - c = GET_NEXT_CHAR(); - if ( c == '\n' ) i--; - else {STATE(Value)[i] = '\r'; fetch = 0;} - } else { - STATE(Value)[i] = GetEscapedChar(); - } + STATE(Value)[i] = GetEscapedChar(); } /* put normal chars into 'Value' but only if there is room */ @@ -744,7 +709,7 @@ static void GetStr(void) } /* read the next character */ - if (fetch) c = GET_NEXT_CHAR(); + c = GET_NEXT_CHAR(); } @@ -988,8 +953,6 @@ void GetSymbol ( void ) break; case '!': STATE(Symbol) = S_ILLEGAL; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '.' ) { STATE(Symbol) = S_BDOT; GET_NEXT_CHAR(); break; } if ( c == '[' ) { STATE(Symbol) = S_BLBRACK; GET_NEXT_CHAR(); break; } if ( c == '{' ) { STATE(Symbol) = S_BLBRACE; GET_NEXT_CHAR(); break; } @@ -1003,11 +966,6 @@ void GetSymbol ( void ) case ',': STATE(Symbol) = S_COMMA; GET_NEXT_CHAR(); break; case ':': STATE(Symbol) = S_COLON; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { - c = GET_NEXT_CHAR(); - if ( c == '\n' ) - { c = GET_NEXT_CHAR(); } - } if ( c == '=' ) { STATE(Symbol) = S_ASSIGN; c = GET_NEXT_CHAR(); break; } break; @@ -1019,21 +977,15 @@ void GetSymbol ( void ) case '=': STATE(Symbol) = S_EQ; GET_NEXT_CHAR(); break; case '<': STATE(Symbol) = S_LT; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '=' ) { STATE(Symbol) = S_LE; c = GET_NEXT_CHAR(); break; } if ( c == '>' ) { STATE(Symbol) = S_NE; c = GET_NEXT_CHAR(); break; } break; case '>': STATE(Symbol) = S_GT; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '=' ) { STATE(Symbol) = S_GE; c = GET_NEXT_CHAR(); break; } break; case '+': STATE(Symbol) = S_PLUS; GET_NEXT_CHAR(); break; case '-': STATE(Symbol) = S_MINUS; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '>' ) { STATE(Symbol)=S_MAPTO; c = GET_NEXT_CHAR(); break; } break; case '*': STATE(Symbol) = S_MULT; GET_NEXT_CHAR(); break; diff --git a/tst/testinstall/linecontinuation.tst b/tst/testinstall/linecontinuation.tst index ea3e8ba9fe7..ee4e62eaa6e 100644 --- a/tst/testinstall/linecontinuation.tst +++ b/tst/testinstall/linecontinuation.tst @@ -12,7 +12,7 @@ gap> x:="foo\ # in triple quoted string gap> x:="""haha\ > !"""; -"haha\\\n!" +"haha!" # break keywords and operators like :=, <=, >= etc. in the middle gap> 1 m\ @@ -22,6 +22,19 @@ gap> x :\ > =1; 1 +# inside range expressions +gap> [1.\ +> .4]; +[ 1 .. 4 ] + +# inside triple dots +gap> {x..\ +> .}->x; +function( x... ) ... end +gap> {x.\ +> ..}->x; +function( x... ) ... end + # however, in comments, you cannot use line continuations: gap> # 1234\ gap> 5;