Skip to content

Commit

Permalink
Merge pull request #612: Add parsing of hex literals in strings and c…
Browse files Browse the repository at this point in the history
…haracter literals

Add parsing of hex literals in strings
  • Loading branch information
markuspf authored Nov 9, 2016
2 parents 4a9d47e + 0d7a403 commit 5f18c3d
Show file tree
Hide file tree
Showing 12 changed files with 177 additions and 63 deletions.
26 changes: 20 additions & 6 deletions doc/ref/string.xml
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ next line
There are a number of <E>special character sequences</E> that can be used
between the singlequotes of a character literal or between the
doublequotes of a string literal to specify characters.
They consist of two characters.
The first is a backslash <C>\</C>. The second may be any character.
If it is an octal digit (from <C>0</C> to <C>7</C>) there must be two more
such digits. The meaning is given in the following list
They consist of a backslash <C>\</C> followed by a second character
indicating the type of special character sequence, and possibly more characters.
The following special character sequences are currently defined.
For any other sequence starting with a backslash, the backslash is ignored.
<P/>
<List>
<Mark><C>\n</C></Mark>
Expand Down Expand Up @@ -227,17 +227,31 @@ such digits. The meaning is given in the following list
<Index>octal character codes</Index>
<C>\XYZ</C></Mark>
<Item>
with <C>X</C>, <C>Y</C>, <C>Z</C> three octal digits.
with <C>X</C>, <C>Y</C>, <C>Z</C> three octal digits, that is one
of <C>"01234567"</C>.
This is translated to the character corresponding to the number
<C>X * 64 + Y * 8 + Z modulo 256</C>.
This can be used to specify and store arbitrary binary data as a string
in &GAP;.
</Item>
<Mark>
<Index><C>\0xYZ</C></Index>
<Index>hexadecimal character codes</Index>
<C>\0xYZ</C></Mark>
<Item>
with <C>Y</C> and <C>Z</C> hexadecimal digits, that is, each one of
<C>"0123456789ABCDEFabcdef"</C>, where <C>a</C> to <C>f</C> and
<C>A</C> to <C>F</C> are interpreted as the numbers <C>10</C> to
<C>15</C>. This is translated to the character corresponding
to the number <C>Y*16 + Z</C>.
</Item>
<Mark>
<Index>escaping non-special characters</Index>
other</Mark>
<Item>
For any other character the backslash is simply ignored.
For any other character the backslash is ignored.
<!--If the character is a letter, that is one of <C>a..zA..Z</C>, then a
warning is displayed.-->
</Item>
</List>
<P/>
Expand Down
2 changes: 1 addition & 1 deletion lib/helpbase.gi
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ InstallGlobalFunction(SIMPLE_STRING, function(str)
"efghijklmnopqrstuvwxyz[\000]^_\000abcdefghijklmnopqrstuvwxyz{ }~",
"\177\200\201\202",
"\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225",
"\226\227\230\231\232\233\234\235\236\237\238",
"\226\227\230\231\232\233\234\235\236\237\240",
"\241\242\244\244\246\246\250\250\251\252\253\254\255\256\257\260\261\262",
"\264\264\265\266\270\270\271\272\276\276\276\276\277aaaaaa",
"aceeeeiiiidnooooo\327ouuuuypsaaaaaaaceeeeiiiidnooooo\367ouuuuypy"
Expand Down
12 changes: 6 additions & 6 deletions lib/helpt2t.gi
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ local book, chapter, section, key, subkey, MatchKey, ssectypes,
# did we get the section only via a keyword?
if Length(arg)>3 then
key:=arg[4];
p:=Position(key,'\!');
p:=Position(key,'!');
if p=fail then
subkey:="";
else
Expand All @@ -196,20 +196,20 @@ local book, chapter, section, key, subkey, MatchKey, ssectypes,
if subkey="" then
ssectypes :=
[ # \><key>(...)
[ " ", key, " ", "(", ")", "^\!"],
[ " ", key, " ", "(", ")", "^!"],
# \>`<key>' V
[ Concatenation("`",key,"' "), "^{", " ", "V" ],
# \>`...'{<key>}
[ " ", "`", "'", " ", Concatenation("{",key,"}"), "^\!" ] ];
[ " ", "`", "'", " ", Concatenation("{",key,"}"), "^!" ] ];
else
ssectypes :=
[ # \><key>(...)!{<subkey>}
[ " ", key, " ", "(", ")", " ", "\!", " ",
[ " ", key, " ", "(", ")", " ", "!", " ",
Concatenation("{", subkey, "}") ],
# \>`...'{<key>!<subkey>}
[ " ", "`", "'", " ", Concatenation("{",key,"\!",subkey,"}"), "^\!" ],
[ " ", "`", "'", " ", Concatenation("{",key,"!",subkey,"}"), "^!" ],
# \>`...'{<key>}!{<subkey>}
[ " ", "`", "'", " ", Concatenation("{",key,"}\!{",subkey,"}"), "^\!"] ];
[ " ", "`", "'", " ", Concatenation("{",key,"}!{",subkey,"}"), "^!"] ];
fi;
else
key:=fail;
Expand Down
2 changes: 1 addition & 1 deletion lib/mgmcong.gi
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ InstallMethod( \in, "for a magma congruence class", true,
# first ensure that <x> is in the right family
if FamilyObj(x) <>
ElementsFamily(FamilyObj(Source(EquivalenceClassRelation(C)))) then
Error("incompatible arguments for \in");
Error("incompatible arguments for \\in");
fi;

# quick check to see if element is representative
Expand Down
2 changes: 1 addition & 1 deletion lib/obsolete.gi
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ function ( ffe )
local str, log,deg,char;
char:=Characteristic(ffe);
if IsZero( ffe ) then
str := Concatenation("0\*Z(",String(char),")");
str := Concatenation("0*Z(",String(char),")");
else
str := Concatenation("Z(",String(char));
deg:=DegreeFFE(ffe);
Expand Down
4 changes: 2 additions & 2 deletions lib/relation.gd
Original file line number Diff line number Diff line change
Expand Up @@ -660,8 +660,8 @@ DeclareOperation("StronglyConnectedComponents", [IsBinaryRelation]);
## Special definitions for exponentiation with sets, lists, and Zero
##
DeclareOperation("POW", [IsListOrCollection, IsBinaryRelation]);
DeclareOperation("\+", [IsBinaryRelation, IsBinaryRelation]);
DeclareOperation("\-", [IsBinaryRelation, IsBinaryRelation]);
DeclareOperation("+", [IsBinaryRelation, IsBinaryRelation]);
DeclareOperation("-", [IsBinaryRelation, IsBinaryRelation]);

#############################################################################
##
Expand Down
2 changes: 1 addition & 1 deletion lib/relation.gi
Original file line number Diff line number Diff line change
Expand Up @@ -2101,7 +2101,7 @@ InstallMethod(\in, "for element and equivalence class", true,
# first ensure that <x> is in the right family
if FamilyObj(x) <>
ElementsFamily(FamilyObj(Source(EquivalenceClassRelation(C)))) then
Error("incompatible arguments for \in");
Error("incompatible arguments for \\in");
fi;

# now just enumerate the elements of <C> until we come to <x>
Expand Down
3 changes: 2 additions & 1 deletion lib/string.g
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ DeclareCategoryCollections( "IsChar" );
## See <Ref Func="IsStringRep"/> below for more details.
## <P/>
## Each character, in particular those which cannot be typed directly from
## the keyboard, can also be typed in three digit octal notation.
## the keyboard, can also be typed in three digit octal notation, or
## two digit hexadecimal notation.
## And for some special characters (like the newline character) there is a
## further possibility to type them,
## see section <Ref Sect="Special Characters"/>.
Expand Down
4 changes: 2 additions & 2 deletions small/smlinfo.gi
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,8 @@ SMALL_GROUPS_INFORMATION[ 21 ] := function( size, smav, num )
Print( " with order p^6 for p >= 5 is based on the Easterfield \n");
Print( " list, corrected by Newman, O'Brien and Vaughan-Lee (2004).\n");
Print( " It differs only in the addition of groups in isoclinism \n");
Print( " family $\Phi_{13}$, in using the James (1980) presentations \n");
Print( " for the groups in $\Phi_{19}$, and a small number of \n");
Print( " family $\\Phi_{13}$, in using the James (1980) presentations \n");
Print( " for the groups in $\\Phi_{19}$, and a small number of \n");
Print( " typographical amendments. The linear ordering employed is \n");
Print( " very close to that of Easterfield. \n \n");

Expand Down
138 changes: 97 additions & 41 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -1810,6 +1810,99 @@ void GetNumber ( UInt StartingStatus )
}


/*******************************************************************************
**
*F GetEscapedChar() . . . . . . . . . . . . . . . . get an escaped character
**
** 'GetEscapedChar' reads an escape sequence from the current input file and
** returns the character it denotes.
**
*/
/* 'GetOctalDigits' reads the last two digits of an octal escape sequence.
** The character at the current input position is taken as the digit of
** weight 8; the following character is then fetched and taken as the digit
** of weight 1. A syntax error is signalled for each of the two characters
** that is not in the range '0'..'7'; scanning continues regardless.
** Returns 8 * <first digit> + <second digit>.
*/
static inline Char GetOctalDigits( void )
{
    Char value;

    /* digit of weight 8 */
    if ( !( '0' <= *TLS(In) && *TLS(In) <= '7' ) ) {
        SyntaxError("Expecting octal digit");
    }
    value = 8 * (*TLS(In) - '0');

    /* advance to the digit of weight 1 */
    GET_CHAR();
    if ( !( '0' <= *TLS(In) && *TLS(In) <= '7' ) ) {
        SyntaxError("Expecting octal digit");
    }
    value += (*TLS(In) - '0');

    return value;
}


/****************************************************************************
**
*F CharHexDigit( <ch> ) . . . . . . . . . turn a single hex digit into Char
**
** 'CharHexDigit' returns the numeric value (0..15) of the hexadecimal digit
** <ch>. The caller is expected to have checked that <ch> is one of
** '0'..'9', 'A'..'F' or 'a'..'f'; for any other character the result is
** meaningless.
*/
static inline Char CharHexDigit( const Char ch )
{
    /* The order of the tests relies on '0'..'9' < 'A'..'F' < 'a'..'f',
       as is the case in ASCII. */
    if (ch >= 'a') {
        return (ch - 'a' + 10);
    } else if (ch >= 'A') {
        return (ch - 'A' + 10);
    } else {
        return (ch - '0');
    }
}

/* 'GetEscapedChar' decodes one escape sequence and returns the character it
** stands for. On entry the current input character is the one following
** the backslash; on return the current input character is the last
** character of the escape sequence.
*/
Char GetEscapedChar( void )
{
    Char result = 0;

    switch ( *TLS(In) ) {

    /* single letter escapes */
    case 'n':  result = '\n';  break;
    case 't':  result = '\t';  break;
    case 'r':  result = '\r';  break;
    case 'b':  result = '\b';  break;
    case '>':  result = '\01'; break;
    case '<':  result = '\02'; break;
    case 'c':  result = '\03'; break;

    /* escaped quotes and backslash stand for themselves */
    case '"':  result = '"';   break;
    case '\\': result = '\\';  break;
    case '\'': result = '\'';  break;

    case '0':
        /* a leading 0 starts either a hex escape (0xYZ) or a three digit
           octal number whose first digit is 0 */
        GET_CHAR();
        if ( *TLS(In) == 'x' ) {
            GET_CHAR();
            if ( !IsHexDigit(*TLS(In)) ) {
                SyntaxError("Expecting hexadecimal digit");
            }
            result = 16 * CharHexDigit(*TLS(In));
            GET_CHAR();
            if ( !IsHexDigit(*TLS(In)) ) {
                SyntaxError("Expecting hexadecimal digit");
            }
            result += CharHexDigit(*TLS(In));
        }
        else if ( '0' <= *TLS(In) && *TLS(In) <= '7' ) {
            result += GetOctalDigits();
        }
        else {
            SyntaxError("Expecting hexadecimal escape, or two more octal digits");
        }
        break;

    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
        /* escaped three digit octal numbers are allowed in input */
        result = 64 * (*TLS(In) - '0');
        GET_CHAR();
        result += GetOctalDigits();
        break;

    default:
        /* Any other character stands for itself. Following discussions on
           pull-request #612, the warning "Alphabet letter after \\" for
           letters is currently disabled for backwards compatibility; some
           code relies on this behaviour and tests break with the warning
           enabled. */
        result = *TLS(In);
        break;
    }

    return result;
}

/****************************************************************************
**
*F GetStr() . . . . . . . . . . . . . . . . . . . . . . get a string, local
Expand All @@ -1833,7 +1926,6 @@ void GetNumber ( UInt StartingStatus )
void GetStr ( void )
{
Int i = 0, fetch;
Char a, b, c;

/* Avoid substitution of '?' in beginning of GetLine chunks */
TLS(HELPSubsOn) = 0;
Expand Down Expand Up @@ -1861,22 +1953,9 @@ void GetStr ( void )
GET_CHAR();
if ( *TLS(In) == '\n' ) i--;
else {TLS(Value)[i] = '\r'; fetch = 0;}
} else {
TLS(Value)[i] = GetEscapedChar();
}
else if ( *TLS(In) == 'n' ) TLS(Value)[i] = '\n';
else if ( *TLS(In) == 't' ) TLS(Value)[i] = '\t';
else if ( *TLS(In) == 'r' ) TLS(Value)[i] = '\r';
else if ( *TLS(In) == 'b' ) TLS(Value)[i] = '\b';
else if ( *TLS(In) == '>' ) TLS(Value)[i] = '\01';
else if ( *TLS(In) == '<' ) TLS(Value)[i] = '\02';
else if ( *TLS(In) == 'c' ) TLS(Value)[i] = '\03';
else if ( IsDigit( *TLS(In) ) ) {
a = *TLS(In); GET_CHAR(); b = *TLS(In); GET_CHAR(); c = *TLS(In);
if (!( IsDigit(b) && IsDigit(c) )){
SyntaxError("Expecting three octal digits after \\ in string");
}
TLS(Value)[i] = (a-'0') * 64 + (b-'0') * 8 + c-'0';
}
else TLS(Value)[i] = *TLS(In);
}

/* put normal chars into 'Value' but only if there is room */
Expand Down Expand Up @@ -2039,48 +2118,25 @@ void GetMaybeTripStr ( void )
*/
void GetChar ( void )
{
Char c;

/* skip '\'' */
GET_CHAR();

/* handle escape sequences */
if ( *TLS(In) == '\\' ) {
GET_CHAR();
if ( *TLS(In) == 'n' ) TLS(Value)[0] = '\n';
else if ( *TLS(In) == 't' ) TLS(Value)[0] = '\t';
else if ( *TLS(In) == 'r' ) TLS(Value)[0] = '\r';
else if ( *TLS(In) == 'b' ) TLS(Value)[0] = '\b';
else if ( *TLS(In) == '>' ) TLS(Value)[0] = '\01';
else if ( *TLS(In) == '<' ) TLS(Value)[0] = '\02';
else if ( *TLS(In) == 'c' ) TLS(Value)[0] = '\03';
else if ( *TLS(In) >= '0' && *TLS(In) <= '7' ) {
/* escaped three digit octal numbers are allowed in input */
c = 64 * (*TLS(In) - '0');
GET_CHAR();
if ( *TLS(In) < '0' || *TLS(In) > '7' )
SyntaxError("Expecting octal digit in character constant");
c = c + 8 * (*TLS(In) - '0');
GET_CHAR();
if ( *TLS(In) < '0' || *TLS(In) > '7' )
SyntaxError("Expecting 3 octal digits in character constant");
c = c + (*TLS(In) - '0');
TLS(Value)[0] = c;
}
else TLS(Value)[0] = *TLS(In);
TLS(Value)[0] = GetEscapedChar();
}
else if ( *TLS(In) == '\n' ) {
SyntaxError("Newline not allowed in character literal");
}
/* put normal chars into 'TLS(Value)' */
/* put normal chars into 'TLS(Value)' */
else {
TLS(Value)[0] = *TLS(In);
}

/* read the next character */
GET_CHAR();


/* check for terminating single quote */
if ( *TLS(In) != '\'' )
SyntaxError("Missing single quote in character constant");
Expand Down
11 changes: 10 additions & 1 deletion src/system.h
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,16 @@ extern UInt SyTimeChildrenSys ( void );
*/
#define IsDigit(ch) (isdigit((unsigned int)ch))


/****************************************************************************
**
*F IsHexDigit( <ch> ) . . . . . . . . . is a character a hexadecimal digit
**
** 'IsHexDigit' returns a nonzero value if its character argument is a
** hexadecimal digit, i.e. from the ranges '0..9', 'A..F', or 'a..f', and 0
** otherwise.
**
** The argument is converted to 'unsigned char' because 'isxdigit' is only
** defined for values representable as 'unsigned char' (or EOF); a plain
** 'char' with the high bit set could otherwise be passed as an
** out-of-range value.
*/
#define IsHexDigit(ch) (isxdigit((unsigned char)(ch)))

/****************************************************************************
**
*F IsSpace( <ch> ) . . . . . . . . . . . . . . . .is a character whitespace
Expand All @@ -640,7 +650,6 @@ extern UInt SyTimeChildrenSys ( void );
*/
#define IsSpace(ch) (isspace((unsigned int)ch))


/****************************************************************************
**
*F SyIntString( <string> ) . . . . . . . . extract a C integer from a string
Expand Down
Loading

0 comments on commit 5f18c3d

Please sign in to comment.