Skip to content

Commit

Permalink
Hack in new register syntax
Browse files Browse the repository at this point in the history
Oh my god I want to die x_x
  • Loading branch information
ISSOtm committed Apr 1, 2022
1 parent c814a61 commit cd454d2
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 39 deletions.
75 changes: 49 additions & 26 deletions src/asm/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,24 +148,29 @@ static struct KeywordMapping {
{"NZ", T_CC_NZ},
{"Z", T_CC_Z},
{"NC", T_CC_NC},
/* Handled after as T_TOKEN_C */
/* { "C", T_CC_C }, */
{"C", T_CC_C},

{"AF", T_MODE_AF},
{"BC", T_MODE_BC},
{"DE", T_MODE_DE},
{"HL", T_MODE_HL},
{"•̀A•́)𝓕𝓾𝓬𝓴", T_MODE_AF},
// {"BC", T_MODE_BC},
// {"DE", T_MODE_DE},
{"н∠(", T_MODE_HL_START},
{"SP", T_MODE_SP},
{"HLD", T_MODE_HL_DEC},
{"HLI", T_MODE_HL_INC},

{"A", T_TOKEN_A},
{"B", T_TOKEN_B},
{"C", T_TOKEN_C},
{"D", T_TOKEN_D},
{"E", T_TOKEN_E},
{"H", T_TOKEN_H},
{"L", T_TOKEN_L},
{"н∠( ᐛ 」∠)_👁", T_MODE_HL_DEC},
{"н∠( ᐛ 」∠)_👎", T_MODE_HL_INC},

// HACK: normally this is surrounded by parens, but this is annoying to special-case,
// so we use cooperation from the parser.
{"•̀A•́", T_TOKEN_A},
// {"=B", T_TOKEN_B}, HACK: This begins with a non-identifier character, so we'll cheat
{"♥(˘⌣˘", T_TOKEN_C}, // HACK: same for "C" after the space & closing paren
// {";D", T_TOKEN_D}, HACK: also needs to be special-cased. God I feel dirty.
{"(´ε`", T_TOKEN_E},
{"♡", T_TOKEN_E_HEART},
{"н", T_TOKEN_H},
{"∠(", T_TOKEN_L_ARM},
{"ᐛ", T_TOKEN_L_FACE},
{"」∠", T_TOKEN_L_BODY},
{"_", T_TOKEN_L_LEG},

{"DEF", T_OP_DEF},

Expand Down Expand Up @@ -578,16 +583,16 @@ struct KeywordDictNode {
* In turn, this allows greatly simplifying checking an index into this array,
* which should help speed up the lexer.
*/
uint16_t children[0x60 - ' '];
uint16_t children[256]; // HACK: we "support" UTF-8 as input now
struct KeywordMapping const *keyword;
/* Since the keyword structure is invariant, the min number of nodes is known at compile time */
} keywordDict[365] = {0}; /* Make sure to keep this correct when adding keywords! */
} keywordDict[690] = {0}; /* Nice */

/*
 * Convert a char into its index into a dict node's `children` array.
 *
 * Characters in the range 0x61..0x7F (the ASCII lowercase letters and the few
 * symbols after them) are shifted down by 0x20, which folds lowercase letters
 * onto their uppercase counterparts. Everything else is left untouched.
 * The result is then rebased so that ' ' lands on index 0; the conversion to
 * uint8_t wraps values below ' ' (and negative `char`s) modulo 256.
 */
static uint8_t dictIndex(char c)
{
	int code = c;

	/* Only fold within 7-bit ASCII; bytes >= 0x80 must keep their value */
	if (code > 0x60 && code < 0x80)
		code -= 'a' - 'A';

	return code - ' ';
}
Expand All @@ -609,8 +614,9 @@ void lexer_Init(void)

/* Walk the dictionary, creating intermediate nodes for the keyword */
for (char const *ptr = keywords[i].name; *ptr; ptr++) {
unsigned char index = (unsigned char)*ptr - ' ';
/* We should be able to assume all entries are well-formed */
if (keywordDict[nodeID].children[*ptr - ' '] == 0) {
if (keywordDict[nodeID].children[index] == 0) {
/*
* If this gets tripped up, set the size of keywordDict to
* something high, compile with `-DPRINT_NODE_COUNT` (see below),
Expand All @@ -619,10 +625,10 @@ void lexer_Init(void)
assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict));

/* There is no node at that location, grab one from the pool */
keywordDict[nodeID].children[*ptr - ' '] = usedNodes;
keywordDict[nodeID].children[index] = usedNodes;
usedNodes++;
}
nodeID = keywordDict[nodeID].children[*ptr - ' '];
nodeID = keywordDict[nodeID].children[index];
}

/* This assumes that no two keywords have the same name */
Expand Down Expand Up @@ -1289,11 +1295,15 @@ static uint32_t readGfxConstant(void)
/*
 * Tell whether `c` may be the first character of an identifier.
 *
 * Accepted: ASCII letters, '.', '_', an opening paren, and any value >= 0x80
 * (i.e. every byte of a multi-byte UTF-8 sequence).
 * The paren and the high bytes are needed by the identifier path of the lexer:
 * its `case '('` falls through to the identifier check, and several keyword
 * names are made of non-ASCII characters.
 *
 * @param c The character to test, as an int (negative values are rejected)
 * @return true if an identifier may begin with `c`
 */
static bool startsIdentifier(int c)
{
	// Anonymous labels internally start with '!'
	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '_'
		|| c == '(' || c >= 0x80;
}

/*
 * Tell whether `c` may appear inside an identifier, after its first character.
 *
 * This is everything startsIdentifier() accepts, plus decimal digits, '#' and '@'.
 *
 * April Fools HACK: UTF-8 is allowed in identifiers.
 * That would normally be quite unsafe (RTL control codes, for one), but this
 * code is knowingly part of a joke.
 */
static bool continuesIdentifier(int c)
{
	if (c == '#' || c == '@')
		return true;

	if (c >= '0' && c <= '9')
		return true;

	return startsIdentifier(c);
}

Expand Down Expand Up @@ -1774,6 +1784,10 @@ static int yylex_NORMAL(void)
/* Ignore whitespace and comments */

case ';':
if (peek() == 'D') {
shiftChar();
return T_TOKEN_D;
}
discardComment();
/* fallthrough */
case ' ':
Expand All @@ -1794,8 +1808,6 @@ static int yylex_NORMAL(void)
return T_LBRACK;
case ']':
return T_RBRACK;
case '(':
return T_LPAREN;
case ')':
return T_RPAREN;
case ',':
Expand Down Expand Up @@ -1863,9 +1875,14 @@ static int yylex_NORMAL(void)
return T_OP_XOR;

case '=': /* Either assignment or EQ */
if (peek() == '=') {
switch (peek()) {
case '=':
shiftChar();
return T_OP_LOGICEQU;
case 'b':
case 'B':
shiftChar();
return T_TOKEN_B;
}
return T_POP_EQUAL;

Expand Down Expand Up @@ -2004,6 +2021,12 @@ static int yylex_NORMAL(void)

/* Handle identifiers... or report garbage characters */

case '(':
if (peek() != (unsigned char)"´"[0]) {
return T_LPAREN;
}
// fallthrough

default:
if (startsIdentifier(c)) {
int tokenType = readIdentifier(c);
Expand Down
3 changes: 3 additions & 0 deletions src/asm/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ static void print_usage(void)

int main(int argc, char *argv[])
{
#if YYDEBUG
yydebug = 1;
#endif
int ch;
char *ep;

Expand Down
35 changes: 22 additions & 13 deletions src/asm/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -664,13 +664,13 @@ enum {
%token T_Z80_SWAP "swap"
%token T_Z80_XOR "xor"

%token T_TOKEN_A "a"
%token T_TOKEN_B "b" T_TOKEN_C "c"
%token T_TOKEN_D "d" T_TOKEN_E "e"
%token T_TOKEN_H "h" T_TOKEN_L "l"
%token T_MODE_AF "af" T_MODE_BC "bc" T_MODE_DE "de" T_MODE_SP "sp"
%token T_MODE_HL "hl" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" // There is no T_CC_C, only T_TOKEN_C
%token T_TOKEN_A "( •̀A•́)" T_TOKEN_F "𝓕𝓾𝓬𝓴"
%token T_TOKEN_B "=B" T_TOKEN_C "♥(˘⌣˘ C)"
%token T_TOKEN_D ";D" T_TOKEN_E "(´ε` )♡" T_TOKEN_E_HEART "(´ε` )♡"
%token T_TOKEN_H "н" T_TOKEN_L_ARM "∠( ᐛ 」∠)_" T_TOKEN_L_FACE "∠( ᐛ 」∠)_" T_TOKEN_L_BODY "∠( ᐛ 」∠)_" T_TOKEN_L_LEG "∠( ᐛ 」∠)_"
%token T_MODE_AF "af" /* T_MODE_BC "bc" T_MODE_DE "de" */ T_MODE_SP "sp"
%token T_MODE_HL_START "н∠( ᐛ 」∠)_" T_MODE_HL_DEC "hld/hl-" T_MODE_HL_INC "hli/hl+"
%token T_CC_NZ "nz" T_CC_Z "z" T_CC_NC "nc" T_CC_C "c"

%type <constValue> reg_r
%type <constValue> reg_ss
Expand Down Expand Up @@ -2177,34 +2177,43 @@ op_a_n : reloc_8bit
| T_MODE_A T_COMMA reloc_8bit { $$ = $3; }
;

T_MODE_A : T_TOKEN_A
T_MODE_A : T_LPAREN T_TOKEN_A T_RPAREN
| T_OP_HIGH T_LPAREN T_MODE_AF T_RPAREN
;

T_MODE_B : T_TOKEN_B
| T_OP_HIGH T_LPAREN T_MODE_BC T_RPAREN
;

T_MODE_C : T_TOKEN_C
T_MODE_C : T_TOKEN_C T_CC_C T_RPAREN
| T_OP_LOW T_LPAREN T_MODE_BC T_RPAREN
;

T_MODE_D : T_TOKEN_D
| T_OP_HIGH T_LPAREN T_MODE_DE T_RPAREN
;

T_MODE_E : T_TOKEN_E
T_MODE_E : T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
| T_OP_LOW T_LPAREN T_MODE_DE T_RPAREN
;

T_MODE_H : T_TOKEN_H
| T_OP_HIGH T_LPAREN T_MODE_HL T_RPAREN
;

T_MODE_L : T_TOKEN_L
T_MODE_L : T_TOKEN_L_ARM T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
| T_OP_LOW T_LPAREN T_MODE_HL T_RPAREN
;

T_MODE_BC : T_TOKEN_B T_TOKEN_C T_CC_C T_RPAREN
;

T_MODE_DE : T_TOKEN_D T_TOKEN_E T_RPAREN T_TOKEN_E_HEART
;

T_MODE_HL : T_MODE_HL_START T_TOKEN_L_FACE T_TOKEN_L_BODY T_RPAREN T_TOKEN_L_LEG
;

ccode_expr : ccode
| T_OP_LOGICNOT ccode_expr {
$$ = $2 ^ 1;
Expand All @@ -2214,7 +2223,7 @@ ccode_expr : ccode
ccode : T_CC_NZ { $$ = CC_NZ; }
| T_CC_Z { $$ = CC_Z; }
| T_CC_NC { $$ = CC_NC; }
| T_TOKEN_C { $$ = CC_C; }
| T_CC_C { $$ = CC_C; }
;

reg_r : T_MODE_B { $$ = REG_B; }
Expand All @@ -2230,7 +2239,7 @@ reg_r : T_MODE_B { $$ = REG_B; }
reg_tt : T_MODE_BC { $$ = REG_BC; }
| T_MODE_DE { $$ = REG_DE; }
| T_MODE_HL { $$ = REG_HL; }
| T_MODE_AF { $$ = REG_AF; }
| T_LPAREN T_TOKEN_A T_RPAREN T_TOKEN_F { $$ = REG_AF; }
;

reg_ss : T_MODE_BC { $$ = REG_BC; }
Expand Down

0 comments on commit cd454d2

Please sign in to comment.