From 3b455ba0f64cc65ec14b0e819b9964c9f8f2c584 Mon Sep 17 00:00:00 2001 From: Rangi Date: Sun, 14 Mar 2021 20:49:40 -0400 Subject: [PATCH] New definition syntax with leading DEF keyword This will enable fixing #457 later once the old definition syntax is removed. --- src/asm/lexer.c | 2 +- src/asm/parser.y | 132 +++++++++++++++++++++++++++++----------- src/asm/rgbasm.5 | 108 +++++++++++++++++++++----------- test/asm/def.asm | 29 +++++++++ test/asm/def.err | 5 ++ test/asm/def.out | 9 +++ test/asm/def.simple.err | 5 ++ 7 files changed, 215 insertions(+), 75 deletions(-) create mode 100644 test/asm/def.asm create mode 100644 test/asm/def.err create mode 100644 test/asm/def.out create mode 100644 test/asm/def.simple.err diff --git a/src/asm/lexer.c b/src/asm/lexer.c index dd3768c06..305d479a4 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -275,10 +275,10 @@ static struct KeywordMapping { {"RW", T_POP_RW}, /* Handled before as T_Z80_RL */ /* {"RL", T_POP_RL}, */ + {"EQU", T_POP_EQU}, {"EQUS", T_POP_EQUS}, {"REDEF", T_POP_REDEF}, - /* Handled before as T_Z80_SET */ /* {"SET", T_POP_SET}, */ diff --git a/src/asm/parser.y b/src/asm/parser.y index 24411a073..abf530dd1 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -506,6 +506,8 @@ enum { %token T_ID "identifier" %token T_LOCAL_ID "local identifier" %token T_ANON "anonymous label" +%type def_id +%type redef_id %type scoped_id %type scoped_anon_id %token T_POP_EQU "EQU" @@ -694,6 +696,22 @@ endc : T_POP_ENDC { } ; +def_id : T_OP_DEF { + lexer_ToggleStringExpansion(false); + } T_ID { + lexer_ToggleStringExpansion(true); + strcpy($$, $3); + } +; + +redef_id : T_POP_REDEF { + lexer_ToggleStringExpansion(false); + } T_ID { + lexer_ToggleStringExpansion(true); + strcpy($$, $3); + } +; + scoped_id : T_ID | T_LOCAL_ID; scoped_anon_id : scoped_id | T_ANON; @@ -775,8 +793,14 @@ directive : include | fail | warn | assert + | def_equ + | def_set + | def_rb + | def_rw + | def_rl + | def_equs + | redef_equs | purge - | 
redef | pops | pushs | popo @@ -788,6 +812,36 @@ directive : include trailing_comma : %empty | T_COMMA ; +equ : T_LABEL T_POP_EQU const { sym_AddEqu($1, $3); } +; + +set_or_equal : T_POP_SET | T_POP_EQUAL +; + +set : T_LABEL set_or_equal const { sym_AddSet($1, $3); } +; + +equs : T_LABEL T_POP_EQUS string { sym_AddString($1, $3); } +; + +rb : T_LABEL T_POP_RB rs_uconst { + sym_AddEqu($1, sym_GetConstantValue("_RS")); + sym_AddSet("_RS", sym_GetConstantValue("_RS") + $3); + } +; + +rw : T_LABEL T_POP_RW rs_uconst { + sym_AddEqu($1, sym_GetConstantValue("_RS")); + sym_AddSet("_RS", sym_GetConstantValue("_RS") + 2 * $3); + } +; + +rl : T_LABEL T_Z80_RL rs_uconst { + sym_AddEqu($1, sym_GetConstantValue("_RS")); + sym_AddSet("_RS", sym_GetConstantValue("_RS") + 4 * $3); + } +; + align : T_OP_ALIGN uconst { if ($2 > 16) error("Alignment must be between 0 and 16, not %u\n", $2); @@ -937,9 +991,6 @@ macrodef : T_POP_MACRO T_ID T_NEWLINE { } ; -equs : T_LABEL T_POP_EQUS string { sym_AddString($1, $3); } -; - rsset : T_POP_RSSET uconst { sym_AddSet("_RS", $2); } ; @@ -952,24 +1003,6 @@ rs_uconst : %empty { | uconst ; -rl : T_LABEL T_Z80_RL rs_uconst { - sym_AddEqu($1, sym_GetConstantValue("_RS")); - sym_AddSet("_RS", sym_GetConstantValue("_RS") + 4 * $3); - } -; - -rw : T_LABEL T_POP_RW rs_uconst { - sym_AddEqu($1, sym_GetConstantValue("_RS")); - sym_AddSet("_RS", sym_GetConstantValue("_RS") + 2 * $3); - } -; - -rb : T_LABEL T_POP_RB rs_uconst { - sym_AddEqu($1, sym_GetConstantValue("_RS")); - sym_AddSet("_RS", sym_GetConstantValue("_RS") + $3); - } -; - union : T_POP_UNION { sect_StartUnion(); } ; @@ -1012,19 +1045,51 @@ dl : T_POP_DL { out_Skip(4, false); } | T_POP_DL constlist_32bit trailing_comma ; -purge : T_POP_PURGE { - lexer_ToggleStringExpansion(false); - } purge_list trailing_comma { - lexer_ToggleStringExpansion(true); +def_equ : def_id T_POP_EQU const { + sym_AddEqu($1, $3); + } +; + +def_set : def_id set_or_equal const { + sym_AddSet($1, $3); + } + | redef_id 
set_or_equal const { + sym_AddSet($1, $3); + } +; + +def_rb : def_id T_POP_RB rs_uconst { + sym_AddEqu($1, sym_GetConstantValue("_RS")); + sym_AddSet("_RS", sym_GetConstantValue("_RS") + $3); + } +; + +def_rw : def_id T_POP_RW rs_uconst { + sym_AddEqu($1, sym_GetConstantValue("_RS")); + sym_AddSet("_RS", sym_GetConstantValue("_RS") + 2 * $3); + } +; + +def_rl : def_id T_Z80_RL rs_uconst { + sym_AddEqu($1, sym_GetConstantValue("_RS")); + sym_AddSet("_RS", sym_GetConstantValue("_RS") + 4 * $3); + } +; + +def_equs : def_id T_POP_EQUS string { + sym_AddString($1, $3); + } +; + +redef_equs : redef_id T_POP_EQUS string { + sym_RedefString($1, $3); } ; -redef : T_POP_REDEF { +purge : T_POP_PURGE { lexer_ToggleStringExpansion(false); - } scoped_id { + } purge_list trailing_comma { lexer_ToggleStringExpansion(true); - } T_POP_EQUS string { - sym_RedefString($3, $6); } ; @@ -1045,13 +1110,6 @@ export_list : export_list_entry export_list_entry : scoped_id { sym_Export($1); } ; -equ : T_LABEL T_POP_EQU const { sym_AddEqu($1, $3); } -; - -set : T_LABEL T_POP_SET const { sym_AddSet($1, $3); } - | T_LABEL T_POP_EQUAL const { sym_AddSet($1, $3); } -; - include : T_POP_INCLUDE string { fstk_RunInclude($2); if (oFailedOnMissingInclude) diff --git a/src/asm/rgbasm.5 b/src/asm/rgbasm.5 index 0834d72f5..333e6c7b7 100644 --- a/src/asm/rgbasm.5 +++ b/src/asm/rgbasm.5 @@ -274,7 +274,7 @@ If it's a numeric symbol, its value is converted to hexadecimal notation with a .Sq $ prepended. .Bd -literal -offset indent -TOPIC equs "life, the universe, and \[rs]"everything\[rs]"" +def TOPIC equs "life, the universe, and \[rs]"everything\[rs]"" ANSWER = 42 ;\ Prints "The answer to life, the universe, and "everything" is $2A" PRINTLN "The answer to {TOPIC} is {ANSWER}" @@ -355,14 +355,14 @@ HINT: The construct can also be used outside strings. The symbol's value is again inserted directly. 
.Bd -literal -offset indent -NAME equs "ITEM" -FMT equs "d" -ZERO_NUM equ 0 -ZERO_STR equs "0" +def NAME equs "ITEM" +def FMT equs "d" +def ZERO_NUM equ 0 +def ZERO_STR equs "0" ;\ Defines INDEX as 100 INDEX = 1{ZERO_STR}{{FMT}:ZERO_NUM} ;\ Defines ITEM_100 as "\[rs]"hundredth\[rs]"" -{NAME}_{d:INDEX} equs "\[rs]"hundredth\[rs]"" +def {NAME}_{d:INDEX} equs "\[rs]"hundredth\[rs]"" ;\ Prints "ITEM_100 is hundredth" PRINTLN STRCAT("{NAME}_{d:INDEX} is ", {NAME}_{d:INDEX}) ;\ Purges ITEM_100 @@ -853,14 +853,6 @@ Periods .Sq \&. are allowed exclusively in labels, as described below. A symbol cannot have the same name as a reserved keyword. -.Pp -Constants and string equates -.Em must not -have any whitespace before their name when they are defined; -otherwise -.Nm -will treat them as a macro invocation. -Label and macro definitions may have whitespace before them, since a leading period or a following colon distinguishes them from invoking a macro. .Bl -tag -width indent .It Sy Label declaration One of the assembler's main tasks is to keep track of addresses for you, so you can work with meaningful names instead of "magic" numbers. @@ -945,8 +937,8 @@ Unlike below, constants defined this way cannot be redefined. They can, for example, be used for things such as bit definitions of hardware registers. .Bd -literal -offset indent -SCREEN_WIDTH equ 160 ;\ In pixels -SCREEN_HEIGHT equ 144 +def SCREEN_WIDTH equ 160 ;\ In pixels +def SCREEN_HEIGHT equ 144 .Ed .Pp Note that colons @@ -961,11 +953,12 @@ defines constant symbols like but those constants can be redefined. This is useful for variables in macros, for counters, etc. .Bd -literal -offset indent -ARRAY_SIZE EQU 4 -COUNT SET 2 -COUNT SET ARRAY_SIZE+COUNT -;\ COUNT now has the value 6 -COUNT = COUNT + 1 +DEF ARRAY_SIZE EQU 4 +DEF COUNT SET 2 +DEF COUNT SET 3 +REDEF COUNT SET ARRAY_SIZE+COUNT +COUNT = COUNT*2 +;\ COUNT now has the value 14 .Ed .Pp Note that colons @@ -974,19 +967,19 @@ following the name are not allowed. 
.It Ic RSSET , RSRESET , RB , RW The RS group of commands is a handy way of defining structures: .Bd -literal -offset indent - RSRESET -str_pStuff RW 1 -str_tData RB 256 -str_bCount RB 1 -str_SIZEOF RB 0 + RSRESET +DEF str_pStuff RW 1 +DEF str_tData RB 256 +DEF str_bCount RB 1 +DEF str_SIZEOF RB 0 .Ed .Pp The example defines four constants as if by: .Bd -literal -offset indent -str_pStuff EQU 0 -str_tData EQU 2 -str_bCount EQU 258 -str_SIZEOF EQU 259 +DEF str_pStuff EQU 0 +DEF str_tData EQU 2 +DEF str_bCount EQU 258 +DEF str_SIZEOF EQU 259 .Ed .Pp There are five commands in the RS group of commands: @@ -1015,10 +1008,10 @@ Wherever the assembler meets a string symbol its name is replaced with its value If you are familiar with C you can think of it as similar to .Fd #define . .Bd -literal -offset indent -COUNTREG EQUS "[hl+]" +DEF COUNTREG EQUS "[hl+]" ld a,COUNTREG -PLAYER_NAME EQUS "\[rs]"John\[rs]"" +DEF PLAYER_NAME EQUS "\[rs]"John\[rs]"" db PLAYER_NAME .Ed .Pp @@ -1030,7 +1023,7 @@ This will be interpreted as: .Pp String symbols can also be used to define small one-line macros: .Bd -literal -offset indent -pusha EQUS "push af\[rs]npush bc\[rs]npush de\[rs]npush hl\[rs]n" +DEF pusha EQUS "push af\[rs]npush bc\[rs]npush de\[rs]npush hl\[rs]n" .Ed .Pp Note that colons @@ -1047,7 +1040,7 @@ However, the keyword will define or redefine a string symbol. For example: .Bd -literal -offset indent -s EQUS "Hello, " +DEF s EQUS "Hello, " REDEF s EQUS "{s}world!" ; prints "Hello, world!" PRINTT "{s}\n" @@ -1070,6 +1063,46 @@ command-line option in Also, a macro can contain an .Ic EQUS which calls the same macro, which causes the same problem. +.Pp +The examples above for +.Ql EQU , +.Ql SET +or +.Ql = , +.Ql RB , +.Ql RW , +.Ql RL , +and +.Ql EQUS +all start with +.Ql DEF . +(A +.Ql SET +or +.Ql = +definition may start with +.Ql REDEF +instead, since they are redefinable.) 
+You may use the older syntax without +.Ql DEF , +but then the name being defined +.Em must not +have any whitespace before it; +otherwise +.Nm +will treat it as a macro invocation. +Furthermore, without the +.Ql DEF +keyword, +string equates may be expanded for the name. +This can lead to surprising results: +.Bd -literal -offset indent +X EQUS "Y" +; this defines Y, not X! +X EQU 42 +; prints "Y $2A" +PRINTLN "{X} {Y}" +.Ed .It Ic MACRO One of the best features of an assembler is the ability to write macros for it. Macros can be called with arguments, and can react depending on input using @@ -1092,6 +1125,7 @@ instead of with a single colon .Ql \&: following the macro's name. +.Pp Macros can't be exported or imported. .Pp Plainly nesting macro definitions is not allowed, but this can be worked around using @@ -1108,7 +1142,7 @@ ENDM But this will: .Bd -literal -offset indent MACRO outer -definition EQUS "MACRO inner\[rs]nPRINTLN \[rs]"Hello!\[rs]"\[rs]nENDM" +DEF definition EQUS "MACRO inner\[rs]nPRINTLN \[rs]"Hello!\[rs]"\[rs]nENDM" definition PURGE definition ENDM @@ -1184,8 +1218,8 @@ I can't stress this enough, DON'T purge a symbol that you use in expressions the linker needs to calculate. When not sure, it's probably not safe to purge anything other than string symbols, macros, and constants. 
.Bd -literal -offset indent -Kamikaze EQUS "I don't want to live anymore" -AOLer EQUS "Me too" +DEF Kamikaze EQUS "I don't want to live anymore" +DEF AOLer EQUS "Me too" PURGE Kamikaze, AOLer .Ed .Pp diff --git a/test/asm/def.asm b/test/asm/def.asm new file mode 100644 index 000000000..4e896c709 --- /dev/null +++ b/test/asm/def.asm @@ -0,0 +1,29 @@ +def variable = 1 + println variable +def variable set 2 + println variable +redef variable = 3 + println variable +redef variable set 4 + println variable + +DEF constant EQU 42 + println constant + +DEF string EQUS "here" + println "{string}" + +rsreset +def _x rb +def _y rw 2 +def _z rl +def _size rb 0 + println "{_x} {_y} {_z} {_size}" + +def constant equ 6*7 ; fails + println constant + +redef string equs "there" + println "{string}" + +redef constant equ 6*9 ; syntax error diff --git a/test/asm/def.err b/test/asm/def.err new file mode 100644 index 000000000..2fe3ac940 --- /dev/null +++ b/test/asm/def.err @@ -0,0 +1,5 @@ +ERROR: def.asm(23): + 'constant' already defined at def.asm(10) +ERROR: def.asm(29): + syntax error, unexpected EQU, expecting SET or = or EQUS +error: Assembly aborted (2 errors)! diff --git a/test/asm/def.out b/test/asm/def.out new file mode 100644 index 000000000..e09b8df47 --- /dev/null +++ b/test/asm/def.out @@ -0,0 +1,9 @@ +$1 +$2 +$3 +$4 +$2A +here +$0 $1 $5 $9 +$2A +there diff --git a/test/asm/def.simple.err b/test/asm/def.simple.err new file mode 100644 index 000000000..f61faed77 --- /dev/null +++ b/test/asm/def.simple.err @@ -0,0 +1,5 @@ +ERROR: def.asm(23): + 'constant' already defined at def.asm(10) +ERROR: def.asm(29): + syntax error +error: Assembly aborted (2 errors)!