From 6dc4ce65999d9f26703c4728ed983fbec90635c1 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Thu, 23 Jul 2020 12:12:58 +0200 Subject: [PATCH 01/59] Implement infrastructure around new lexer The lexer itself is very much incomplete, but this is intended to be a safe point to revert to should further implementation go south. --- Makefile | 3 +- include/asm/asm.h | 3 - include/asm/fstack.h | 13 +- include/asm/lexer.h | 92 +--- include/asm/main.h | 4 + src/CMakeLists.txt | 1 - src/asm/asmy.y | 328 ++---------- src/asm/fstack.c | 208 +++----- src/asm/globlex.c | 4 +- src/asm/lexer.c | 1196 +++++++++--------------------------------- src/asm/macro.c | 2 +- src/asm/main.c | 92 +--- src/asm/section.c | 18 +- src/asm/symbol.c | 7 +- src/asm/warning.c | 4 +- 15 files changed, 443 insertions(+), 1532 deletions(-) diff --git a/Makefile b/Makefile index f7f82459b..ee2605f7d 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,6 @@ rgbasm_obj := \ src/asm/asmy.o \ src/asm/charmap.o \ src/asm/fstack.o \ - src/asm/globlex.o \ src/asm/lexer.o \ src/asm/macro.o \ src/asm/main.o \ @@ -73,7 +72,7 @@ rgbasm_obj := \ src/hashmap.o \ src/linkdefs.o -src/asm/globlex.o src/asm/lexer.o src/asm/constexpr.o: src/asm/asmy.h +src/asm/lexer.o: src/asm/asmy.h rgblink_obj := \ src/link/assign.o \ diff --git a/include/asm/asm.h b/include/asm/asm.h index ff5de973c..caa87b899 100644 --- a/include/asm/asm.h +++ b/include/asm/asm.h @@ -24,11 +24,8 @@ #define MAXMACROARGS 99999 #define MAXINCPATHS 128 -extern int32_t nLineNo; extern uint32_t nTotalLines; extern uint32_t nIFDepth; -extern bool skipElif; -extern char tzCurrentFileName[_MAX_PATH + 1]; extern struct Section *pCurrentSection; extern bool oDontExpandStrings; diff --git a/include/asm/fstack.h b/include/asm/fstack.h index 41fe24c92..29405e16f 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -24,7 +24,7 @@ struct MacroArgs; struct sContext { - YY_BUFFER_STATE FlexHandle; + struct LexerState *lexerState; struct Symbol const *pMacro; 
struct sContext *next; char tzFileName[_MAX_PATH + 1]; @@ -32,7 +32,6 @@ struct sContext { uint32_t uniqueID; int32_t nLine; uint32_t nStatus; - FILE *pFile; char *pREPTBlock; uint32_t nREPTBlockCount; uint32_t nREPTBlockSize; @@ -46,11 +45,17 @@ void fstk_RunInclude(char *tzFileName); void fstk_Init(char *s); void fstk_Dump(void); void fstk_DumpToStr(char *buf, size_t len); -void fstk_DumpStringExpansions(void); void fstk_AddIncludePath(char *s); void fstk_RunMacro(char *s, struct MacroArgs *args); void fstk_RunRept(uint32_t count, int32_t nReptLineNo); -FILE *fstk_FindFile(char const *fname, char **incPathUsed); +/** + * @param path The user-provided file name + * @param fullPath The address of a pointer, which will be made to point at the full path + * The pointer's value must be a valid argument to `realloc`, including NULL + * @param size Current size of the buffer, or 0 if the pointer is NULL + * @return True if the file was found, false if no path worked + */ +bool fstk_FindFile(char const *path, char **fullPath, size_t *size); int32_t fstk_GetLine(void); #endif /* RGBDS_ASM_FSTACK_H */ diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 7d095e536..e4fcd8445 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -9,78 +9,44 @@ #ifndef RGBDS_ASM_LEXER_H #define RGBDS_ASM_LEXER_H -#include -#include - -#define LEXHASHSIZE (1 << 11) #define MAXSTRLEN 255 -struct sLexInitString { - char *tzName; - uint32_t nToken; -}; +struct LexerState; +extern struct LexerState *lexerState; +extern struct LexerState *lexerStateEOL; -struct sLexFloat { - uint32_t (*Callback)(char *s, uint32_t size); - uint32_t nToken; -}; - -struct yy_buffer_state { - /* Actual starting address */ - char *pBufferRealStart; - /* Address where the data is initially written after a safety margin */ - char *pBufferStart; - char *pBuffer; - size_t nBufferSize; - uint32_t oAtLineStart; -}; +static inline struct LexerState *lexer_GetState(void) +{ + return lexerState; +} -enum 
eLexerState { - LEX_STATE_NORMAL, - LEX_STATE_MACROARGS -}; +static inline void lexer_SetState(struct LexerState *state) +{ + lexerState = state; +} -struct sStringExpansionPos { - char *tzName; - char *pBuffer; - char *pBufferPos; - struct sStringExpansionPos *pParent; -}; +static inline void lexer_SetStateAtEOL(struct LexerState *state) +{ + lexerStateEOL = state; +} -#define INITIAL 0 -#define macroarg 3 +struct LexerState *lexer_OpenFile(char const *path); +struct LexerState *lexer_OpenFileView(void); +void lexer_DeleteState(struct LexerState *state); -typedef struct yy_buffer_state *YY_BUFFER_STATE; +enum LexerMode { + LEXER_NORMAL, + LEXER_RAW +}; -void setup_lexer(void); +void lexer_SetMode(enum LexerMode mode); +void lexer_ToggleStringExpansion(bool enable); -void yy_set_state(enum eLexerState i); -YY_BUFFER_STATE yy_create_buffer(FILE *f); -YY_BUFFER_STATE yy_scan_bytes(char const *mem, uint32_t size); -void yy_delete_buffer(YY_BUFFER_STATE buf); -void yy_switch_to_buffer(YY_BUFFER_STATE buf); -uint32_t lex_FloatAlloc(const struct sLexFloat *tok); -void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end); -void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end); -void lex_Init(void); -void lex_AddStrings(const struct sLexInitString *lex); -void lex_SetBuffer(char *buffer, uint32_t len); -void lex_BeginStringExpansion(const char *tzName); -int yywrap(void); +char const *lexer_GetFileName(void); +unsigned int lexer_GetLineNo(void); +void lexer_DumpStringExpansions(void); int yylex(void); -void yyunput(char c); -void yyunputstr(const char *s); -void yyskipbytes(uint32_t count); -void yyunputbytes(uint32_t count); - -extern YY_BUFFER_STATE 
pCurrentBuffer; -extern struct sStringExpansionPos *pCurrentStringExpansion; - -void upperstring(char *s); -void lowerstring(char *s); +void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, + char **capture, size_t *size, char const *name); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/include/asm/main.h b/include/asm/main.h index 874ad5a67..c2820d4ce 100644 --- a/include/asm/main.h +++ b/include/asm/main.h @@ -43,6 +43,10 @@ void opt_Push(void); void opt_Pop(void); void opt_Parse(char *s); +void upperstring(char *s); +void lowerstring(char *s); + +/* TODO: are these really needed? */ #define YY_FATAL_ERROR fatalerror #ifdef YYLMAX diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 861c85d15..63f155df0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -31,7 +31,6 @@ set(rgbasm_src "${BISON_ASMy_OUTPUT_SOURCE}" "asm/charmap.c" "asm/fstack.c" - "asm/globlex.c" "asm/lexer.c" "asm/macro.c" "asm/main.c" diff --git a/src/asm/asmy.y b/src/asm/asmy.y index 6a1694f70..9f3bc873c 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -39,6 +39,7 @@ uint32_t nListCountEmpty; char *tzNewMacro; uint32_t ulNewMacroSize; int32_t nPCOffset; +bool skipElifs; /* If this is set, ELIFs cannot be executed anymore */ size_t symvaluetostring(char *dest, size_t maxLength, char *symName, const char *mode) @@ -111,278 +112,6 @@ static uint32_t str2int2(uint8_t *s, int32_t length) return r; } -static uint32_t isWhiteSpace(char s) -{ - return (s == ' ') || (s == '\t') || (s == '\0') || (s == '\n'); -} - -static uint32_t isRept(char *s) -{ - return (strncasecmp(s, "REPT", 4) == 0) - && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]); -} - -static uint32_t isEndr(char *s) -{ - return (strncasecmp(s, "ENDR", 4) == 0) - && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]); -} - -static void copyrept(void) -{ - int32_t level = 1, len, instring = 0; - char *src = pCurrentBuffer->pBuffer; - char *bufferEnd = pCurrentBuffer->pBufferStart - + pCurrentBuffer->nBufferSize; 
- - while (src < bufferEnd && level) { - if (instring == 0) { - if (isRept(src)) { - level++; - src += 4; - } else if (isEndr(src)) { - level--; - src += 4; - } else { - if (*src == '\"') - instring = 1; - src++; - } - } else { - if (*src == '\\') { - src += 2; - } else if (*src == '\"') { - src++; - instring = 0; - } else { - src++; - } - } - } - - if (level != 0) - fatalerror("Unterminated REPT block\n"); - - len = src - pCurrentBuffer->pBuffer - 4; - - src = pCurrentBuffer->pBuffer; - ulNewMacroSize = len; - - tzNewMacro = malloc(ulNewMacroSize + 1); - - if (tzNewMacro == NULL) - fatalerror("Not enough memory for REPT block.\n"); - - uint32_t i; - - tzNewMacro[ulNewMacroSize] = 0; - for (i = 0; i < ulNewMacroSize; i++) { - tzNewMacro[i] = src[i]; - if (src[i] == '\n') - nLineNo++; - } - - yyskipbytes(ulNewMacroSize + 4); - -} - -static uint32_t isMacro(char *s) -{ - return (strncasecmp(s, "MACRO", 4) == 0) - && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[5]); -} - -static uint32_t isEndm(char *s) -{ - return (strncasecmp(s, "ENDM", 4) == 0) - && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]); -} - -static void copymacro(void) -{ - int32_t level = 1, len, instring = 0; - char *src = pCurrentBuffer->pBuffer; - char *bufferEnd = pCurrentBuffer->pBufferStart - + pCurrentBuffer->nBufferSize; - - while (src < bufferEnd && level) { - if (instring == 0) { - if (isMacro(src)) { - level++; - src += 4; - } else if (isEndm(src)) { - level--; - src += 4; - } else { - if(*src == '\"') - instring = 1; - src++; - } - } else { - if (*src == '\\') { - src += 2; - } else if (*src == '\"') { - src++; - instring = 0; - } else { - src++; - } - } - } - - if (level != 0) - fatalerror("Unterminated MACRO definition.\n"); - - len = src - pCurrentBuffer->pBuffer - 4; - - src = pCurrentBuffer->pBuffer; - ulNewMacroSize = len; - - tzNewMacro = (char *)malloc(ulNewMacroSize + 1); - if (tzNewMacro == NULL) - fatalerror("Not enough memory for MACRO definition.\n"); - - uint32_t i; - - 
tzNewMacro[ulNewMacroSize] = 0; - for (i = 0; i < ulNewMacroSize; i++) { - tzNewMacro[i] = src[i]; - if (src[i] == '\n') - nLineNo++; - } - - yyskipbytes(ulNewMacroSize + 4); -} - -static bool endsIf(char c) -{ - return isWhiteSpace(c) || c == '(' || c == '{'; -} - -static uint32_t isIf(char *s) -{ - return (strncasecmp(s, "IF", 2) == 0) - && isWhiteSpace(s[-1]) && endsIf(s[2]); -} - -static uint32_t isElif(char *s) -{ - return (strncasecmp(s, "ELIF", 4) == 0) - && isWhiteSpace(s[-1]) && endsIf(s[4]); -} - -static uint32_t isElse(char *s) -{ - return (strncasecmp(s, "ELSE", 4) == 0) - && isWhiteSpace(s[-1]) && isWhiteSpace(s[4]); -} - -static uint32_t isEndc(char *s) -{ - return (strncasecmp(s, "ENDC", 4) == 0) - && isWhiteSpace(s[-1]) && isWhiteSpace(s[4]); -} - -static void if_skip_to_else(void) -{ - int32_t level = 1; - bool inString = false; - char *src = pCurrentBuffer->pBuffer; - - while (*src && level) { - if (*src == '\n') - nLineNo++; - - if (!inString) { - if (isIf(src)) { - level++; - src += 2; - - } else if (level == 1 && isElif(src)) { - level--; - skipElif = false; - - } else if (level == 1 && isElse(src)) { - level--; - src += 4; - - } else if (isEndc(src)) { - level--; - if (level != 0) - src += 4; - - } else { - if (*src == '\"') - inString = true; - src++; - } - } else { - if (*src == '\"') { - inString = false; - } else if (*src == '\\') { - /* Escaped quotes don't end the string */ - if (*++src != '\"') - src--; - } - src++; - } - } - - if (level != 0) - fatalerror("Unterminated IF construct\n"); - - int32_t len = src - pCurrentBuffer->pBuffer; - - yyskipbytes(len); - yyunput('\n'); - nLineNo--; -} - -static void if_skip_to_endc(void) -{ - int32_t level = 1; - bool inString = false; - char *src = pCurrentBuffer->pBuffer; - - while (*src && level) { - if (*src == '\n') - nLineNo++; - - if (!inString) { - if (isIf(src)) { - level++; - src += 2; - } else if (isEndc(src)) { - level--; - if (level != 0) - src += 4; - } else { - if (*src == '\"') - 
inString = true; - src++; - } - } else { - if (*src == '\"') { - inString = false; - } else if (*src == '\\') { - /* Escaped quotes don't end the string */ - if (*++src != '\"') - src--; - } - src++; - } - } - - if (level != 0) - fatalerror("Unterminated IF construct\n"); - - int32_t len = src - pCurrentBuffer->pBuffer; - - yyskipbytes(len); - yyunput('\n'); - nLineNo--; -} - static size_t strlenUTF8(const char *s) { size_t len = 0; @@ -660,7 +389,6 @@ lines : /* empty */ nListCountEmpty = 0; nPCOffset = 0; } line '\n' { - nLineNo++; nTotalLines++; } ; @@ -699,9 +427,9 @@ label : /* empty */ ; macro : T_ID { - yy_set_state(LEX_STATE_MACROARGS); + lexer_SetMode(LEXER_RAW); } macroargs { - yy_set_state(LEX_STATE_NORMAL); + lexer_SetMode(LEXER_NORMAL); fstk_RunMacro($1, $3); } ; @@ -786,9 +514,9 @@ align : T_OP_ALIGN uconst { ; opt : T_POP_OPT { - yy_set_state(LEX_STATE_MACROARGS); + lexer_SetMode(LEXER_RAW); } opt_list { - yy_set_state(LEX_STATE_NORMAL); + lexer_SetMode(LEXER_NORMAL); } ; @@ -875,15 +603,21 @@ load : T_POP_LOAD string ',' sectiontype sectorg sectattrs { ; rept : T_POP_REPT uconst { - uint32_t nDefinitionLineNo = nLineNo; - copyrept(); + uint32_t nDefinitionLineNo = lexer_GetLineNo(); + char *body; + size_t size; + lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR, + &body, &size, "REPT block"); fstk_RunRept($2, nDefinitionLineNo); } ; macrodef : T_LABEL ':' T_POP_MACRO { - int32_t nDefinitionLineNo = nLineNo; - copymacro(); + int32_t nDefinitionLineNo = lexer_GetLineNo(); + char *body; + size_t size; + lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM, + &body, &size, "macro definition"); sym_AddMacro($1, nDefinitionLineNo); } ; @@ -956,9 +690,9 @@ dl : T_POP_DL constlist_32bit_entry ',' constlist_32bit { ; purge : T_POP_PURGE { - oDontExpandStrings = true; + lexer_ToggleStringExpansion(false); } purge_list { - oDontExpandStrings = false; + lexer_ToggleStringExpansion(true); } ; @@ -1054,8 +788,14 @@ printf : T_POP_PRINTF const { 
math_Print($2); } if : T_POP_IF const { nIFDepth++; - if (!$2) - if_skip_to_else(); + if (!$2) { + /* The function is hardcoded to also stop on T_POP_ELSE and ENDC */ + lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF, + NULL, NULL, "if block"); + skipElifs = false; + } else { + skipElifs = true; + } } ; @@ -1063,7 +803,7 @@ elif : T_POP_ELIF const { if (nIFDepth <= 0) fatalerror("Found ELIF outside an IF construct\n"); - if (skipElif) { + if (skipElifs) { /* * Executed when ELIF is reached at the end of * an IF or ELIF block for which the condition @@ -1071,21 +811,24 @@ elif : T_POP_ELIF const { * * Continue parsing at ENDC keyword */ - if_skip_to_endc(); + lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC, + NULL, NULL, "elif block"); } else { /* * Executed when ELIF is skipped to because the * condition of the previous IF or ELIF block * was false. */ - skipElif = true; if (!$2) { /* * Continue parsing after ELSE, or at * ELIF or ENDC keyword. */ - if_skip_to_else(); + lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF, + NULL, NULL, "elif block"); + } else { + skipElifs = true; } } } @@ -1096,7 +839,8 @@ else : T_POP_ELSE { fatalerror("Found ELSE outside an IF construct\n"); /* Continue parsing at ENDC keyword */ - if_skip_to_endc(); + lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC, + NULL, NULL, "else block"); } ; @@ -1267,13 +1011,13 @@ relocexpr_no_str : scoped_id { rpn_Symbol(&$$, $1); } } | T_OP_BANK '(' string ')' { rpn_BankSection(&$$, $3); } | T_OP_DEF { - oDontExpandStrings = true; + lexer_ToggleStringExpansion(false); } '(' scoped_id ')' { struct Symbol const *sym = sym_FindSymbol($4); rpn_Number(&$$, !!sym); - oDontExpandStrings = false; + lexer_ToggleStringExpansion(true); } | T_OP_ROUND '(' const ')' { rpn_Number(&$$, math_Round($3)); diff --git a/src/asm/fstack.c b/src/asm/fstack.c index c667acea9..b094f14ba 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -36,10 +36,7 @@ static struct sContext *pFileStack; static 
unsigned int nFileStackDepth; unsigned int nMaxRecursionDepth; static struct Symbol const *pCurrentMacro; -static YY_BUFFER_STATE CurrentFlexHandle; -static FILE *pCurrentFile; static uint32_t nCurrentStatus; -char tzCurrentFileName[_MAX_PATH + 1]; static char IncludePaths[MAXINCPATHS][_MAX_PATH + 1]; static int32_t NextIncPath; static uint32_t nMacroCount; @@ -81,10 +78,8 @@ static void pushcontext(void) if (*ppFileStack == NULL) fatalerror("No memory for context\n"); - (*ppFileStack)->FlexHandle = CurrentFlexHandle; (*ppFileStack)->next = NULL; - strcpy((char *)(*ppFileStack)->tzFileName, (char *)tzCurrentFileName); - (*ppFileStack)->nLine = nLineNo; + (*ppFileStack)->nLine = lexer_GetLineNo(); switch ((*ppFileStack)->nStatus = nCurrentStatus) { case STAT_isMacroArg: @@ -93,7 +88,6 @@ static void pushcontext(void) (*ppFileStack)->pMacro = pCurrentMacro; break; case STAT_isInclude: - (*ppFileStack)->pFile = pCurrentFile; break; case STAT_isREPTBlock: (*ppFileStack)->macroArgs = macro_GetCurrentArgs(); @@ -107,8 +101,6 @@ static void pushcontext(void) fatalerror("%s: Internal error.\n", __func__); } (*ppFileStack)->uniqueID = macro_GetUniqueID(); - - nLineNo = 0; } static int32_t popcontext(void) @@ -122,20 +114,15 @@ static int32_t popcontext(void) int nNbCharsWritten; int nNbCharsLeft; - yy_delete_buffer(CurrentFlexHandle); - CurrentFlexHandle = - yy_scan_bytes(pCurrentREPTBlock, - nCurrentREPTBlockSize); - yy_switch_to_buffer(CurrentFlexHandle); macro_SetUniqueID(nMacroCount++); /* Increment REPT count in file path */ pREPTIterationWritePtr = - strrchr(tzCurrentFileName, '~') + 1; + strrchr(lexer_GetFileName(), '~') + 1; nREPTIterationNo = strtoul(pREPTIterationWritePtr, NULL, 10); - nNbCharsLeft = sizeof(tzCurrentFileName) - - (pREPTIterationWritePtr - tzCurrentFileName); + nNbCharsLeft = sizeof(lexer_GetFileName()) + - (pREPTIterationWritePtr - lexer_GetFileName()); nNbCharsWritten = snprintf(pREPTIterationWritePtr, nNbCharsLeft, "%lu", nREPTIterationNo + 1); 
@@ -150,7 +137,6 @@ static int32_t popcontext(void) fatalerror("Cannot write REPT count to file path\n"); } - nLineNo = nCurrentREPTBodyFirstLine; return 0; } } @@ -165,19 +151,8 @@ static int32_t popcontext(void) pLastFile = *ppLastFile; } - yy_delete_buffer(CurrentFlexHandle); - nLineNo = nCurrentStatus == STAT_isREPTBlock ? nCurrentREPTBodyLastLine - : pLastFile->nLine; - - if (nCurrentStatus == STAT_isInclude) - fclose(pCurrentFile); - - if (nCurrentStatus == STAT_isMacro - || nCurrentStatus == STAT_isREPTBlock) - nLineNo++; - - CurrentFlexHandle = pLastFile->FlexHandle; - strcpy((char *)tzCurrentFileName, (char *)pLastFile->tzFileName); + lexer_DeleteState(lexer_GetState()); + lexer_SetState(pLastFile->lexerState); switch (pLastFile->nStatus) { struct MacroArgs *args; @@ -193,7 +168,6 @@ static int32_t popcontext(void) pCurrentMacro = pLastFile->pMacro; break; case STAT_isInclude: - pCurrentFile = pLastFile->pFile; break; case STAT_isREPTBlock: args = macro_GetCurrentArgs(); @@ -218,7 +192,6 @@ static int32_t popcontext(void) free(*ppLastFile); *ppLastFile = NULL; - yy_switch_to_buffer(CurrentFlexHandle); return 0; } @@ -229,11 +202,11 @@ int32_t fstk_GetLine(void) switch (nCurrentStatus) { case STAT_isInclude: /* This is the normal mode, also used when including a file. */ - return nLineNo; + return lexer_GetLineNo(); case STAT_isMacro: break; /* Peek top file of the stack */ case STAT_isMacroArg: - return nLineNo; /* ??? */ + return lexer_GetLineNo(); /* ??? 
*/ case STAT_isREPTBlock: break; /* Peek top file of the stack */ default: @@ -277,7 +250,7 @@ void fstk_Dump(void) pLastFile = pLastFile->next; } - fprintf(stderr, "%s(%" PRId32 ")", tzCurrentFileName, nLineNo); + fprintf(stderr, "%s(%" PRId32 ")", lexer_GetFileName(), lexer_GetLineNo()); } void fstk_DumpToStr(char *buf, size_t buflen) @@ -299,7 +272,7 @@ void fstk_DumpToStr(char *buf, size_t buflen) } retcode = snprintf(&buf[buflen - len], len, "%s(%" PRId32 ")", - tzCurrentFileName, nLineNo); + lexer_GetFileName(), lexer_GetLineNo()); if (retcode < 0) fatalerror("Failed to dump file stack to string: %s\n", strerror(errno)); else if (retcode >= len) @@ -311,20 +284,6 @@ void fstk_DumpToStr(char *buf, size_t buflen) warning(WARNING_LONG_STR, "File stack dump too long, got truncated\n"); } -/* - * Dump the string expansion stack to stderr - */ -void fstk_DumpStringExpansions(void) -{ - const struct sStringExpansionPos *pExpansion = pCurrentStringExpansion; - - while (pExpansion) { - fprintf(stderr, "while expanding symbol \"%s\"\n", - pExpansion->tzName); - pExpansion = pExpansion->pParent; - } -} - /* * Extra includepath stuff */ @@ -351,63 +310,58 @@ static void printdep(const char *fileName) } } -static FILE *getFile(char const *pathname) +static bool isPathValid(char const *pathname) { struct stat statbuf; if (stat(pathname, &statbuf) != 0) - return NULL; + return false; /* Reject directories */ - if (S_ISDIR(statbuf.st_mode)) - return NULL; - - return fopen(pathname, "rb"); + return !S_ISDIR(statbuf.st_mode); } -FILE *fstk_FindFile(char const *fname, char **incPathUsed) +bool fstk_FindFile(char const *path, char **fullPath, size_t *size) { - if (fname == NULL) - return NULL; - - char path[_MAX_PATH]; - FILE *f = getFile(fname); - - if (f) { - printdep(fname); - return f; + if (!*size) { + *size = 64; /* This is arbitrary, really */ + *fullPath = realloc(*fullPath, *size); + if (!*fullPath) + error("realloc error during include path search: %s\n", + 
strerror(errno)); } - for (size_t i = 0; i < NextIncPath; ++i) { - /* - * The function snprintf() does not write more than `size` bytes - * (including the terminating null byte ('\0')). If the output - * was truncated due to this limit, the return value is the - * number of characters (excluding the terminating null byte) - * which would have been written to the final string if enough - * space had been available. Thus, a return value of `size` or - * more means that the output was truncated. - */ - int fullpathlen = snprintf(path, sizeof(path), "%s%s", - IncludePaths[i], fname); - - if (fullpathlen >= (int)sizeof(path)) - continue; - - f = getFile(path); - if (f) { - printdep(path); - - if (incPathUsed) - *incPathUsed = IncludePaths[i]; - return f; + if (*fullPath) { + for (size_t i = 0; i <= NextIncPath; ++i) { + char *incPath = i ? IncludePaths[i - 1] : ""; + int len = snprintf(*fullPath, *size, "%s%s", incPath, path); + + /* Oh how I wish `asnprintf` was standard... */ + if (len >= *size) { /* `len` doesn't include the terminator, `size` does */ + *size = len + 1; + *fullPath = realloc(*fullPath, *size); + if (!*fullPath) { + error("realloc error during include path search: %s\n", + strerror(errno)); + break; + } + len = sprintf(*fullPath, "%s%s", incPath, path); + } + + if (len < 0) { + error("snprintf error during include path search: %s\n", + strerror(errno)); + } else if (isPathValid(*fullPath)) { + printdep(*fullPath); + return true; + } } } errno = ENOENT; if (oGeneratedMissingIncludes) - printdep(fname); - return NULL; + printdep(path); + return false; } /* @@ -415,33 +369,31 @@ FILE *fstk_FindFile(char const *fname, char **incPathUsed) */ void fstk_RunInclude(char *tzFileName) { - char *incPathUsed = ""; - FILE *f = fstk_FindFile(tzFileName, &incPathUsed); + char *fullPath = NULL; + size_t size = 0; - if (f == NULL) { - if (oGeneratedMissingIncludes) { + if (!fstk_FindFile(tzFileName, &fullPath, &size)) { + if (oGeneratedMissingIncludes) 
oFailedOnMissingInclude = true; - return; - } - error("Unable to open included file '%s': %s\n", tzFileName, strerror(errno)); + else + error("Unable to open included file '%s': %s\n", + tzFileName, strerror(errno)); + free(fullPath); return; } pushcontext(); - nLineNo = 1; nCurrentStatus = STAT_isInclude; - snprintf(tzCurrentFileName, sizeof(tzCurrentFileName), "%s%s", - incPathUsed, tzFileName); if (verbose) - printf("Assembling %s\n", tzCurrentFileName); - pCurrentFile = f; - CurrentFlexHandle = yy_create_buffer(pCurrentFile); - yy_switch_to_buffer(CurrentFlexHandle); + printf("Assembling %s\n", fullPath); - /* Dirty hack to give the INCLUDE directive a linefeed */ + struct LexerState *state = lexer_OpenFile(fullPath); - yyunput('\n'); - nLineNo--; + if (!state) + /* If lexer had an error, it already reported it */ + fatalerror("Failed to open file for INCLUDE\n"); /* TODO: make this non-fatal? */ + lexer_SetStateAtEOL(state); + free(fullPath); } /* @@ -450,7 +402,6 @@ void fstk_RunInclude(char *tzFileName) void fstk_RunMacro(char *s, struct MacroArgs *args) { struct Symbol const *sym = sym_FindSymbol(s); - int nPrintedChars; if (sym == NULL) { error("Macro \"%s\" not defined\n", s); @@ -464,21 +415,10 @@ void fstk_RunMacro(char *s, struct MacroArgs *args) pushcontext(); macro_SetUniqueID(nMacroCount++); /* Minus 1 because there is a newline at the beginning of the buffer */ - nLineNo = sym->fileLine - 1; macro_UseNewArgs(args); nCurrentStatus = STAT_isMacro; - nPrintedChars = snprintf(tzCurrentFileName, _MAX_PATH + 1, - "%s::%s", sym->fileName, s); - if (nPrintedChars > _MAX_PATH) { - popcontext(); - fatalerror("File name + macro name is too large to fit into buffer\n"); - } pCurrentMacro = sym; - /* TODO: why is `strlen` being used when there's a macro size field? 
*/ - CurrentFlexHandle = yy_scan_bytes(pCurrentMacro->macro, - strlen(pCurrentMacro->macro)); - yy_switch_to_buffer(CurrentFlexHandle); } /* @@ -487,11 +427,6 @@ void fstk_RunMacro(char *s, struct MacroArgs *args) void fstk_RunRept(uint32_t count, int32_t nReptLineNo) { if (count) { - static const char *tzReptStr = "::REPT~1"; - - /* For error printing to make sense, fake nLineNo */ - nCurrentREPTBodyLastLine = nLineNo; - nLineNo = nReptLineNo; pushcontext(); macro_SetUniqueID(nMacroCount++); nCurrentREPTBlockCount = count; @@ -499,15 +434,6 @@ void fstk_RunRept(uint32_t count, int32_t nReptLineNo) nCurrentREPTBlockSize = ulNewMacroSize; pCurrentREPTBlock = tzNewMacro; nCurrentREPTBodyFirstLine = nReptLineNo + 1; - nLineNo = nReptLineNo; - - if (strlen(tzCurrentFileName) + strlen(tzReptStr) > _MAX_PATH) - fatalerror("Cannot append \"%s\" to file path\n", tzReptStr); - strcat(tzCurrentFileName, tzReptStr); - - CurrentFlexHandle = - yy_scan_bytes(pCurrentREPTBlock, nCurrentREPTBlockSize); - yy_switch_to_buffer(CurrentFlexHandle); } } @@ -526,7 +452,6 @@ void fstk_Init(char *pFileName) // minus 2 to account for trailing "\"\0" // minus 1 to avoid a buffer overflow in extreme cases while (*c && fileNameIndex < sizeof(tzSymFileName) - 2 - 1) { - if (*c == '"') { tzSymFileName[fileNameIndex++] = '\\'; } @@ -541,19 +466,8 @@ void fstk_Init(char *pFileName) sym_AddString("__FILE__", tzSymFileName); pFileStack = NULL; - if (strcmp(pFileName, "-") == 0) { - pCurrentFile = stdin; - } else { - pCurrentFile = fopen(pFileName, "rb"); - if (pCurrentFile == NULL) - fatalerror("Unable to open file '%s': %s\n", pFileName, strerror(errno)); - } nFileStackDepth = 0; nMacroCount = 0; nCurrentStatus = STAT_isInclude; - snprintf(tzCurrentFileName, _MAX_PATH + 1, "%s", pFileName); - CurrentFlexHandle = yy_create_buffer(pCurrentFile); - yy_switch_to_buffer(CurrentFlexHandle); - nLineNo = 1; } diff --git a/src/asm/globlex.c b/src/asm/globlex.c index 33d2e9c6d..89d1556fa 100644 --- 
a/src/asm/globlex.c +++ b/src/asm/globlex.c @@ -287,10 +287,10 @@ uint32_t ParseSymbol(char *src, uint32_t size) /* Feed the symbol's contents into the buffer */ yyunputstr(s = sym_GetStringValue(sym)); - /* Lines inserted this way shall not increase nLineNo */ + /* Lines inserted this way shall not increase lexer_GetLineNo() */ while (*s) { if (*s++ == '\n') - nLineNo--; + lexer_GetLineNo()--; } return 0; } diff --git a/src/asm/lexer.c b/src/asm/lexer.c index e9a152158..937b8250b 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -1,1054 +1,364 @@ /* * This file is part of RGBDS. * - * Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors. + * Copyright (c) 2020, Eldred Habert and RGBDS contributors. * * SPDX-License-Identifier: MIT */ +#include +#include #include -#include -#include -#include +#include +#include +#include +#include #include +#include #include #include +#include -#include "asm/asm.h" -#include "asm/fstack.h" #include "asm/lexer.h" -#include "asm/macro.h" -#include "asm/main.h" #include "asm/rpn.h" -#include "asm/section.h" +#include "asm/symbol.h" /* For MAXSYMLEN in asmy.h */ #include "asm/warning.h" - -#include "extern/err.h" - -#include "asmy.h" -#include "platform.h" // strncasecmp, strdup - -struct sLexString { - char *tzName; - uint32_t nToken; - uint32_t nNameLength; - struct sLexString *next; +/* Include this last so it gets all type & constant definitions */ +#include "asmy.h" /* For token definitions, generated from asmy.y */ + +#define LEXER_BUF_SIZE 42 /* TODO: determine a sane value for this */ +/* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */ +/* C11 requires the message argument; the one-argument form only exists from C23 on */ +static_assert(LEXER_BUF_SIZE <= SSIZE_MAX, "Lexer buffer size is too large"); + +struct LexerState { + char const *path; + + /* mmap()-dependent IO state */ + bool isMmapped; + union { + struct { /* If mmap()ed */ + char *ptr; + off_t size; + off_t offset; + }; + struct { /* Otherwise */ + int fd; + size_t index; /* Read index into the buffer */ + size_t nbChars; 
/* Number of chars in front of the buffer */ + char buf[LEXER_BUF_SIZE]; /* Circular buffer */ + }; + }; + + /* Common state */ + enum LexerMode mode; + bool atLineStart; + unsigned int lineNo; + bool capturing; /* Whether the text being lexed should be captured */ + size_t captureSize; /* Amount of text captured */ + char *captureBuf; /* Buffer to send the captured text to if non-NULL */ + size_t captureCapacity; /* Size of the buffer above */ + bool expandStrings; }; -#define pLexBufferRealStart (pCurrentBuffer->pBufferRealStart) -#define pLexBuffer (pCurrentBuffer->pBuffer) -#define AtLineStart (pCurrentBuffer->oAtLineStart) - -#define SAFETYMARGIN 1024 - -#define BOM_SIZE 3 - -struct sLexFloat tLexFloat[32]; -struct sLexString *tLexHash[LEXHASHSIZE]; -YY_BUFFER_STATE pCurrentBuffer; -uint32_t nLexMaxLength; // max length of all keywords and operators - -uint32_t tFloatingSecondChar[256]; -uint32_t tFloatingFirstChar[256]; -uint32_t tFloatingChars[256]; -uint32_t nFloating; -enum eLexerState lexerstate = LEX_STATE_NORMAL; +struct LexerState *lexerState = NULL; +struct LexerState *lexerStateEOL = NULL; -struct sStringExpansionPos *pCurrentStringExpansion; -static unsigned int nNbStringExpansions; - -/* UTF-8 byte order mark */ -static const unsigned char bom[BOM_SIZE] = { 0xEF, 0xBB, 0xBF }; - -void upperstring(char *s) +struct LexerState *lexer_OpenFile(char const *path) { - while (*s) { - *s = toupper(*s); - s++; - } -} + bool isStdin = !strcmp(path, "-"); + struct LexerState *state = malloc(sizeof(*state)); -void lowerstring(char *s) -{ - while (*s) { - *s = tolower(*s); - s++; + /* Give stdin a nicer file name */ + if (isStdin) + path = "<stdin>"; + if (!state) { + error("Failed to open file \"%s\": %s\n", path, strerror(errno)); + return NULL; } -} + /* Keep a private copy of the path: callers such as fstk_RunInclude free theirs right after this call */ + size_t pathSize = strlen(path) + 1; + char *ownedPath = malloc(pathSize); + + if (!ownedPath) { + error("Failed to open file \"%s\": %s\n", path, strerror(errno)); + free(state); + return NULL; + } + memcpy(ownedPath, path, pathSize); + state->path = ownedPath; -void yyskipbytes(uint32_t count) -{ - pLexBuffer += count; -} + state->fd = isStdin ? 
STDIN_FILENO : open(path, O_RDONLY); + if (state->fd == -1) { + error("Failed to open file \"%s\": %s\n", path, strerror(errno)); + free(ownedPath); + free(state); + return NULL; + } + state->isMmapped = false; /* By default, assume it won't be mmap()ed */ + off_t size = lseek(state->fd, 0, SEEK_END); -void yyunputbytes(uint32_t count) -{ - pLexBuffer -= count; -} - -void yyunput(char c) -{ - if (pLexBuffer <= pLexBufferRealStart) - fatalerror("Buffer safety margin exceeded\n"); - - *(--pLexBuffer) = c; -} - -void yyunputstr(const char *s) -{ - int32_t len; - - len = strlen(s); - - /* - * It would be undefined behavior to subtract `len` from pLexBuffer and - * potentially have it point outside of pLexBufferRealStart's buffer, - * this is why the check is done this way. - * Refer to https://github.com/rednex/rgbds/pull/411#discussion_r319779797 - */ - if (pLexBuffer - pLexBufferRealStart < len) - fatalerror("Buffer safety margin exceeded\n"); + if (size != (off_t)-1) { /* `lseek` returns -1 on failure, e.g. for pipes */ + /* The file is a regular file, so use `mmap` for better performance */ - pLexBuffer -= len; - - memcpy(pLexBuffer, s, len); -} - -/* - * Marks that a new string expansion with name `tzName` ends here - * Enforces recursion depth - */ -void lex_BeginStringExpansion(const char *tzName) -{ - if (++nNbStringExpansions > nMaxRecursionDepth) - fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); - - struct sStringExpansionPos *pNewStringExpansion = - malloc(sizeof(*pNewStringExpansion)); - char *tzNewExpansionName = strdup(tzName); + /* + * Important: do NOT assign to `state->ptr` directly, to avoid a cast that may + * alter an eventual `MAP_FAILED` value. It would also invalidate `state->fd`, + * being on the other side of the union. + */ + void *pa = mmap(NULL, size, PROT_READ, MAP_PRIVATE, state->fd, 0); + + if (pa == MAP_FAILED && errno == ENOTSUP) + /* + * The implementation may not support MAP_PRIVATE; try again with MAP_SHARED + * instead, offering, I believe, weaker guarantees about external + * modifications to the file while reading it. That's still better than not + * opening it at all, though. 
+ */ + pa = mmap(NULL, size, PROT_READ, MAP_SHARED, state->fd, 0); - if (!pNewStringExpansion || !tzNewExpansionName) - fatalerror("Could not allocate memory to expand '%s'\n", tzName); + if (pa == MAP_FAILED) { + /* If mmap()ing failed, try again using another method (below) */ + state->isMmapped = false; + } else { + /* IMPORTANT: the `union` mandates this is accessed before other members! */ + close(state->fd); - pNewStringExpansion->tzName = tzNewExpansionName; - pNewStringExpansion->pBuffer = pLexBufferRealStart; - pNewStringExpansion->pBufferPos = pLexBuffer; - pNewStringExpansion->pParent = pCurrentStringExpansion; + state->isMmapped = true; + state->ptr = pa; + state->size = size; + state->offset = 0; /* Start reading at the beginning of the mapping */ + } + } + if (!state->isMmapped) { + /* Sometimes mmap() fails or isn't available, so have a fallback */ + lseek(state->fd, 0, SEEK_SET); + state->index = 0; + state->nbChars = 0; + } - pCurrentStringExpansion = pNewStringExpansion; + state->mode = LEXER_NORMAL; + state->atLineStart = true; + state->lineNo = 0; + state->capturing = false; + state->captureSize = 0; + state->captureBuf = NULL; + state->captureCapacity = 0; + state->expandStrings = true; /* Expansion stays on until lexer_ToggleStringExpansion(false) */ + return state; } -void yy_switch_to_buffer(YY_BUFFER_STATE buf) +struct LexerState *lexer_OpenFileView(void) { - pCurrentBuffer = buf; + return NULL; } -void yy_set_state(enum eLexerState i) +void lexer_DeleteState(struct LexerState *state) { - lexerstate = i; + if (state->isMmapped) + munmap(state->ptr, state->size); + else + close(state->fd); + free((char *)state->path); /* Private copy allocated by lexer_OpenFile */ + free(state); } -void yy_delete_buffer(YY_BUFFER_STATE buf) +void lexer_SetMode(enum LexerMode mode) { - free(buf->pBufferStart - SAFETYMARGIN); - free(buf); + lexerState->mode = mode; } -/* - * Maintains the following invariants: - * 1. nBufferSize < capacity - * 2. The buffer is terminated with 0 - * 3. 
nBufferSize is the size without the terminator - */ -static void yy_buffer_append(YY_BUFFER_STATE buf, size_t capacity, char c) +void lexer_ToggleStringExpansion(bool enable) { - assert(buf->pBufferStart[buf->nBufferSize] == 0); - assert(buf->nBufferSize + 1 < capacity); - - buf->pBufferStart[buf->nBufferSize++] = c; - buf->pBufferStart[buf->nBufferSize] = 0; + lexerState->expandStrings = enable; } -static void yy_buffer_append_newlines(YY_BUFFER_STATE buf, size_t capacity) -{ - /* Add newline if file doesn't end with one */ - if (buf->nBufferSize == 0 - || buf->pBufferStart[buf->nBufferSize - 1] != '\n') - yy_buffer_append(buf, capacity, '\n'); - - /* Add newline if \ will eat the last newline */ - if (buf->nBufferSize >= 2) { - size_t pos = buf->nBufferSize - 2; - - /* Skip spaces and tabs */ - while (pos > 0 && (buf->pBufferStart[pos] == ' ' - || buf->pBufferStart[pos] == '\t')) - pos--; +/* Functions for the actual lexer to obtain characters */ - if (buf->pBufferStart[pos] == '\\') - yy_buffer_append(buf, capacity, '\n'); - } -} - -YY_BUFFER_STATE yy_scan_bytes(char const *mem, uint32_t size) +static void reallocCaptureBuf(void) { - YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state)); - - if (pBuffer == NULL) - fatalerror("%s: Out of memory!\n", __func__); - - size_t capacity = size + 3; /* space for 2 newlines and terminator */ - - pBuffer->pBufferRealStart = malloc(capacity + SAFETYMARGIN); - - if (pBuffer->pBufferRealStart == NULL) - fatalerror("%s: Out of memory for buffer!\n", __func__); - - pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN; - pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN; - memcpy(pBuffer->pBuffer, mem, size); - pBuffer->pBuffer[size] = 0; - pBuffer->nBufferSize = size; - yy_buffer_append_newlines(pBuffer, capacity); - pBuffer->oAtLineStart = 1; - - return pBuffer; + lexerState->captureCapacity *= 2; + lexerState->captureBuf = realloc(lexerState->captureBuf, lexerState->captureCapacity); + if 
(!lexerState->captureBuf) + fatalerror("realloc error while resizing capture buffer: %s\n", strerror(errno)); } -YY_BUFFER_STATE yy_create_buffer(FILE *f) +/* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */ +static int peek(uint8_t distance) { - YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state)); - - if (pBuffer == NULL) - fatalerror("%s: Out of memory!\n", __func__); - - size_t size = 0, capacity = -1; - char *buf = NULL; - - /* - * Check if we can get the file size without implementation-defined - * behavior: - * - * From ftell(3p): - * [On error], ftell() and ftello() shall return −1, and set errno to - * indicate the error. - * - * The ftell() and ftello() functions shall fail if: [...] - * ESPIPE The file descriptor underlying stream is associated with a - * pipe, FIFO, or socket. - * - * From fseek(3p): - * The behavior of fseek() on devices which are incapable of seeking - * is implementation-defined. - */ - if (ftell(f) != -1) { - fseek(f, 0, SEEK_END); - capacity = ftell(f); - rewind(f); - } - - // If ftell errored or the block above wasn't executed - if (capacity == -1) - capacity = 4096; - // Handle 0-byte files gracefully - else if (capacity == 0) - capacity = 1; - - do { - if (buf == NULL || size >= capacity) { - if (buf) - capacity *= 2; - /* Give extra room for 2 newlines and terminator */ - buf = realloc(buf, capacity + SAFETYMARGIN + 3); - - if (buf == NULL) - fatalerror("%s: Out of memory for buffer!\n", - __func__); - } - - char *bufpos = buf + SAFETYMARGIN + size; - size_t read_count = fread(bufpos, 1, capacity - size, f); - - if (read_count == 0 && !feof(f)) - fatalerror("%s: fread error\n", __func__); - - size += read_count; - } while (!feof(f)); - - pBuffer->pBufferRealStart = buf; - pBuffer->pBufferStart = buf + SAFETYMARGIN; - pBuffer->pBuffer = buf + SAFETYMARGIN; - pBuffer->pBuffer[size] = 0; - pBuffer->nBufferSize = size; - - /* This is added here to make the buffer scaling above 
easy to express, - * while taking the newline space into account - * for the yy_buffer_append_newlines() call below. - */ - capacity += 3; - - /* Skip UTF-8 byte order mark. */ - if (pBuffer->nBufferSize >= BOM_SIZE - && !memcmp(pBuffer->pBuffer, bom, BOM_SIZE)) - pBuffer->pBuffer += BOM_SIZE; - - /* Convert all line endings to LF and spaces */ - - char *mem = pBuffer->pBuffer; - int32_t lineCount = 0; - - while (*mem) { - if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) { - mem += 2; - } else { - /* LF CR and CR LF */ - if (((mem[0] == '\n') && (mem[1] == '\r')) - || ((mem[0] == '\r') && (mem[1] == '\n'))) { - *mem++ = ' '; - *mem++ = '\n'; - lineCount++; - /* LF and CR */ - } else if ((mem[0] == '\n') || (mem[0] == '\r')) { - *mem++ = '\n'; - lineCount++; - } else { - mem++; - } - } - } - - if (mem != pBuffer->pBuffer + size) { - nLineNo = lineCount + 1; - fatalerror("Found null character\n"); + if (lexerState->isMmapped) { + if (lexerState->offset + distance >= lexerState->size) + return EOF; + return lexerState->ptr[lexerState->offset + distance]; } - /* Remove comments */ + if (lexerState->nbChars <= distance) { + /* Buffer isn't full enough, read some chars in */ - mem = pBuffer->pBuffer; - bool instring = false; + /* Compute the index we'll start writing to */ + size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; + size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */ + ssize_t nbCharsRead = 0; - while (*mem) { - if (*mem == '\"') - instring = !instring; +#define readChars(size) do { \ + nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \ + if (nbCharsRead == -1) \ + fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \ + writeIndex += nbCharsRead; \ + if (writeIndex == LEXER_BUF_SIZE) \ + writeIndex = 0; \ + lexerState->nbChars += nbCharsRead; /* Count all those chars in */ \ + target -= nbCharsRead; \ +} while (0) - if ((mem[0] == '\\') && 
(mem[1] == '\"' || mem[1] == '\\')) { - mem += 2; - } else if (instring) { - mem++; - } else { - /* Comments that start with ; anywhere in a line */ - if (*mem == ';') { - while (!((*mem == '\n') || (*mem == '\0'))) - *mem++ = ' '; - /* Comments that start with * at the start of a line */ - } else if ((mem[0] == '\n') && (mem[1] == '*')) { - warning(WARNING_OBSOLETE, - "'*' is deprecated for comments, please use ';' instead\n"); - mem++; - while (!((*mem == '\n') || (*mem == '\0'))) - *mem++ = ' '; - } else { - mem++; - } + /* If the range to fill passes over the buffer wrapping point, we need two reads */ + if (writeIndex + target > LEXER_BUF_SIZE) { + readChars(LEXER_BUF_SIZE - writeIndex); + /* If the read was incomplete, don't perform a second read */ + if (nbCharsRead < LEXER_BUF_SIZE - writeIndex) + target = 0; } - } + if (target != 0) + readChars(target); - yy_buffer_append_newlines(pBuffer, capacity); - pBuffer->oAtLineStart = 1; - return pBuffer; -} - -uint32_t lex_FloatAlloc(const struct sLexFloat *token) -{ - tLexFloat[nFloating] = *token; - - return (1 << (nFloating++)); -} +#undef readChars -/* - * Make sure that only non-zero ASCII characters are used. Also, check if the - * start is greater than the end of the range. 
- */ -bool lex_CheckCharacterRange(uint16_t start, uint16_t end) -{ - if (start > end || start < 1 || end > 127) { - error("Invalid character range (start: %" PRIu16 ", end: %" PRIu16 ")\n", - start, end); - return false; + /* If there aren't enough chars even after refilling, give up */ + if (lexerState->nbChars <= distance) + return EOF; } - return true; + return lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; } -void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end) +static void shiftChars(uint8_t distance) { - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingChars[start] &= ~id; - start++; + if (lexerState->capturing) { + if (lexerState->captureBuf) { + if (lexerState->captureSize + distance >= lexerState->captureCapacity) + reallocCaptureBuf(); + /* TODO: improve this? */ + for (uint8_t i = 0; i < distance; i++) + lexerState->captureBuf[lexerState->captureSize++] = peek(i); + } else { + lexerState->captureSize += distance; } } -} -void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingChars[start] |= id; - start++; - } + if (lexerState->isMmapped) { + lexerState->offset += distance; + } else { + lexerState->nbChars -= distance; + lexerState->index += distance; + /* Wrap around if necessary */ + if (lexerState->index >= LEXER_BUF_SIZE) + lexerState->index %= LEXER_BUF_SIZE; } } -void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end) +static int nextChar(void) { - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingFirstChar[start] &= ~id; - start++; - } - } -} + int c = peek(0); -void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingFirstChar[start] |= id; - start++; - } - } + /* If not at EOF, advance read position */ + if (c != EOF) + shiftChars(1); + return c; 
} -void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end) -{ - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingSecondChar[start] &= ~id; - start++; - } - } -} +/* "Services" provided by the lexer to the rest of the program */ -void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end) +char const *lexer_GetFileName(void) { - if (lex_CheckCharacterRange(start, end)) { - while (start <= end) { - tFloatingSecondChar[start] |= id; - start++; - } - } -} - -static struct sLexFloat *lexgetfloat(uint32_t nFloatMask) -{ - if (nFloatMask == 0) - fatalerror("Internal error in %s\n", __func__); - - int32_t i = 0; - - while ((nFloatMask & 1) == 0) { - nFloatMask >>= 1; - i++; - } - - return &tLexFloat[i]; + return lexerState->path; } -static uint32_t lexcalchash(char *s) +unsigned int lexer_GetLineNo(void) { - uint32_t hash = 0; - - while (*s) - hash = (hash * 283) ^ toupper(*s++); - - return hash % LEXHASHSIZE; -} - -void lex_Init(void) -{ - uint32_t i; - - for (i = 0; i < LEXHASHSIZE; i++) - tLexHash[i] = NULL; - - for (i = 0; i < 256; i++) { - tFloatingFirstChar[i] = 0; - tFloatingSecondChar[i] = 0; - tFloatingChars[i] = 0; - } - - nLexMaxLength = 0; - nFloating = 0; - - pCurrentStringExpansion = NULL; - nNbStringExpansions = 0; + return lexerState->lineNo; } -void lex_AddStrings(const struct sLexInitString *lex) +void lexer_DumpStringExpansions(void) { - while (lex->tzName) { - struct sLexString **ppHash; - uint32_t hash = lexcalchash(lex->tzName); - - ppHash = &tLexHash[hash]; - while (*ppHash) - ppHash = &((*ppHash)->next); - - *ppHash = malloc(sizeof(struct sLexString)); - if (*ppHash == NULL) - fatalerror("Out of memory!\n"); - - (*ppHash)->tzName = (char *)strdup(lex->tzName); - if ((*ppHash)->tzName == NULL) - fatalerror("Out of memory!\n"); - - (*ppHash)->nNameLength = strlen(lex->tzName); - (*ppHash)->nToken = lex->nToken; - (*ppHash)->next = NULL; - - upperstring((*ppHash)->tzName); - - if 
((*ppHash)->nNameLength > nLexMaxLength) - nLexMaxLength = (*ppHash)->nNameLength; - - lex++; - } + /* TODO */ } -/* - * Gets the "float" mask and "float" length. - * "Float" refers to the token type of a token that is not a keyword. - * The character classes floatingFirstChar, floatingSecondChar, and - * floatingChars are defined separately for each token type. - * It uses bit masks to match against a set of simple regular expressions - * of the form /[floatingFirstChar]([floatingSecondChar][floatingChars]*)?/. - * The token types with the longest match from the current position in the - * buffer will have their bits set in the float mask. - */ -void yylex_GetFloatMaskAndFloatLen(uint32_t *pnFloatMask, uint32_t *pnFloatLen) +static int yylex_NORMAL(void) { - /* - * Note that '\0' should always have a bit mask of 0 in the "floating" - * tables, so it doesn't need to be checked for separately. - */ - - char *s = pLexBuffer; - uint32_t nOldFloatMask = 0; - uint32_t nFloatMask = tFloatingFirstChar[(uint8_t)*s]; - - if (nFloatMask != 0) { - s++; - nOldFloatMask = nFloatMask; - nFloatMask &= tFloatingSecondChar[(uint8_t)*s]; - - while (nFloatMask != 0) { - s++; - nOldFloatMask = nFloatMask; - nFloatMask &= tFloatingChars[(uint8_t)*s]; - } - } + for (;;) { + int c = nextChar(); - *pnFloatMask = nOldFloatMask; - *pnFloatLen = (uint32_t)(s - pLexBuffer); -} - -/* - * Gets the longest keyword/operator from the current position in the buffer. 
- */ -struct sLexString *yylex_GetLongestFixed(void) -{ - struct sLexString *pLongestFixed = NULL; - char *s = pLexBuffer; - uint32_t hash = 0; - uint32_t length = 0; - - while (length < nLexMaxLength && *s) { - hash = (hash * 283) ^ toupper(*s); - s++; - length++; - - struct sLexString *lex = tLexHash[hash % LEXHASHSIZE]; - - while (lex) { - if (lex->nNameLength == length - && strncasecmp(pLexBuffer, lex->tzName, length) == 0) { - pLongestFixed = lex; - break; + switch (c) { + case '\n': + if (lexerStateEOL) { + lexer_SetState(lexerStateEOL); + lexerStateEOL = NULL; } - lex = lex->next; - } - } + return '\n'; - return pLongestFixed; -} - -size_t CopyMacroArg(char *dest, size_t maxLength, char c) -{ - size_t i; - char const *s; + /* Ignore whitespace */ + case ' ': + case '\t': + break; - if (c == '@') - s = macro_GetUniqueIDStr(); - else if (c >= '1' && c <= '9') - s = macro_GetArg(c - '0'); - else - return 0; - - if (s == NULL) - fatalerror("Macro argument '\\%c' not defined\n", c); - - // TODO: `strncpy`, nay? 
- for (i = 0; s[i] != 0; i++) { - if (i >= maxLength) - fatalerror("Macro argument too long to fit buffer\n"); + case EOF: + /* Captures end at their buffer's boundary no matter what */ + if (!lexerState->capturing) { + /* TODO: use `yywrap()` */ + } + return 0; - dest[i] = s[i]; + default: + error("Unknown character '%c'\n"); + } } - - return i; } -static inline void yylex_StringWriteChar(char *s, size_t index, char c) +static int yylex_RAW(void) { - if (index >= MAXSTRLEN) - fatalerror("String too long\n"); - - s[index] = c; + fatalerror("LEXER_RAW not yet implemented\n"); } -static inline void yylex_SymbolWriteChar(char *s, size_t index, char c) +int yylex(void) { - if (index >= MAXSYMLEN) - fatalerror("Symbol too long\n"); + if (lexerState->atLineStart) + lexerState->lineNo++; - s[index] = c; -} + static int (* const lexerModeFuncs[])(void) = { + [LEXER_NORMAL] = yylex_NORMAL, + [LEXER_RAW] = yylex_RAW, + }; + int token = lexerModeFuncs[lexerState->mode](); -/* - * Trims white space at the end of a string. - * The index parameter is the index of the 0 at the end of the string. 
- */ -void yylex_TrimEnd(char *s, size_t index) -{ - int32_t i = (int32_t)index - 1; + if (token == '\n') + lexerState->atLineStart = true; + else if (lexerState->atLineStart) + lexerState->atLineStart = false; - while ((i >= 0) && (s[i] == ' ' || s[i] == '\t')) { - s[i] = 0; - i--; - } + return token; } -size_t yylex_ReadBracketedSymbol(char *dest, size_t index) +void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, + char **capture, size_t *size, char const *name) { - char sym[MAXSYMLEN + 1]; - char ch; - size_t i = 0; - size_t length, maxLength; - const char *mode = NULL; + lexerState->capturing = true; + lexerState->captureSize = 0; + unsigned int level = 0; + char *captureStart; - for (ch = *pLexBuffer; - ch != '}' && ch != '"' && ch != '\n'; - ch = *(++pLexBuffer)) { - if (ch == '\\') { - ch = *(++pLexBuffer); - maxLength = MAXSYMLEN - i; - length = CopyMacroArg(&sym[i], maxLength, ch); - - if (length != 0) - i += length; - else - fatalerror("Illegal character escape '%c'\n", ch); - } else if (ch == '{') { - /* Handle nested symbols */ - ++pLexBuffer; - i += yylex_ReadBracketedSymbol(sym, i); - --pLexBuffer; - } else if (ch == ':' && !mode) { /* Only grab 1st colon */ - /* Use a whitelist of modes, which does prevent the - * use of some features such as precision, - * but also avoids a security flaw - */ - const char *acceptedModes = "bxXd"; - /* Binary isn't natively supported, - * so it's handled differently - */ - static const char * const formatSpecifiers[] = { - "", "%" PRIx32, "%" PRIX32, "%" PRId32 - }; - /* Prevent reading out of bounds! 
*/ - const char *designatedMode; - - if (i != 1) - fatalerror("Print types are exactly 1 character long\n"); - - designatedMode = strchr(acceptedModes, sym[i - 1]); - if (!designatedMode) - fatalerror("Illegal print type '%c'\n", sym[i - 1]); - mode = formatSpecifiers[designatedMode - acceptedModes]; - /* Begin writing the symbol again */ - i = 0; + if (capture) { + if (lexerState->isMmapped) { + captureStart = lexerState->ptr; } else { - yylex_SymbolWriteChar(sym, i++, ch); + lexerState->captureCapacity = 128; /* The initial size will be twice that */ + reallocCaptureBuf(); + captureStart = lexerState->captureBuf; } } - /* Properly terminate the string */ - yylex_SymbolWriteChar(sym, i, 0); - - /* It's assumed we're writing to a T_STRING */ - maxLength = MAXSTRLEN - index; - length = symvaluetostring(&dest[index], maxLength, sym, mode); - - if (*pLexBuffer == '}') - pLexBuffer++; - else - fatalerror("Missing }\n"); + for (;;) { + int token = yylex(); - return length; -} - -static void yylex_ReadQuotedString(void) -{ - size_t index = 0; - size_t length, maxLength; - - while (*pLexBuffer != '"' && *pLexBuffer != '\n') { - char ch = *pLexBuffer++; - - if (ch == '\\') { - ch = *pLexBuffer++; - - switch (ch) { - case 'n': - ch = '\n'; - break; - case 'r': - ch = '\r'; - break; - case 't': - ch = '\t'; - break; - case '\\': - ch = '\\'; + if (level == 0) { + if (token == endToken) break; - case '"': - ch = '"'; - break; - case ',': - ch = ','; - break; - case '{': - ch = '{'; - break; - case '}': - ch = '}'; - break; - default: - maxLength = MAXSTRLEN - index; - length = CopyMacroArg(&yylval.tzString[index], - maxLength, ch); - - if (length != 0) - index += length; - else - fatalerror("Illegal character escape '%c'\n", ch); - - ch = 0; - break; - } - } else if (ch == '{') { - // Get bracketed symbol within string. 
- index += yylex_ReadBracketedSymbol(yylval.tzString, - index); - ch = 0; - } - - if (ch) - yylex_StringWriteChar(yylval.tzString, index++, ch); - } - - yylex_StringWriteChar(yylval.tzString, index, 0); - - if (*pLexBuffer == '"') - pLexBuffer++; - else - fatalerror("Unterminated string\n"); -} - -static uint32_t yylex_NORMAL(void) -{ - struct sLexString *pLongestFixed = NULL; - uint32_t nFloatMask, nFloatLen; - uint32_t linestart = AtLineStart; - - AtLineStart = 0; - -scanagain: - while (*pLexBuffer == ' ' || *pLexBuffer == '\t') { - linestart = 0; - pLexBuffer++; - } - - if (*pLexBuffer == 0) { - // Reached the end of a file, macro, or rept. - if (yywrap() == 0) { - linestart = AtLineStart; - AtLineStart = 0; - goto scanagain; - } - } - - /* Check for line continuation character */ - if (*pLexBuffer == '\\') { - /* - * Look for line continuation character after a series of - * spaces. This is also useful for files that use Windows line - * endings: "\r\n" is replaced by " \n" before the lexer has the - * opportunity to see it. - */ - if (pLexBuffer[1] == ' ' || pLexBuffer[1] == '\t') { - pLexBuffer += 2; - while (1) { - if (*pLexBuffer == ' ' || *pLexBuffer == '\t') { - pLexBuffer++; - } else if (*pLexBuffer == '\n') { - pLexBuffer++; - nLineNo++; - goto scanagain; - } else { - error("Expected a new line after the continuation character.\n"); - pLexBuffer++; - } - } - } - - /* Line continuation character */ - if (pLexBuffer[1] == '\n') { - pLexBuffer += 2; - nLineNo++; - goto scanagain; - } - - /* - * If there isn't a newline character or a space, ignore the - * character '\'. It will eventually be handled by other - * functions like PutMacroArg(). - */ - } - - /* - * Try to match an identifier, macro argument (e.g. \1), - * or numeric literal. - */ - yylex_GetFloatMaskAndFloatLen(&nFloatMask, &nFloatLen); - - /* Try to match a keyword or operator. 
*/ - pLongestFixed = yylex_GetLongestFixed(); - - if (nFloatLen == 0 && pLongestFixed == NULL) { - /* - * No keyword, identifier, operator, or numerical literal - * matches. - */ - - if (*pLexBuffer == '"') { - pLexBuffer++; - yylex_ReadQuotedString(); - return T_STRING; - } else if (*pLexBuffer == '{') { - pLexBuffer++; - size_t len = yylex_ReadBracketedSymbol(yylval.tzString, - 0); - yylval.tzString[len] = 0; - return T_STRING; - } - - /* - * It's not a keyword, operator, identifier, macro argument, - * numeric literal, string, or bracketed symbol, so just return - * the ASCII character. - */ - unsigned char ch = *pLexBuffer++; - - if (ch == '\n') - AtLineStart = 1; - - /* - * Check for invalid unprintable characters. - * They may not be readily apparent in a text editor, - * so this is useful for identifying encoding problems. - */ - if (ch != 0 - && ch != '\n' - && !(ch >= 0x20 && ch <= 0x7E)) - fatalerror("Found garbage character: 0x%02X\n", ch); - - return ch; - } - - if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) { - /* - * Longest match was an identifier, macro argument, or numeric - * literal. - */ - struct sLexFloat *token = lexgetfloat(nFloatMask); - - if (token->Callback) { - int32_t done = token->Callback(pLexBuffer, nFloatLen); - - if (!done) - goto scanagain; - } - - uint32_t type = token->nToken; - - if (type == T_ID && strchr(yylval.tzSym, '.')) - type = T_LOCAL_ID; - - if (linestart && type == T_ID) - return T_LABEL; - return type; - } - - /* Longest match was a keyword or operator. 
*/ - pLexBuffer += pLongestFixed->nNameLength; - yylval.nConstValue = pLongestFixed->nToken; - return pLongestFixed->nToken; -} - -static uint32_t yylex_MACROARGS(void) -{ - size_t index = 0; - size_t length, maxLength; - - while ((*pLexBuffer == ' ') || (*pLexBuffer == '\t')) - pLexBuffer++; - - while ((*pLexBuffer != ',') && (*pLexBuffer != '\n')) { - char ch = *pLexBuffer++; - - if (ch == '\\') { - ch = *pLexBuffer++; - - switch (ch) { - case 'n': - ch = '\n'; - break; - case 't': - ch = '\t'; - break; - case '\\': - ch = '\\'; - break; - case '"': - ch = '\"'; - break; - case ',': - ch = ','; - break; - case '{': - ch = '{'; - break; - case '}': - ch = '}'; - break; - case ' ': - case '\t': - /* - * Look for line continuation character after a - * series of spaces. This is also useful for - * files that use Windows line endings: "\r\n" - * is replaced by " \n" before the lexer has the - * opportunity to see it. - */ - while (1) { - if (*pLexBuffer == ' ' - || *pLexBuffer == '\t') { - pLexBuffer++; - } else if (*pLexBuffer == '\n') { - pLexBuffer++; - nLineNo++; - ch = 0; - break; - } else { - error("Expected a new line after the continuation character.\n"); - } - } - break; - case '\n': - /* Line continuation character */ - nLineNo++; - ch = 0; - break; - default: - maxLength = MAXSTRLEN - index; - length = CopyMacroArg(&yylval.tzString[index], - maxLength, ch); - - if (length != 0) - index += length; - else - fatalerror("Illegal character escape '%c'\n", ch); - - ch = 0; + /* + * Hack: skipping after a `if` requires stopping on three different tokens, + * which there is no simple way to make this function support. Instead, + * if ELIF is the end token, ELSE and ENDC are also checked for here. 
+ */ + if (endToken == T_POP_ELIF && (token == T_POP_ELSE || token == T_POP_ENDC)) break; - } - } else if (ch == '{') { - index += yylex_ReadBracketedSymbol(yylval.tzString, - index); - ch = 0; } - if (ch) - yylex_StringWriteChar(yylval.tzString, index++, ch); - } - - if (index) { - yylex_StringWriteChar(yylval.tzString, index, 0); - /* trim trailing white space at the end of the line */ - if (*pLexBuffer == '\n') - yylex_TrimEnd(yylval.tzString, index); - - return T_STRING; - } else if (*pLexBuffer == '\n') { - pLexBuffer++; - AtLineStart = 1; - return '\n'; - } else if (*pLexBuffer == ',') { - pLexBuffer++; - return ','; + if (token == EOF) + error("Unterminated %s\n", name); + else if (token == blockStartToken) + level++; + else if (token == blockEndToken) + level--; } - fatalerror("Internal error in %s\n", __func__); -} - -int yylex(void) -{ - int returnedChar; - - switch (lexerstate) { - case LEX_STATE_NORMAL: - returnedChar = yylex_NORMAL(); - break; - case LEX_STATE_MACROARGS: - returnedChar = yylex_MACROARGS(); - break; - default: - fatalerror("%s: Internal error.\n", __func__); + if (capture) { + *capture = captureStart; + *size = lexerState->captureSize; } - - /* Check if string expansions were fully read */ - while (pCurrentStringExpansion - && pCurrentStringExpansion->pBuffer == pLexBufferRealStart - && pCurrentStringExpansion->pBufferPos <= pLexBuffer) { - struct sStringExpansionPos *pParent = - pCurrentStringExpansion->pParent; - free(pCurrentStringExpansion->tzName); - free(pCurrentStringExpansion); - - pCurrentStringExpansion = pParent; - nNbStringExpansions--; - } - - return returnedChar; + lexerState->captureBuf = NULL; } diff --git a/src/asm/macro.c b/src/asm/macro.c index 852a4bbda..713649430 100644 --- a/src/asm/macro.c +++ b/src/asm/macro.c @@ -61,7 +61,7 @@ void macro_AppendArg(struct MacroArgs **argPtr, char *s) #define macArgs (*argPtr) if (macArgs->nbArgs == MAXMACROARGS) error("A maximum of " EXPAND_AND_STR(MAXMACROARGS) - " arguments is 
allowed\n"); + " arguments is allowed\n"); if (macArgs->nbArgs >= macArgs->capacity) { macArgs->capacity *= 2; /* Check that overflow didn't roll us back */ diff --git a/src/asm/main.c b/src/asm/main.c index 2e94fe4c1..2f44f453b 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -6,6 +6,7 @@ * SPDX-License-Identifier: MIT */ +#include #include #include #include @@ -41,10 +42,6 @@ char **cldefines; clock_t nStartClock, nEndClock; uint32_t nTotalLines, nIFDepth; -bool skipElif; -uint32_t unionStart[128], unionSize[128]; - -int32_t nLineNo; #if defined(YYDEBUG) && YYDEBUG extern int yydebug; @@ -76,64 +73,8 @@ struct sOptionStackEntry *pOptionStack; void opt_SetCurrentOptions(struct sOptions *pOpt) { - if (nGBGfxID != -1) { - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - } - if (nBinaryID != -1) { - lex_FloatDeleteRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatDeleteRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - lex_FloatDeleteSecondRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatDeleteSecondRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - } - CurrentOptions = *pOpt; - - if (nGBGfxID != -1) { - lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - 
lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - } - if (nBinaryID != -1) { - lex_FloatAddRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - lex_FloatAddSecondRange(nBinaryID, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddSecondRange(nBinaryID, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - } + /* TODO */ + (void)pOpt; } void opt_Parse(char *s) @@ -251,6 +192,22 @@ static void opt_ParseDefines(void) sym_AddString(cldefines[i], cldefines[i + 1]); } +void upperstring(char *s) +{ + while (*s) { + *s = toupper(*s); + s++; + } +} + +void lowerstring(char *s) +{ + while (*s) { + *s = tolower(*s); + s++; + } +} + /* Escapes Make-special chars from a string */ static char *make_escape(const char *str) { @@ -516,8 +473,6 @@ int main(int argc, char *argv[]) tzMainfile = argv[argc - 1]; - setup_lexer(); - if (verbose) printf("Assembling %s\n", tzMainfile); @@ -530,17 +485,20 @@ int main(int argc, char *argv[]) nStartClock = clock(); - nLineNo = 1; nTotalLines = 0; nIFDepth = 0; - skipElif = true; sym_Init(); sym_SetExportAll(exportall); fstk_Init(tzMainfile); + struct LexerState *state = lexer_OpenFile(tzMainfile); + + if (!state) + fatalerror("Failed to open main file!"); + lexer_SetState(state); + opt_ParseDefines(); charmap_New("main", NULL); - yy_set_state(LEX_STATE_NORMAL); 
opt_SetCurrentOptions(&DefaultOptions); if (yyparse() != 0 || nbErrors != 0) diff --git a/src/asm/section.c b/src/asm/section.c index 3658dd672..ee5d5144b 100644 --- a/src/asm/section.c +++ b/src/asm/section.c @@ -656,9 +656,15 @@ void out_BinaryFile(char const *s, int32_t startPos) startPos = 0; } - FILE *f = fstk_FindFile(s, NULL); + char *fullPath = NULL; + size_t size = 0; + FILE *f = NULL; + + if (fstk_FindFile(s, &fullPath, &size)) + f = fopen(fullPath, "rb"); if (!f) { + free(fullPath); if (oGeneratedMissingIncludes) { oFailedOnMissingInclude = true; return; @@ -699,6 +705,7 @@ void out_BinaryFile(char const *s, int32_t startPos) error("Error reading INCBIN file '%s': %s\n", s, strerror(errno)); fclose(f); + free(fullPath); } void out_BinaryFileSlice(char const *s, int32_t start_pos, int32_t length) @@ -715,9 +722,15 @@ void out_BinaryFileSlice(char const *s, int32_t start_pos, int32_t length) if (length == 0) /* Don't even bother with 0-byte slices */ return; - FILE *f = fstk_FindFile(s, NULL); + char *fullPath = NULL; + size_t size = 0; + FILE *f = NULL; + + if (fstk_FindFile(s, &fullPath, &size)) + f = fopen(fullPath, "rb"); if (!f) { + free(fullPath); if (oGeneratedMissingIncludes) { oFailedOnMissingInclude = true; return; @@ -767,6 +780,7 @@ void out_BinaryFileSlice(char const *s, int32_t start_pos, int32_t length) } fclose(f); + free(fullPath); } /* diff --git a/src/asm/symbol.c b/src/asm/symbol.c index d7d219e6a..cb0f29179 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -82,7 +82,7 @@ static int32_t Callback_NARG(void) static int32_t Callback__LINE__(void) { - return nLineNo; + return lexer_GetLineNo(); } static int32_t CallbackPC(void) @@ -113,8 +113,9 @@ int32_t sym_GetValue(struct Symbol const *sym) static void updateSymbolFilename(struct Symbol *sym) { if (snprintf(sym->fileName, _MAX_PATH + 1, "%s", - tzCurrentFileName) > _MAX_PATH) - fatalerror("%s: File name is too long: '%s'\n", __func__, tzCurrentFileName); + lexer_GetFileName()) > 
_MAX_PATH) + fatalerror("%s: File name is too long: '%s'\n", __func__, + lexer_GetFileName()); sym->fileLine = fstk_GetLine(); } diff --git a/src/asm/warning.c b/src/asm/warning.c index f1fe00a1a..2521baa96 100644 --- a/src/asm/warning.c +++ b/src/asm/warning.c @@ -204,7 +204,7 @@ void verror(const char *fmt, va_list args, char const *flag) fstk_Dump(); fprintf(stderr, flag ? ": [-Werror=%s]\n " : ":\n ", flag); vfprintf(stderr, fmt, args); - fstk_DumpStringExpansions(); + lexer_DumpStringExpansions(); nbErrors++; } @@ -256,7 +256,7 @@ void warning(enum WarningID id, char const *fmt, ...) fstk_Dump(); fprintf(stderr, ": [-W%s]\n ", flag); vfprintf(stderr, fmt, args); - fstk_DumpStringExpansions(); + lexer_DumpStringExpansions(); va_end(args); } From 71f88717024cd5434ff7ee541a112ece86082121 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Thu, 23 Jul 2020 13:49:45 +0200 Subject: [PATCH 02/59] Implement more functionality Macro arg detection, first emitted tokens, primitive (bad) column counting --- include/asm/lexer.h | 5 +- src/asm/asmy.y | 4 +- src/asm/fstack.c | 3 +- src/asm/globlex.c | 698 -------------------------------------------- src/asm/lexer.c | 117 +++++++- src/asm/main.c | 12 +- 6 files changed, 120 insertions(+), 719 deletions(-) delete mode 100644 src/asm/globlex.c diff --git a/include/asm/lexer.h b/include/asm/lexer.h index e4fcd8445..32df75d3f 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -43,10 +43,11 @@ void lexer_SetMode(enum LexerMode mode); void lexer_ToggleStringExpansion(bool enable); char const *lexer_GetFileName(void); -unsigned int lexer_GetLineNo(void); +uint32_t lexer_GetLineNo(void); +uint32_t lexer_GetColNo(void); void lexer_DumpStringExpansions(void); int yylex(void); void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, - char **capture, size_t *size, char const *name); + char const **capture, size_t *size, char const *name); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/src/asm/asmy.y 
b/src/asm/asmy.y index 9f3bc873c..a8ec303a6 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -604,7 +604,7 @@ load : T_POP_LOAD string ',' sectiontype sectorg sectattrs { rept : T_POP_REPT uconst { uint32_t nDefinitionLineNo = lexer_GetLineNo(); - char *body; + char const *body; size_t size; lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR, &body, &size, "REPT block"); @@ -614,7 +614,7 @@ rept : T_POP_REPT uconst { macrodef : T_LABEL ':' T_POP_MACRO { int32_t nDefinitionLineNo = lexer_GetLineNo(); - char *body; + char const *body; size_t size; lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM, &body, &size, "macro definition"); diff --git a/src/asm/fstack.c b/src/asm/fstack.c index b094f14ba..c28937807 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -250,7 +250,8 @@ void fstk_Dump(void) pLastFile = pLastFile->next; } - fprintf(stderr, "%s(%" PRId32 ")", lexer_GetFileName(), lexer_GetLineNo()); + fprintf(stderr, "%s(%" PRId32 ",%" PRId32 ")", + lexer_GetFileName(), lexer_GetLineNo(), lexer_GetColNo()); } void fstk_DumpToStr(char *buf, size_t buflen) diff --git a/src/asm/globlex.c b/src/asm/globlex.c deleted file mode 100644 index 89d1556fa..000000000 --- a/src/asm/globlex.c +++ /dev/null @@ -1,698 +0,0 @@ -/* - * This file is part of RGBDS. - * - * Copyright (c) 1997-2018, Carsten Sorensen and RGBDS contributors. 
- * - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include -#include -#include -#include - -#include "asm/asm.h" -#include "asm/lexer.h" -#include "asm/macro.h" -#include "asm/main.h" -#include "asm/rpn.h" -#include "asm/section.h" -#include "asm/warning.h" - -#include "helpers.h" - -#include "asmy.h" - -bool oDontExpandStrings; -int32_t nGBGfxID = -1; -int32_t nBinaryID = -1; - -static int32_t gbgfx2bin(char ch) -{ - int32_t i; - - for (i = 0; i <= 3; i++) { - if (CurrentOptions.gbgfx[i] == ch) - return i; - } - - return 0; -} - -static int32_t binary2bin(char ch) -{ - int32_t i; - - for (i = 0; i <= 1; i++) { - if (CurrentOptions.binary[i] == ch) - return i; - } - - return 0; -} - -static int32_t char2bin(char ch) -{ - if (ch >= 'a' && ch <= 'f') - return (ch - 'a' + 10); - - if (ch >= 'A' && ch <= 'F') - return (ch - 'A' + 10); - - if (ch >= '0' && ch <= '9') - return (ch - '0'); - - return 0; -} - -typedef int32_t(*x2bin) (char ch); - -static int32_t ascii2bin(char *s) -{ - char *start = s; - uint32_t radix = 10; - uint32_t result = 0; - x2bin convertfunc = char2bin; - - switch (*s) { - case '$': - radix = 16; - s++; - convertfunc = char2bin; - break; - case '&': - radix = 8; - s++; - convertfunc = char2bin; - break; - case '`': - radix = 4; - s++; - convertfunc = gbgfx2bin; - break; - case '%': - radix = 2; - s++; - convertfunc = binary2bin; - break; - default: - /* Handle below */ - break; - } - - const uint32_t max_q = UINT32_MAX / radix; - const uint32_t max_r = UINT32_MAX % radix; - - if (*s == '\0') { - /* - * There are no digits after the radix prefix - * (or the string is empty, which shouldn't happen). - */ - error("Invalid integer constant\n"); - } else if (radix == 4) { - int32_t size = 0; - int32_t c; - - while (*s != '\0') { - c = convertfunc(*s++); - result = result * 2 + ((c & 2) << 7) + (c & 1); - size++; - } - - /* - * Extending a graphics constant longer than 8 pixels, - * the Game Boy tile width, produces a nonsensical result. 
- */ - if (size > 8) { - warning(WARNING_LARGE_CONSTANT, "Graphics constant '%s' is too long\n", - start); - } - } else { - bool overflow = false; - - while (*s != '\0') { - int32_t digit = convertfunc(*s++); - - if (result > max_q - || (result == max_q && digit > max_r)) { - overflow = true; - } - result = result * radix + digit; - } - - if (overflow) - warning(WARNING_LARGE_CONSTANT, "Integer constant '%s' is too large\n", - start); - } - - return result; -} - -uint32_t ParseFixedPoint(char *s, uint32_t size) -{ - uint32_t i; - uint32_t dot = 0; - - for (i = 0; i < size; i++) { - if (s[i] == '.') { - dot++; - - if (dot == 2) - break; - } - } - - yyskipbytes(i); - - yylval.nConstValue = (int32_t)(atof(s) * 65536); - - return 1; -} - -uint32_t ParseNumber(char *s, uint32_t size) -{ - char dest[256]; - - if (size > 255) - fatalerror("Number token too long\n"); - - strncpy(dest, s, size); - dest[size] = 0; - yylval.nConstValue = ascii2bin(dest); - - yyskipbytes(size); - - return 1; -} - -/* - * If the symbol name ends before the end of the macro arg, - * return a pointer to the rest of the macro arg. - * Otherwise, return NULL. 
- */ -char const *AppendMacroArg(char whichArg, char *dest, size_t *destIndex) -{ - char const *marg; - - if (whichArg == '@') - marg = macro_GetUniqueIDStr(); - else if (whichArg >= '1' && whichArg <= '9') - marg = macro_GetArg(whichArg - '0'); - else - fatalerror("Invalid macro argument '\\%c' in symbol\n", whichArg); - - if (!marg) - fatalerror("Macro argument '\\%c' not defined\n", whichArg); - - char ch; - - while ((ch = *marg) != 0) { - if ((ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') - || ch == '_' - || ch == '@' - || ch == '#' - || ch == '.') { - if (*destIndex >= MAXSYMLEN) - fatalerror("Symbol too long\n"); - - dest[*destIndex] = ch; - (*destIndex)++; - } else { - return marg; - } - - marg++; - } - - return NULL; -} - -uint32_t ParseSymbol(char *src, uint32_t size) -{ - char dest[MAXSYMLEN + 1]; - size_t srcIndex = 0; - size_t destIndex = 0; - char const *rest = NULL; - - while (srcIndex < size) { - char ch = src[srcIndex++]; - - if (ch == '\\') { - /* - * We don't check if srcIndex is still less than size, - * but that can only fail to be true when the - * following char is neither '@' nor a digit. - * In that case, AppendMacroArg() will catch the error. - */ - ch = src[srcIndex++]; - - rest = AppendMacroArg(ch, dest, &destIndex); - /* If the symbol's end was in the middle of the token */ - if (rest) - break; - } else { - if (destIndex >= MAXSYMLEN) - fatalerror("Symbol too long\n"); - dest[destIndex++] = ch; - } - } - - dest[destIndex] = 0; - - /* Tell the lexer we read all bytes that we did */ - yyskipbytes(srcIndex); - - /* - * If an escape's expansion left some chars after the symbol's end, - * such as the `::` in a `Backup\1` expanded to `BackupCamX::`, - * put those into the buffer. - * Note that this NEEDS to be done after the `yyskipbytes` above. 
- */ - if (rest) - yyunputstr(rest); - - /* If the symbol is an EQUS, expand it */ - if (!oDontExpandStrings) { - struct Symbol const *sym = sym_FindSymbol(dest); - - if (sym && sym->type == SYM_EQUS) { - char const *s; - - lex_BeginStringExpansion(dest); - - /* Feed the symbol's contents into the buffer */ - yyunputstr(s = sym_GetStringValue(sym)); - - /* Lines inserted this way shall not increase lexer_GetLineNo() */ - while (*s) { - if (*s++ == '\n') - lexer_GetLineNo()--; - } - return 0; - } - } - - strcpy(yylval.tzSym, dest); - return 1; -} - -uint32_t PutMacroArg(char *src, uint32_t size) -{ - char const *s; - - yyskipbytes(size); - if ((size == 2 && src[1] >= '1' && src[1] <= '9')) { - s = macro_GetArg(src[1] - '0'); - - if (s != NULL) - yyunputstr(s); - else - error("Macro argument '\\%c' not defined\n", src[1]); - } else { - error("Invalid macro argument '\\%c'\n", src[1]); - } - return 0; -} - -uint32_t PutUniqueID(char *src, uint32_t size) -{ - (void)src; - char const *s; - - yyskipbytes(size); - - s = macro_GetUniqueIDStr(); - - if (s != NULL) - yyunputstr(s); - else - error("Macro unique label string not defined\n"); - - return 0; -} - -enum { - T_LEX_MACROARG = 3000, - T_LEX_MACROUNIQUE -}; - -const struct sLexInitString lexer_strings[] = { - {"adc", T_Z80_ADC}, - {"add", T_Z80_ADD}, - {"and", T_Z80_AND}, - {"bit", T_Z80_BIT}, - {"call", T_Z80_CALL}, - {"ccf", T_Z80_CCF}, - {"cpl", T_Z80_CPL}, - {"cp", T_Z80_CP}, - {"daa", T_Z80_DAA}, - {"dec", T_Z80_DEC}, - {"di", T_Z80_DI}, - {"ei", T_Z80_EI}, - {"halt", T_Z80_HALT}, - {"inc", T_Z80_INC}, - {"jp", T_Z80_JP}, - {"jr", T_Z80_JR}, - {"ld", T_Z80_LD}, - {"ldi", T_Z80_LDI}, - {"ldd", T_Z80_LDD}, - {"ldio", T_Z80_LDIO}, - {"ldh", T_Z80_LDIO}, - {"nop", T_Z80_NOP}, - {"or", T_Z80_OR}, - {"pop", T_Z80_POP}, - {"push", T_Z80_PUSH}, - {"res", T_Z80_RES}, - {"reti", T_Z80_RETI}, - {"ret", T_Z80_RET}, - {"rlca", T_Z80_RLCA}, - {"rlc", T_Z80_RLC}, - {"rla", T_Z80_RLA}, - {"rl", T_Z80_RL}, - {"rrc", T_Z80_RRC}, - 
{"rrca", T_Z80_RRCA}, - {"rra", T_Z80_RRA}, - {"rr", T_Z80_RR}, - {"rst", T_Z80_RST}, - {"sbc", T_Z80_SBC}, - {"scf", T_Z80_SCF}, - {"set", T_POP_SET}, - {"sla", T_Z80_SLA}, - {"sra", T_Z80_SRA}, - {"srl", T_Z80_SRL}, - {"stop", T_Z80_STOP}, - {"sub", T_Z80_SUB}, - {"swap", T_Z80_SWAP}, - {"xor", T_Z80_XOR}, - - {"nz", T_CC_NZ}, - {"z", T_CC_Z}, - {"nc", T_CC_NC}, - /* Handled in list of registers */ - /* { "c", T_TOKEN_C }, */ - - {"hli", T_MODE_HL_INC}, - {"hld", T_MODE_HL_DEC}, - {"$ff00+c", T_MODE_HW_C}, - {"$ff00 + c", T_MODE_HW_C}, - {"af", T_MODE_AF}, - {"bc", T_MODE_BC}, - {"de", T_MODE_DE}, - {"hl", T_MODE_HL}, - {"sp", T_MODE_SP}, - - {"a", T_TOKEN_A}, - {"b", T_TOKEN_B}, - {"c", T_TOKEN_C}, - {"d", T_TOKEN_D}, - {"e", T_TOKEN_E}, - {"h", T_TOKEN_H}, - {"l", T_TOKEN_L}, - - {"||", T_OP_LOGICOR}, - {"&&", T_OP_LOGICAND}, - {"==", T_OP_LOGICEQU}, - {">", T_OP_LOGICGT}, - {"<", T_OP_LOGICLT}, - {">=", T_OP_LOGICGE}, - {"<=", T_OP_LOGICLE}, - {"!=", T_OP_LOGICNE}, - {"!", T_OP_LOGICNOT}, - {"|", T_OP_OR}, - {"^", T_OP_XOR}, - {"&", T_OP_AND}, - {"<<", T_OP_SHL}, - {">>", T_OP_SHR}, - {"+", T_OP_ADD}, - {"-", T_OP_SUB}, - {"*", T_OP_MUL}, - {"/", T_OP_DIV}, - {"%", T_OP_MOD}, - {"~", T_OP_NOT}, - - {"def", T_OP_DEF}, - - {"fragment", T_POP_FRAGMENT}, - {"bank", T_OP_BANK}, - {"align", T_OP_ALIGN}, - - {"round", T_OP_ROUND}, - {"ceil", T_OP_CEIL}, - {"floor", T_OP_FLOOR}, - {"div", T_OP_FDIV}, - {"mul", T_OP_FMUL}, - {"sin", T_OP_SIN}, - {"cos", T_OP_COS}, - {"tan", T_OP_TAN}, - {"asin", T_OP_ASIN}, - {"acos", T_OP_ACOS}, - {"atan", T_OP_ATAN}, - {"atan2", T_OP_ATAN2}, - - {"high", T_OP_HIGH}, - {"low", T_OP_LOW}, - {"isconst", T_OP_ISCONST}, - - {"strcmp", T_OP_STRCMP}, - {"strin", T_OP_STRIN}, - {"strsub", T_OP_STRSUB}, - {"strlen", T_OP_STRLEN}, - {"strcat", T_OP_STRCAT}, - {"strupr", T_OP_STRUPR}, - {"strlwr", T_OP_STRLWR}, - - {"include", T_POP_INCLUDE}, - {"printt", T_POP_PRINTT}, - {"printi", T_POP_PRINTI}, - {"printv", T_POP_PRINTV}, - {"printf", 
T_POP_PRINTF}, - {"export", T_POP_EXPORT}, - {"xdef", T_POP_XDEF}, - {"global", T_POP_GLOBAL}, - {"ds", T_POP_DS}, - {"db", T_POP_DB}, - {"dw", T_POP_DW}, - {"dl", T_POP_DL}, - {"section", T_POP_SECTION}, - {"purge", T_POP_PURGE}, - - {"rsreset", T_POP_RSRESET}, - {"rsset", T_POP_RSSET}, - - {"incbin", T_POP_INCBIN}, - {"charmap", T_POP_CHARMAP}, - {"newcharmap", T_POP_NEWCHARMAP}, - {"setcharmap", T_POP_SETCHARMAP}, - {"pushc", T_POP_PUSHC}, - {"popc", T_POP_POPC}, - - {"fail", T_POP_FAIL}, - {"warn", T_POP_WARN}, - {"fatal", T_POP_FATAL}, - {"assert", T_POP_ASSERT}, - {"static_assert", T_POP_STATIC_ASSERT}, - - {"macro", T_POP_MACRO}, - /* Not needed but we have it here just to protect the name */ - {"endm", T_POP_ENDM}, - {"shift", T_POP_SHIFT}, - - {"rept", T_POP_REPT}, - /* Not needed but we have it here just to protect the name */ - {"endr", T_POP_ENDR}, - - {"load", T_POP_LOAD}, - {"endl", T_POP_ENDL}, - - {"if", T_POP_IF}, - {"else", T_POP_ELSE}, - {"elif", T_POP_ELIF}, - {"endc", T_POP_ENDC}, - - {"union", T_POP_UNION}, - {"nextu", T_POP_NEXTU}, - {"endu", T_POP_ENDU}, - - {"wram0", T_SECT_WRAM0}, - {"vram", T_SECT_VRAM}, - {"romx", T_SECT_ROMX}, - {"rom0", T_SECT_ROM0}, - {"hram", T_SECT_HRAM}, - {"wramx", T_SECT_WRAMX}, - {"sram", T_SECT_SRAM}, - {"oam", T_SECT_OAM}, - - {"rb", T_POP_RB}, - {"rw", T_POP_RW}, - {"equ", T_POP_EQU}, - {"equs", T_POP_EQUS}, - - /* Handled before in list of CPU instructions */ - /* {"set", T_POP_SET}, */ - {"=", T_POP_EQUAL}, - - {"pushs", T_POP_PUSHS}, - {"pops", T_POP_POPS}, - {"pusho", T_POP_PUSHO}, - {"popo", T_POP_POPO}, - - {"opt", T_POP_OPT}, - - {NULL, 0} -}; - -const struct sLexFloat tNumberToken = { - ParseNumber, - T_NUMBER -}; - -const struct sLexFloat tFixedPointToken = { - ParseFixedPoint, - T_NUMBER -}; - -const struct sLexFloat tIDToken = { - ParseSymbol, - T_ID -}; - -const struct sLexFloat tMacroArgToken = { - PutMacroArg, - T_LEX_MACROARG -}; - -const struct sLexFloat tMacroUniqueToken = { - PutUniqueID, - 
T_LEX_MACROUNIQUE -}; - -void setup_lexer(void) -{ - uint32_t id; - - lex_Init(); - lex_AddStrings(lexer_strings); - - //Macro arguments - - id = lex_FloatAlloc(&tMacroArgToken); - lex_FloatAddFirstRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '1', '9'); - id = lex_FloatAlloc(&tMacroUniqueToken); - lex_FloatAddFirstRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '@', '@'); - - //Decimal constants - - id = lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddRange(id, '0', '9'); - - //Binary constants - - id = lex_FloatAlloc(&tNumberToken); - nBinaryID = id; - lex_FloatAddFirstRange(id, '%', '%'); - lex_FloatAddSecondRange(id, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddSecondRange(id, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - lex_FloatAddRange(id, CurrentOptions.binary[0], - CurrentOptions.binary[0]); - lex_FloatAddRange(id, CurrentOptions.binary[1], - CurrentOptions.binary[1]); - - //Octal constants - - id = lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '&', '&'); - lex_FloatAddSecondRange(id, '0', '7'); - lex_FloatAddRange(id, '0', '7'); - - //Gameboy gfx constants - - id = lex_FloatAlloc(&tNumberToken); - nGBGfxID = id; - lex_FloatAddFirstRange(id, '`', '`'); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[0], - CurrentOptions.gbgfx[0]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[1], - CurrentOptions.gbgfx[1]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[2], - CurrentOptions.gbgfx[2]); - lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[3], - CurrentOptions.gbgfx[3]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[0], CurrentOptions.gbgfx[0]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[1], CurrentOptions.gbgfx[1]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[2], CurrentOptions.gbgfx[2]); - lex_FloatAddRange(id, CurrentOptions.gbgfx[3], CurrentOptions.gbgfx[3]); - - //Hex constants - - id = 
lex_FloatAlloc(&tNumberToken); - lex_FloatAddFirstRange(id, '$', '$'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddSecondRange(id, 'A', 'F'); - lex_FloatAddSecondRange(id, 'a', 'f'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, 'A', 'F'); - lex_FloatAddRange(id, 'a', 'f'); - - //ID 's - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, 'a', 'z'); - lex_FloatAddFirstRange(id, 'A', 'Z'); - lex_FloatAddFirstRange(id, '_', '_'); - lex_FloatAddSecondRange(id, '.', '.'); - lex_FloatAddSecondRange(id, 'a', 'z'); - lex_FloatAddSecondRange(id, 'A', 'Z'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '_', '_'); - lex_FloatAddSecondRange(id, '\\', '\\'); - lex_FloatAddSecondRange(id, '@', '@'); - lex_FloatAddSecondRange(id, '#', '#'); - lex_FloatAddRange(id, '.', '.'); - lex_FloatAddRange(id, 'a', 'z'); - lex_FloatAddRange(id, 'A', 'Z'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, '_', '_'); - lex_FloatAddRange(id, '\\', '\\'); - lex_FloatAddRange(id, '@', '@'); - lex_FloatAddRange(id, '#', '#'); - - //Local ID - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, '.', '.'); - lex_FloatAddSecondRange(id, 'a', 'z'); - lex_FloatAddSecondRange(id, 'A', 'Z'); - lex_FloatAddSecondRange(id, '_', '_'); - lex_FloatAddRange(id, 'a', 'z'); - lex_FloatAddRange(id, 'A', 'Z'); - lex_FloatAddRange(id, '0', '9'); - lex_FloatAddRange(id, '_', '_'); - lex_FloatAddRange(id, '\\', '\\'); - lex_FloatAddRange(id, '@', '@'); - lex_FloatAddRange(id, '#', '#'); - - // "@" - - id = lex_FloatAlloc(&tIDToken); - lex_FloatAddFirstRange(id, '@', '@'); - - //Fixed point constants - - id = lex_FloatAlloc(&tFixedPointToken); - lex_FloatAddFirstRange(id, '.', '.'); - lex_FloatAddFirstRange(id, '0', '9'); - lex_FloatAddSecondRange(id, '.', '.'); - lex_FloatAddSecondRange(id, '0', '9'); - lex_FloatAddRange(id, '.', '.'); - lex_FloatAddRange(id, '0', '9'); -} diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 
937b8250b..902e0c838 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -30,6 +30,13 @@ /* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */ static_assert(LEXER_BUF_SIZE <= SSIZE_MAX); +struct Expansion { + uint8_t distance; /* How far the expansion's beginning is from the current position */ + char const *contents; + size_t len; + struct Expansion *parent; +}; + struct LexerState { char const *path; @@ -37,14 +44,13 @@ struct LexerState { bool isMmapped; union { struct { /* If mmap()ed */ - char *ptr; + char *ptr; /* Technically `const` during the lexer's execution */ off_t size; off_t offset; }; struct { /* Otherwise */ int fd; size_t index; /* Read index into the buffer */ - size_t nbChars; /* Number of chars in front of the buffer */ char buf[LEXER_BUF_SIZE]; /* Circular buffer */ }; }; @@ -52,12 +58,17 @@ struct LexerState { /* Common state */ enum LexerMode mode; bool atLineStart; - unsigned int lineNo; + uint32_t lineNo; + uint32_t colNo; + bool capturing; /* Whether the text being lexed should be captured */ size_t captureSize; /* Amount of text captured */ char *captureBuf; /* Buffer to send the captured text to if non-NULL */ size_t captureCapacity; /* Size of the buffer above */ + + size_t nbChars; /* Number of chars of lookahead, for processing expansions */ bool expandStrings; + struct Expansion *expansion; }; struct LexerState *lexerState = NULL; @@ -116,14 +127,18 @@ struct LexerState *lexer_OpenFile(char const *path) /* Sometimes mmap() fails or isn't available, so have a fallback */ lseek(state->fd, 0, SEEK_SET); state->index = 0; - state->nbChars = 0; } state->mode = LEXER_NORMAL; - state->atLineStart = true; + state->atLineStart = true; /* yylex() will init colNo due to this */ state->lineNo = 0; + state->capturing = false; state->captureBuf = NULL; + + state->nbChars = 0; + state->expandStrings = true; + state->expansion = NULL; return state; } @@ -164,28 +179,50 @@ static void 
reallocCaptureBuf(void) /* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */ static int peek(uint8_t distance) { + if (distance >= LEXER_BUF_SIZE) + fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n", + distance, LEXER_BUF_SIZE); + if (lexerState->isMmapped) { if (lexerState->offset + distance >= lexerState->size) return EOF; + + if (!lexerState->capturing) { + bool escaped = false; + + while (lexerState->nbChars < distance && !escaped) { + char c = lexerState->ptr[lexerState->offset + + lexerState->nbChars++]; + + if (escaped) { + escaped = false; + if ((c >= '1' && c <= '9') || c == '@') + fatalerror("Macro arg expansion is not implemented yet\n"); + } else if (c == '\\') { + escaped = true; + } + } + } + return lexerState->ptr[lexerState->offset + distance]; } if (lexerState->nbChars <= distance) { /* Buffer isn't full enough, read some chars in */ + size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */ /* Compute the index we'll start writing to */ size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; - size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */ - ssize_t nbCharsRead = 0; + ssize_t nbCharsRead = 0, totalCharsRead = 0; #define readChars(size) do { \ nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \ if (nbCharsRead == -1) \ fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \ + totalCharsRead += nbCharsRead; \ writeIndex += nbCharsRead; \ if (writeIndex == LEXER_BUF_SIZE) \ writeIndex = 0; \ - lexerState->nbChars += nbCharsRead; /* Count all those chars in */ \ target -= nbCharsRead; \ } while (0) @@ -201,6 +238,40 @@ static int peek(uint8_t distance) #undef readChars + /* Do not perform expansions when capturing */ + if (!lexerState->capturing) { + /* Scan the newly-inserted chars for any expansions */ + bool escaped = false; + size_t 
index = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; + + for (ssize_t i = 0; i < totalCharsRead; i++) { + char c = lexerState->buf[index++]; + + if (escaped) { + escaped = false; + if ((c >= '1' && c <= '9') || c == '@') + fatalerror("Macro arg expansion is not implemented yet\n"); + } else if (c == '\\') { + escaped = true; + } + if (index == LEXER_BUF_SIZE) /* Wrap around buffer */ + index = 0; + } + + /* + * If last char read was a backslash, pretend we didn't read it; this is + * important, otherwise we may miss an expansion that straddles refills + */ + if (escaped) { + totalCharsRead--; + /* However, if that prevents having enough characters, error out */ + if (lexerState->nbChars + totalCharsRead <= distance) + fatalerror("Internal lexer error: cannot read far enough due to backslash\n"); + } + } + + lexerState->nbChars += totalCharsRead; + /* If there aren't enough chars even after refilling, give up */ if (lexerState->nbChars <= distance) return EOF; @@ -231,6 +302,8 @@ static void shiftChars(uint8_t distance) if (lexerState->index >= LEXER_BUF_SIZE) lexerState->index %= LEXER_BUF_SIZE; } + + lexerState->colNo += distance; } static int nextChar(void) @@ -250,11 +323,16 @@ char const *lexer_GetFileName(void) return lexerState->path; } -unsigned int lexer_GetLineNo(void) +uint32_t lexer_GetLineNo(void) { return lexerState->lineNo; } +uint32_t lexer_GetColNo(void) +{ + return lexerState->colNo; +} + void lexer_DumpStringExpansions(void) { /* TODO */ @@ -278,6 +356,20 @@ static int yylex_NORMAL(void) case '\t': break; + /* Handle single-char tokens */ + case '+': + return T_OP_ADD; + case '-': + return T_OP_SUB; + + /* Handle accepted single chars */ + case '[': + case ']': + case '(': + case ')': + case ',': + return c; + case EOF: /* Captures end at their buffer's boundary no matter what */ if (!lexerState->capturing) { @@ -288,6 +380,7 @@ static int yylex_NORMAL(void) default: error("Unknown character '%c'\n"); } + lexerState->atLineStart = 
false; } } @@ -298,8 +391,10 @@ static int yylex_RAW(void) int yylex(void) { - if (lexerState->atLineStart) + if (lexerState->atLineStart) { lexerState->lineNo++; + lexerState->colNo = 0; + } static int (* const lexerModeFuncs[])(void) = { [LEXER_NORMAL] = yylex_NORMAL, @@ -316,7 +411,7 @@ int yylex(void) } void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, - char **capture, size_t *size, char const *name) + char const **capture, size_t *size, char const *name) { lexerState->capturing = true; lexerState->captureSize = 0; diff --git a/src/asm/main.c b/src/asm/main.c index 2f44f453b..9f522d3ae 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -483,6 +483,13 @@ int main(int argc, char *argv[]) fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile); } + /* Init lexer; important to do first, since that's what provides the file name, line, etc */ + struct LexerState *state = lexer_OpenFile(tzMainfile); + + if (!state) + fatalerror("Failed to open main file!\n"); + lexer_SetState(state); + nStartClock = clock(); nTotalLines = 0; @@ -490,11 +497,6 @@ int main(int argc, char *argv[]) sym_Init(); sym_SetExportAll(exportall); fstk_Init(tzMainfile); - struct LexerState *state = lexer_OpenFile(tzMainfile); - - if (!state) - fatalerror("Failed to open main file!"); - lexer_SetState(state); opt_ParseDefines(); charmap_New("main", NULL); From 4c9a929a14d76609c26c06eeac0371d531f670c8 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 28 Jul 2020 22:06:03 +0200 Subject: [PATCH 03/59] Implement almost all functionality Add keywords and identifiers Add comments Add number literals Add strings Add a lot of new tokens Add (and clean up) IF etc. 
Improve reporting of unexpected chars / garbage bytes Fix bug with and improved error messages when failing to open file Add verbose-level messages about how files are opened Enforce that files finish with a newline Fix chars returned not being cast to unsigned char (may conflict w/ EOF) Return null path when no file is open, rather than crash Unify and improve error printing slightly Known to be missing: macro expansion, REPT blocks, EQUS expansions --- include/asm/asm.h | 4 - include/asm/fstack.h | 4 +- include/asm/lexer.h | 9 +- include/asm/symbol.h | 7 +- include/asm/util.h | 1 + src/asm/asmy.y | 205 +++------ src/asm/fstack.c | 14 +- src/asm/lexer.c | 1025 ++++++++++++++++++++++++++++++++++++++++-- src/asm/main.c | 1 + src/asm/symbol.c | 31 +- src/asm/util.c | 32 ++ src/asm/warning.c | 21 +- 12 files changed, 1129 insertions(+), 225 deletions(-) diff --git a/include/asm/asm.h b/include/asm/asm.h index caa87b899..47d7d2565 100644 --- a/include/asm/asm.h +++ b/include/asm/asm.h @@ -27,9 +27,5 @@ extern uint32_t nTotalLines; extern uint32_t nIFDepth; extern struct Section *pCurrentSection; -extern bool oDontExpandStrings; - -size_t symvaluetostring(char *dest, size_t maxLength, char *sym, - const char *mode); #endif /* RGBDS_ASM_ASM_H */ diff --git a/include/asm/fstack.h b/include/asm/fstack.h index 29405e16f..d365b9545 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -32,7 +32,7 @@ struct sContext { uint32_t uniqueID; int32_t nLine; uint32_t nStatus; - char *pREPTBlock; + char const *pREPTBlock; uint32_t nREPTBlockCount; uint32_t nREPTBlockSize; int32_t nREPTBodyFirstLine; @@ -47,7 +47,7 @@ void fstk_Dump(void); void fstk_DumpToStr(char *buf, size_t len); void fstk_AddIncludePath(char *s); void fstk_RunMacro(char *s, struct MacroArgs *args); -void fstk_RunRept(uint32_t count, int32_t nReptLineNo); +void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size); /** * @param path The user-provided file name * @param fullPath 
The address of a pointer, which will be made to point at the full path diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 32df75d3f..842a91f36 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -33,10 +33,13 @@ static inline void lexer_SetStateAtEOL(struct LexerState *state) struct LexerState *lexer_OpenFile(char const *path); struct LexerState *lexer_OpenFileView(void); void lexer_DeleteState(struct LexerState *state); +void lexer_Init(void); enum LexerMode { LEXER_NORMAL, - LEXER_RAW + LEXER_RAW, + LEXER_SKIP_TO_ELIF, + LEXER_SKIP_TO_ENDC }; void lexer_SetMode(enum LexerMode mode); @@ -47,7 +50,7 @@ uint32_t lexer_GetLineNo(void); uint32_t lexer_GetColNo(void); void lexer_DumpStringExpansions(void); int yylex(void); -void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, - char const **capture, size_t *size, char const *name); +void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char const **capture, size_t *size, + char const *name); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/include/asm/symbol.h b/include/asm/symbol.h index 52f61a524..8503490b1 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -44,8 +44,8 @@ struct Symbol { int32_t (*callback)(void); }; struct { /* For SYM_MACRO */ - uint32_t macroSize; - char *macro; + size_t macroSize; + char const *macro; }; }; @@ -114,9 +114,10 @@ void sym_Export(char const *symName); struct Symbol *sym_AddEqu(char const *symName, int32_t value); struct Symbol *sym_AddSet(char const *symName, int32_t value); uint32_t sym_GetPCValue(void); +uint32_t sym_GetConstantSymValue(struct Symbol const *sym); uint32_t sym_GetConstantValue(char const *s); struct Symbol *sym_FindSymbol(char const *symName); -struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo); +struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size); struct Symbol *sym_Ref(char const *symName); struct Symbol *sym_AddString(char const 
*symName, char const *value); uint32_t sym_GetDefinedValue(char const *s); diff --git a/include/asm/util.h b/include/asm/util.h index c03281f5a..d0c12bfb4 100644 --- a/include/asm/util.h +++ b/include/asm/util.h @@ -12,6 +12,7 @@ #include uint32_t calchash(const char *s); +char const *print(char c); size_t readUTF8Char(uint8_t *dest, char const *src); #endif /* RGBDS_UTIL_H */ diff --git a/src/asm/asmy.y b/src/asm/asmy.y index a8ec303a6..d10681f3f 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -39,63 +39,7 @@ uint32_t nListCountEmpty; char *tzNewMacro; uint32_t ulNewMacroSize; int32_t nPCOffset; -bool skipElifs; /* If this is set, ELIFs cannot be executed anymore */ - -size_t symvaluetostring(char *dest, size_t maxLength, char *symName, - const char *mode) -{ - size_t length; - struct Symbol *sym = sym_FindSymbol(symName); - - if (sym && sym->type == SYM_EQUS) { - char const *src = sym_GetStringValue(sym); - size_t i; - - if (mode) - error("Print types are only allowed for numbers\n"); - - for (i = 0; src[i] != 0; i++) { - if (i >= maxLength) - fatalerror("Symbol value too long to fit buffer\n"); - - dest[i] = src[i]; - } - - length = i; - - } else { - uint32_t value = sym_GetConstantValue(symName); - int32_t fullLength; - - /* Special cheat for binary */ - if (mode && !mode[0]) { - char binary[33]; /* 32 bits + 1 terminator */ - char *write_ptr = binary + 32; - fullLength = 0; - binary[32] = 0; - do { - *(--write_ptr) = (value & 1) + '0'; - value >>= 1; - fullLength++; - } while(value); - strncpy(dest, write_ptr, maxLength + 1); - } else { - fullLength = snprintf(dest, maxLength + 1, - mode ? 
mode : "$%" PRIX32, - value); - } - - if (fullLength < 0) { - fatalerror("snprintf encoding error\n"); - } else { - length = (size_t)fullLength; - if (length > maxLength) - fatalerror("Symbol value too long to fit buffer\n"); - } - } - - return length; -} +bool executedIfBlock; /* If this is set, ELIFs cannot be executed anymore */ static uint32_t str2int2(uint8_t *s, int32_t length) { @@ -388,16 +332,69 @@ lines : /* empty */ | lines { nListCountEmpty = 0; nPCOffset = 0; - } line '\n' { + } line { nTotalLines++; } ; -line : label - | label cpu_command - | label macro - | label simple_pseudoop - | pseudoop +line : label '\n' + | label cpu_command '\n' + | label macro '\n' + | label simple_pseudoop '\n' + | pseudoop '\n' + | conditional /* May not necessarily be followed by a newline, see below */ +; + +/* + * For "logistical" reasons, conditionals must manage newlines themselves. + * This is because we need to switch the lexer's mode *after* the newline has been read, + * and to avoid causing some grammar conflicts (token reducing is finicky). + * This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care. 
+ */ +conditional : if + /* It's important that all of these require being at line start for `skipIfBlock` */ + | elif + | else + | endc +; + +if : T_POP_IF const '\n' { + nIFDepth++; + executedIfBlock = !!$2; + if (!executedIfBlock) + lexer_SetMode(LEXER_SKIP_TO_ELIF); + } +; + +elif : T_POP_ELIF const '\n' { + if (nIFDepth <= 0) + fatalerror("Found ELIF outside an IF construct\n"); + + if (executedIfBlock) { + lexer_SetMode(LEXER_SKIP_TO_ENDC); + } else { + executedIfBlock = !!$2; + if (!executedIfBlock) + lexer_SetMode(LEXER_SKIP_TO_ELIF); + } + } +; + +else : T_POP_ELSE '\n' { + if (nIFDepth <= 0) + fatalerror("Found ELSE outside an IF construct\n"); + + if (executedIfBlock) + lexer_SetMode(LEXER_SKIP_TO_ENDC); + } +; + +endc : T_POP_ENDC '\n' { + if (nIFDepth <= 0) + fatalerror("Found ENDC outside an IF construct\n"); + + nIFDepth--; + } ; scoped_id : T_ID | T_LOCAL_ID ; @@ -460,10 +457,6 @@ simple_pseudoop : include | printt | printv | printi - | if - | elif - | else - | endc | export | db | dw @@ -606,9 +599,9 @@ rept : T_POP_REPT uconst { uint32_t nDefinitionLineNo = lexer_GetLineNo(); char const *body; size_t size; - lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR, - &body, &size, "REPT block"); - fstk_RunRept($2, nDefinitionLineNo); + lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size, + "REPT block"); + fstk_RunRept($2, nDefinitionLineNo, body, size); } ; @@ -616,9 +609,9 @@ macrodef : T_LABEL ':' T_POP_MACRO { int32_t nDefinitionLineNo = lexer_GetLineNo(); char const *body; size_t size; - lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM, - &body, &size, "macro definition"); - sym_AddMacro($1, nDefinitionLineNo); + lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size, + "macro definition"); + sym_AddMacro($1, nDefinitionLineNo, body, size); } ; @@ -786,72 +779,6 @@ printi : T_POP_PRINTI const { printf("%" PRId32, $2); } printf : T_POP_PRINTF const { math_Print($2); } ; -if : T_POP_IF const { - nIFDepth++; - if (!$2) { - /* The 
function is hardcoded to also stop on T_POP_ELSE and ENDC */ - lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF, - NULL, NULL, "if block"); - skipElifs = false; - } else { - skipElifs = true; - } - } -; - -elif : T_POP_ELIF const { - if (nIFDepth <= 0) - fatalerror("Found ELIF outside an IF construct\n"); - - if (skipElifs) { - /* - * Executed when ELIF is reached at the end of - * an IF or ELIF block for which the condition - * was true. - * - * Continue parsing at ENDC keyword - */ - lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC, - NULL, NULL, "elif block"); - } else { - /* - * Executed when ELIF is skipped to because the - * condition of the previous IF or ELIF block - * was false. - */ - - if (!$2) { - /* - * Continue parsing after ELSE, or at - * ELIF or ENDC keyword. - */ - lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ELIF, - NULL, NULL, "elif block"); - } else { - skipElifs = true; - } - } - } -; - -else : T_POP_ELSE { - if (nIFDepth <= 0) - fatalerror("Found ELSE outside an IF construct\n"); - - /* Continue parsing at ENDC keyword */ - lexer_SkipToBlockEnd(T_POP_IF, T_POP_ENDC, T_POP_ENDC, - NULL, NULL, "else block"); - } -; - -endc : T_POP_ENDC { - if (nIFDepth <= 0) - fatalerror("Found ENDC outside an IF construct\n"); - - nIFDepth--; - } -; - const_3bit : const { int32_t value = $1; diff --git a/src/asm/fstack.c b/src/asm/fstack.c index c28937807..2e3dc1bd2 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -41,7 +41,7 @@ static char IncludePaths[MAXINCPATHS][_MAX_PATH + 1]; static int32_t NextIncPath; static uint32_t nMacroCount; -static char *pCurrentREPTBlock; +static char const *pCurrentREPTBlock; static uint32_t nCurrentREPTBlockSize; static uint32_t nCurrentREPTBlockCount; static int32_t nCurrentREPTBodyFirstLine; @@ -249,9 +249,11 @@ void fstk_Dump(void) pLastFile->nLine); pLastFile = pLastFile->next; } + char const *fileName = lexer_GetFileName(); - fprintf(stderr, "%s(%" PRId32 ",%" PRId32 ")", - lexer_GetFileName(), 
lexer_GetLineNo(), lexer_GetColNo()); + if (fileName) + fprintf(stderr, "%s(%" PRId32 ",%" PRId32 "): ", + fileName, lexer_GetLineNo(), lexer_GetColNo()); } void fstk_DumpToStr(char *buf, size_t buflen) @@ -425,15 +427,15 @@ void fstk_RunMacro(char *s, struct MacroArgs *args) /* * Set up a repeat block for parsing */ -void fstk_RunRept(uint32_t count, int32_t nReptLineNo) +void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size) { if (count) { pushcontext(); macro_SetUniqueID(nMacroCount++); nCurrentREPTBlockCount = count; nCurrentStatus = STAT_isREPTBlock; - nCurrentREPTBlockSize = ulNewMacroSize; - pCurrentREPTBlock = tzNewMacro; + nCurrentREPTBlockSize = size; + pCurrentREPTBlock = body; nCurrentREPTBodyFirstLine = nReptLineNo + 1; } } diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 902e0c838..1179a5732 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -9,8 +9,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -19,13 +21,208 @@ #include #include +#include "extern/utf8decoder.h" + +#include "asm/asm.h" #include "asm/lexer.h" +#include "asm/macro.h" +#include "asm/main.h" #include "asm/rpn.h" -#include "asm/symbol.h" /* For MAXSYMLEN in asmy.h */ +#include "asm/symbol.h" +#include "asm/util.h" #include "asm/warning.h" /* Include this last so it gets all type & constant definitions */ #include "asmy.h" /* For token definitions, generated from asmy.y */ +/* + * Identifiers that are also keywords are listed here. This ONLY applies to ones + * that would normally be matched as identifiers! Check out `yylex_NORMAL` to + * see how this is used. + * Tokens / keywords not handled here are handled in `yylex_NORMAL`'s switch. + */ +static struct KeywordMapping { + char const *name; + int token; +} const keywords[] = { + /* + * CAUTION when editing this: adding keywords will probably require extra nodes in the + * `keywordDict` array. 
If you forget to, you will probably trip up an assertion, anyways. + * Also, all entries in this array must be in uppercase for the dict to build correctly. + */ + {"ADC", T_Z80_ADC}, + {"ADD", T_Z80_ADD}, + {"AND", T_Z80_AND}, + {"BIT", T_Z80_BIT}, + {"CALL", T_Z80_CALL}, + {"CCF", T_Z80_CCF}, + {"CPL", T_Z80_CPL}, + {"CP", T_Z80_CP}, + {"DAA", T_Z80_DAA}, + {"DEC", T_Z80_DEC}, + {"DI", T_Z80_DI}, + {"EI", T_Z80_EI}, + {"HALT", T_Z80_HALT}, + {"INC", T_Z80_INC}, + {"JP", T_Z80_JP}, + {"JR", T_Z80_JR}, + {"LD", T_Z80_LD}, + {"LDI", T_Z80_LDI}, + {"LDD", T_Z80_LDD}, + {"LDIO", T_Z80_LDIO}, + {"LDH", T_Z80_LDIO}, + {"NOP", T_Z80_NOP}, + {"OR", T_Z80_OR}, + {"POP", T_Z80_POP}, + {"PUSH", T_Z80_PUSH}, + {"RES", T_Z80_RES}, + {"RETI", T_Z80_RETI}, + {"RET", T_Z80_RET}, + {"RLCA", T_Z80_RLCA}, + {"RLC", T_Z80_RLC}, + {"RLA", T_Z80_RLA}, + {"RL", T_Z80_RL}, + {"RRC", T_Z80_RRC}, + {"RRCA", T_Z80_RRCA}, + {"RRA", T_Z80_RRA}, + {"RR", T_Z80_RR}, + {"RST", T_Z80_RST}, + {"SBC", T_Z80_SBC}, + {"SCF", T_Z80_SCF}, + {"SET", T_POP_SET}, + {"SLA", T_Z80_SLA}, + {"SRA", T_Z80_SRA}, + {"SRL", T_Z80_SRL}, + {"STOP", T_Z80_STOP}, + {"SUB", T_Z80_SUB}, + {"SWAP", T_Z80_SWAP}, + {"XOR", T_Z80_XOR}, + + {"NZ", T_CC_NZ}, + {"Z", T_CC_Z}, + {"NC", T_CC_NC}, + /* Handled in list of registers */ + /* { "C", T_CC_C }, */ + + {"AF", T_MODE_AF}, + {"BC", T_MODE_BC}, + {"DE", T_MODE_DE}, + {"HL", T_MODE_HL}, + {"SP", T_MODE_SP}, + + {"A", T_TOKEN_A}, + {"B", T_TOKEN_B}, + {"C", T_TOKEN_C}, + {"D", T_TOKEN_D}, + {"E", T_TOKEN_E}, + {"H", T_TOKEN_H}, + {"L", T_TOKEN_L}, + + {"DEF", T_OP_DEF}, + + {"FRAGMENT", T_POP_FRAGMENT}, + {"BANK", T_OP_BANK}, + {"ALIGN", T_OP_ALIGN}, + + {"ROUND", T_OP_ROUND}, + {"CEIL", T_OP_CEIL}, + {"FLOOR", T_OP_FLOOR}, + {"DIV", T_OP_FDIV}, + {"MUL", T_OP_FMUL}, + {"SIN", T_OP_SIN}, + {"COS", T_OP_COS}, + {"TAN", T_OP_TAN}, + {"ASIN", T_OP_ASIN}, + {"ACOS", T_OP_ACOS}, + {"ATAN", T_OP_ATAN}, + {"ATAN2", T_OP_ATAN2}, + + {"HIGH", T_OP_HIGH}, + {"LOW", T_OP_LOW}, + 
{"ISCONST", T_OP_ISCONST}, + + {"STRCMP", T_OP_STRCMP}, + {"STRIN", T_OP_STRIN}, + {"STRSUB", T_OP_STRSUB}, + {"STRLEN", T_OP_STRLEN}, + {"STRCAT", T_OP_STRCAT}, + {"STRUPR", T_OP_STRUPR}, + {"STRLWR", T_OP_STRLWR}, + + {"INCLUDE", T_POP_INCLUDE}, + {"PRINTT", T_POP_PRINTT}, + {"PRINTI", T_POP_PRINTI}, + {"PRINTV", T_POP_PRINTV}, + {"PRINTF", T_POP_PRINTF}, + {"EXPORT", T_POP_EXPORT}, + {"XDEF", T_POP_XDEF}, + {"GLOBAL", T_POP_GLOBAL}, + {"DS", T_POP_DS}, + {"DB", T_POP_DB}, + {"DW", T_POP_DW}, + {"DL", T_POP_DL}, + {"SECTION", T_POP_SECTION}, + {"PURGE", T_POP_PURGE}, + + {"RSRESET", T_POP_RSRESET}, + {"RSSET", T_POP_RSSET}, + + {"INCBIN", T_POP_INCBIN}, + {"CHARMAP", T_POP_CHARMAP}, + {"NEWCHARMAP", T_POP_NEWCHARMAP}, + {"SETCHARMAP", T_POP_SETCHARMAP}, + {"PUSHC", T_POP_PUSHC}, + {"POPC", T_POP_POPC}, + + {"FAIL", T_POP_FAIL}, + {"WARN", T_POP_WARN}, + {"FATAL", T_POP_FATAL}, + {"ASSERT", T_POP_ASSERT}, + {"STATIC_ASSERT", T_POP_STATIC_ASSERT}, + + {"MACRO", T_POP_MACRO}, + {"ENDM", T_POP_ENDM}, + {"SHIFT", T_POP_SHIFT}, + + {"REPT", T_POP_REPT}, + {"ENDR", T_POP_ENDR}, + + {"LOAD", T_POP_LOAD}, + {"ENDL", T_POP_ENDL}, + + {"IF", T_POP_IF}, + {"ELSE", T_POP_ELSE}, + {"ELIF", T_POP_ELIF}, + {"ENDC", T_POP_ENDC}, + + {"UNION", T_POP_UNION}, + {"NEXTU", T_POP_NEXTU}, + {"ENDU", T_POP_ENDU}, + + {"WRAM0", T_SECT_WRAM0}, + {"VRAM", T_SECT_VRAM}, + {"ROMX", T_SECT_ROMX}, + {"ROM0", T_SECT_ROM0}, + {"HRAM", T_SECT_HRAM}, + {"WRAMX", T_SECT_WRAMX}, + {"SRAM", T_SECT_SRAM}, + {"OAM", T_SECT_OAM}, + + {"RB", T_POP_RB}, + {"RW", T_POP_RW}, + {"EQU", T_POP_EQU}, + {"EQUS", T_POP_EQUS}, + + /* Handled before in list of CPU instructions */ + /* {"SET", T_POP_SET}, */ + + {"PUSHS", T_POP_PUSHS}, + {"POPS", T_POP_POPS}, + {"PUSHO", T_POP_PUSHO}, + {"POPO", T_POP_POPO}, + + {"OPT", T_POP_OPT} +}; + #define LEXER_BUF_SIZE 42 /* TODO: determine a sane value for this */ /* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */ 
static_assert(LEXER_BUF_SIZE <= SSIZE_MAX); @@ -60,6 +257,7 @@ struct LexerState { bool atLineStart; uint32_t lineNo; uint32_t colNo; + int lastToken; bool capturing; /* Whether the text being lexed should be captured */ size_t captureSize; /* Amount of text captured */ @@ -83,12 +281,17 @@ struct LexerState *lexer_OpenFile(char const *path) if (isStdin) path = ""; if (!state) { - error("Failed to open file \"%s\": %s\n", path, strerror(errno)); + error("Failed to allocate memory for lexer state: %s\n", strerror(errno)); return NULL; } state->path = path; state->fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY); + if (state->fd == -1) { + error("Failed to open file \"%s\": %s\n", path, strerror(errno)); + free(state); + return NULL; + } state->isMmapped = false; /* By default, assume it won't be mmap()ed */ off_t size = lseek(state->fd, 0, SEEK_END); @@ -121,10 +324,16 @@ struct LexerState *lexer_OpenFile(char const *path) state->isMmapped = true; state->ptr = pa; state->size = size; + + if (verbose) + printf("File %s successfully mmap()ped\n", path); } } if (!state->isMmapped) { /* Sometimes mmap() fails or isn't available, so have a fallback */ + if (verbose) + printf("File %s opened as regular, errno reports \"%s\"\n", + path, strerror(errno)); lseek(state->fd, 0, SEEK_SET); state->index = 0; } @@ -132,6 +341,7 @@ struct LexerState *lexer_OpenFile(char const *path) state->mode = LEXER_NORMAL; state->atLineStart = true; /* yylex() will init colNo due to this */ state->lineNo = 0; + state->lastToken = 0; state->capturing = false; state->captureBuf = NULL; @@ -156,6 +366,72 @@ void lexer_DeleteState(struct LexerState *state) free(state); } +struct KeywordDictNode { + /* + * The identifier charset is (currently) 44 characters big. By storing entries for the + * entire printable ASCII charset, minus lower-case due to case-insensitivity, + * we only waste (0x60 - 0x20) - 70 = 20 entries per node, which should be acceptable. 
+ * In turn, this allows greatly simplifying checking an index into this array, + * which should help speed up the lexer. + */ + uint16_t children[0x60 - ' ']; + struct KeywordMapping const *keyword; +/* Since the keyword structure is invariant, the min number of nodes is known at compile time */ +} keywordDict[336] = {0}; /* Make sure to keep this correct when adding keywords! */ + +/* Convert a char into its index into the dict */ +static inline uint8_t dictIndex(char c) +{ + /* Translate uppercase to lowercase (roughly) */ + if (c > 0x60) + c = c - ('a' - 'A'); + return c - ' '; +} + +void lexer_Init(void) +{ + /* + * Build the dictionary of keywords. This could be done at compile time instead, however: + * - Doing so manually is a task nobody wants to undertake + * - It would be massively hard to read + * - Doing it within CC or CPP would be quite non-trivial + * - Doing it externally would require some extra work to use only POSIX tools + * - The startup overhead isn't much compared to the program's + */ + uint16_t usedNodes = 1; + + for (size_t i = 0; i < sizeof(keywords) / sizeof(*keywords); i++) { + uint16_t nodeID = 0; + + /* Walk the dictionary, creating intermediate nodes for the keyword */ + for (char const *ptr = keywords[i].name; *ptr; ptr++) { + /* We should be able to assume all entries are well-formed */ + if (keywordDict[nodeID].children[*ptr - ' '] == 0) { + /* + * If this gets tripped up, set the size of keywordDict to + * something high, compile with `-DPRINT_NODE_COUNT` (see below), + * and set the size to that. 
+ */ + assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict)); + + /* There is no node at that location, grab one from the pool */ + keywordDict[nodeID].children[*ptr - ' '] = usedNodes; + usedNodes++; + } + nodeID = keywordDict[nodeID].children[*ptr - ' ']; + } + + /* This assumes that no two keywords have the same name */ + keywordDict[nodeID].keyword = &keywords[i]; + } + +#ifdef PRINT_NODE_COUNT /* For the maintainer to check how many nodes are needed */ + printf("Lexer keyword dictionary: %zu keywords in %u nodes (pool size %zu)\n", + sizeof(keywords) / sizeof(*keywords), usedNodes, + sizeof(keywordDict) / sizeof(*keywordDict)); +#endif +} + void lexer_SetMode(enum LexerMode mode) { lexerState->mode = mode; @@ -187,7 +463,16 @@ static int peek(uint8_t distance) if (lexerState->offset + distance >= lexerState->size) return EOF; + /* + * Note: the following block is also duplicated for the non-mmap() path. This sucks. + * However, due to subtle handling differences, I haven't found a clean way to + * avoid that duplication. If you have any ideas, please discuss them in an issue or + * pull request. Thank you! 
+ */ + + /* Do not perform expansions while capturing */ if (!lexerState->capturing) { + /* Scan the newly-inserted chars for any macro args */ bool escaped = false; while (lexerState->nbChars < distance && !escaped) { @@ -204,7 +489,7 @@ static int peek(uint8_t distance) } } - return lexerState->ptr[lexerState->offset + distance]; + return (unsigned char)lexerState->ptr[lexerState->offset + distance]; } if (lexerState->nbChars <= distance) { @@ -240,7 +525,7 @@ static int peek(uint8_t distance) /* Do not perform expansions when capturing */ if (!lexerState->capturing) { - /* Scan the newly-inserted chars for any expansions */ + /* Scan the newly-inserted chars for any macro args */ bool escaped = false; size_t index = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; @@ -276,7 +561,7 @@ static int peek(uint8_t distance) if (lexerState->nbChars <= distance) return EOF; } - return lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; + return (unsigned char)lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; } static void shiftChars(uint8_t distance) @@ -320,7 +605,7 @@ static int nextChar(void) char const *lexer_GetFileName(void) { - return lexerState->path; + return lexerState ? 
lexerState->path : NULL; } uint32_t lexer_GetLineNo(void) @@ -338,38 +623,622 @@ void lexer_DumpStringExpansions(void) /* TODO */ } -static int yylex_NORMAL(void) +/* Function to discard all of a line's comments */ + +static void discardComment(void) { for (;;) { - int c = nextChar(); + int c = peek(0); + + if (c == EOF || c == '\r' || c == '\n') + break; + shiftChars(1); + } +} + +/* Functions to lex numbers of various radixes */ + +static void readNumber(int radix, int32_t baseValue) +{ + uint32_t value = baseValue; + + for (;;) { + int c = peek(0); + + if (c < '0' || c > '0' + radix - 1) + break; + if (value > UINT32_MAX / radix) + warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); + value = value * radix + (c - '0'); + + shiftChars(1); + } + + yylval.nConstValue = value; +} + +static void readFractionalPart(void) +{ + uint32_t value = 0, divisor = 1; + + for (;;) { + int c = peek(0); + + if (c < '0' || c > '9') + break; + if (divisor > UINT32_MAX / 10) { + warning(WARNING_LARGE_CONSTANT, + "Precision of fixed-point constant is too large\n"); + /* Discard any additional digits */ + while (c = peek(0), c >= '0' && c <= '9') + shiftChars(1); + break; + } + } + + if (yylval.nConstValue > INT16_MAX || yylval.nConstValue < INT16_MIN) + warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large\n"); + + /* Cast to unsigned avoids UB if shifting discards bits */ + yylval.nConstValue = (uint32_t)yylval.nConstValue << 16; + /* Cast to unsigned avoids undefined overflow behavior */ + uint16_t fractional = value * 65536 / divisor; + + yylval.nConstValue |= fractional * (yylval.nConstValue >= 0 ? 
1 : -1); +} + +static void readBinaryNumber(void) +{ + uint32_t value = 0; + + for (;;) { + int c = peek(0); + + /* TODO: handle `-b`'s dynamic chars */ + if (c != '0' && c != '1') + break; + value = value * 2 + (c - '0'); + + shiftChars(1); + } + + yylval.nConstValue = value; +} + +static void readHexNumber(void) +{ + uint32_t value = 0; + bool empty = true; + + for (;;) { + int c = peek(0); + + if (c >= 'a' && c <= 'f') /* Convert letters to right after digits */ + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else if (c >= '0' && c <= '9') + c = c - '0'; + else + break; + + if (value > UINT32_MAX / 16) + warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); + value = value * 16 + c; + + shiftChars(1); + empty = false; + } + + if (empty) + error("Invalid integer constant, no digits after '$'\n"); + + yylval.nConstValue = value; +} + +static void readGfxConstant(void) +{ + uint32_t bp0 = 0, bp1 = 0; + uint8_t width = 0; + + for (;;) { + int c = peek(0); + + /* TODO: handle `-g`'s dynamic chars */ + if (c < '0' || c > '3') + break; + uint8_t pixel = c - '0'; + + if (width < 8) { + bp0 = bp0 << 1 | (pixel & 1); + bp1 = bp1 << 1 | (pixel >> 1); + } + if (width <= 8) + width++; + shiftChars(1); + } + + if (width == 0) + error("Invalid gfx constant, no digits after '`'\n"); + else if (width == 8) + warning(WARNING_LARGE_CONSTANT, + "Gfx constant is too large, only 8 first pixels considered\n"); + + yylval.nConstValue = bp1 << 8 | bp0; +} + +/* Function to read identifiers & keywords */ + +static int readIdentifier(char firstChar) +{ + /* Lex while checking for a keyword */ + yylval.tzSym[0] = firstChar; + uint16_t nodeID = keywordDict[0].children[dictIndex(firstChar)]; + int tokenType = firstChar == '.' ? 
T_LOCAL_ID : T_ID; + size_t i; + + for (i = 1; ; i++) { + int c = peek(0); + + /* If that char isn't in the symbol charset, end */ + if ((c > '9' || c < '0') + && (c > 'Z' || c < 'A') + && (c > 'z' || c < 'a') + && c != '#' && c != '.' && c != '@' && c != '_') + break; + shiftChars(1); + + /* Write the char to the identifier's name */ + if (i < sizeof(yylval.tzSym) - 1) + yylval.tzSym[i] = c; + + /* If the char was a dot, mark the identifier as local */ + if (c == '.') + tokenType = T_LOCAL_ID; + + /* Attempt to traverse the tree to check for a keyword */ + if (nodeID) /* Do nothing if matching already failed */ + nodeID = keywordDict[nodeID].children[dictIndex(c)]; + } + + if (i > sizeof(yylval.tzSym) - 1) { + warning(WARNING_LONG_STR, "Symbol name too long, got truncated\n"); + i = sizeof(yylval.tzSym) - 1; + } + yylval.tzSym[i] = '\0'; /* Terminate the string */ + + if (keywordDict[nodeID].keyword) + return keywordDict[nodeID].keyword->token; + + return tokenType; +} + +/* Functions to read strings */ + +enum PrintType { + TYPE_NONE, + TYPE_DECIMAL, /* d */ + TYPE_UPPERHEX, /* X */ + TYPE_LOWERHEX, /* x */ + TYPE_BINARY, /* b */ +}; + +static void intToString(char *dest, size_t bufSize, struct Symbol const *sym, enum PrintType type) +{ + uint32_t value = sym_GetConstantSymValue(sym); + int fullLength; + + /* Special cheat for binary */ + if (type == TYPE_BINARY) { + char binary[33]; /* 32 bits + 1 terminator */ + char *write_ptr = binary + 32; + + fullLength = 0; + binary[32] = 0; + do { + *(--write_ptr) = (value & 1) + '0'; + value >>= 1; + fullLength++; + } while (value); + strncpy(dest, write_ptr, bufSize - 1); + } else { + static char const * const formats[] = { + [TYPE_NONE] = "$%" PRIX32, + [TYPE_DECIMAL] = "%" PRId32, + [TYPE_UPPERHEX] = "%" PRIX32, + [TYPE_LOWERHEX] = "%" PRIx32 + }; + + fullLength = snprintf(dest, bufSize, formats[type], value); + if (fullLength < 0) { + error("snprintf encoding error: %s\n", strerror(errno)); + dest[0] = '\0'; + } + } 
+ + if ((size_t)fullLength >= bufSize) + warning(WARNING_LONG_STR, "Interpolated symbol %s too long to fit buffer\n", + sym->name); +} + +static char const *readInterpolation(void) +{ + char symName[MAXSYMLEN + 1]; + size_t i = 0; + enum PrintType type = TYPE_NONE; + + for (;;) { + int c = peek(0); + + if (c == '{') { /* Nested interpolation */ + shiftChars(1); + char const *inner = readInterpolation(); + + if (inner) { + while (*inner) { + if (i == sizeof(symName)) + break; + symName[i++] = *inner++; + } + } + } else if (c == EOF || c == '\r' || c == '\n' || c == '"') { + error("Unterminated interpolation\n"); + break; + } else if (c == '}') { + shiftChars(1); + break; + } else if (c == ':' && type == TYPE_NONE) { /* Print type, only once */ + if (i != 1) { + error("Print types are exactly 1 character long\n"); + } else { + switch (symName[0]) { + case 'b': + type = TYPE_BINARY; + break; + case 'd': + type = TYPE_DECIMAL; + break; + case 'X': + type = TYPE_UPPERHEX; + break; + case 'x': + type = TYPE_LOWERHEX; + break; + default: + error("Invalid print type '%s'\n", print(symName[0])); + } + } + i = 0; /* Now that type has been set, restart at beginning of string */ + shiftChars(1); + } else { + if (i < sizeof(symName)) /* Allow writing an extra char to flag overflow */ + symName[i++] = c; + shiftChars(1); + } + } + + if (i == sizeof(symName)) { + warning(WARNING_LONG_STR, "Symbol name too long\n"); + i--; + } + symName[i] = '\0'; + + struct Symbol const *sym = sym_FindSymbol(symName); + + if (!sym) { + error("Interpolated symbol \"%s\" does not exist\n", symName); + } else if (sym->type == SYM_EQUS) { + if (type != TYPE_NONE) + error("Print types are only allowed for numbers\n"); + return sym_GetStringValue(sym); + } else if (sym_IsNumeric(sym)) { + static char buf[33]; /* Worst case of 32 digits + terminator */ + + intToString(buf, sizeof(buf), sym, type); + return buf; + } else { + error("Only numerical and string symbols can be interpolated\n"); + } + return 
NULL; +} + +static void readString(void) +{ + size_t i = 0; + + for (;;) { + int c = peek(0); switch (c) { - case '\n': - if (lexerStateEOL) { - lexer_SetState(lexerStateEOL); - lexerStateEOL = NULL; + case '"': + shiftChars(1); + if (i == sizeof(yylval.tzString)) { + i--; + warning(WARNING_LONG_STR, "String constant too long\n"); } - return '\n'; + yylval.tzString[i] = '\0'; + return; + case '\r': + case '\n': /* Do not shift these! */ + case EOF: + if (i == sizeof(yylval.tzString)) { + i--; + warning(WARNING_LONG_STR, "String constant too long\n"); + } + yylval.tzString[i] = '\0'; + error("Unterminated string\n"); + return; + + case '\\': /* Character escape */ + c = peek(1); + switch (c) { + case '\\': /* Return that character unchanged */ + case '"': + case '{': + case '}': + shiftChars(1); + break; + case 'n': + c = '\n'; + shiftChars(1); + break; + case 'r': + c = '\r'; + shiftChars(1); + break; + case 't': + c = '\t'; + shiftChars(1); + break; + + case EOF: /* Can't really print that one */ + error("Illegal character escape at end of input\n"); + c = '\\'; + break; + default: + error("Illegal character escape '%s'\n", print(c)); + c = '\\'; + break; + } + break; + + case '{': /* Symbol interpolation */ + shiftChars(1); + char const *ptr = readInterpolation(); - /* Ignore whitespace */ + if (ptr) { + while (*ptr) { + if (i == sizeof(yylval.tzString)) + break; + yylval.tzString[i++] = *ptr++; + } + } + continue; /* Do not copy an additional character */ + + /* Regular characters will just get copied */ + } + if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */ + yylval.tzString[i++] = c; + shiftChars(1); + } +} + +/* Function to report one character's worth of garbage bytes */ + +static char const *reportGarbageChar(unsigned char firstByte) +{ + static char bytes[6 + 2 + 1]; /* Max size of a UTF-8 encoded code point, plus "''\0" */ + /* First, attempt UTF-8 decoding */ + uint32_t state = 0; /* UTF8_ACCEPT */ + uint32_t codepoint; + uint8_t 
size = 0; /* Number of additional bytes to shift */ + + bytes[1] = firstByte; /* No need to init the rest of the array */ + decode(&state, &codepoint, firstByte); + while (state != 0 && state != 1 /* UTF8_REJECT */) { + int c = peek(size++); + + if (c == EOF) + break; + bytes[size + 1] = c; + decode(&state, &codepoint, c); + } + + if (state == 0 && (codepoint > UCHAR_MAX || isprint((unsigned char)codepoint))) { + /* Character is valid, printable UTF-8! */ + shiftChars(size); + bytes[0] = '\''; + bytes[size + 2] = '\''; + bytes[size + 3] = '\0'; + return bytes; + } + + /* The character isn't valid UTF-8, so we'll only print that first byte */ + if (isprint(firstByte)) { + /* bytes[1] = firstByte; */ + bytes[0] = '\''; + bytes[2] = '\''; + bytes[3] = '\0'; + return bytes; + } + /* Well then, print its hex value */ + static char const hexChars[16] = "0123456789ABCDEF"; + + bytes[0] = '0'; + bytes[1] = 'x'; + bytes[2] = hexChars[firstByte >> 4]; + bytes[3] = hexChars[firstByte & 0x0f]; + bytes[4] = '\0'; + return bytes; +} + +/* Lexer core */ + +static int yylex_NORMAL(void) +{ + for (;;) { + int c = nextChar(); + + switch (c) { + /* Ignore whitespace and comments */ + + case '*': + if (!lexerState->atLineStart) + return T_OP_MUL; + warning(WARNING_OBSOLETE, + "'*' is deprecated for comments, please use ';' instead\n"); + /* fallthrough */ + case ';': + discardComment(); + /* fallthrough */ case ' ': case '\t': break; - /* Handle single-char tokens */ + /* Handle unambiguous single-char tokens */ + + case '^': + return T_OP_XOR; case '+': return T_OP_ADD; case '-': return T_OP_SUB; + case '/': + return T_OP_DIV; + case '~': + return T_OP_NOT; + + case '@': + return T_ID; /* Handle accepted single chars */ + case '[': case ']': case '(': case ')': case ',': + case ':': return c; + /* Handle ambiguous 1- or 2-char tokens */ + char secondChar; + + case '|': /* Either binary or logical OR */ + secondChar = peek(0); + if (secondChar == '|') { + shiftChars(1); + return 
T_OP_LOGICOR; + } + return T_OP_OR; + + case '=': /* Either SET alias, or EQ */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return T_OP_LOGICEQU; + } + return T_POP_EQUAL; + + case '<': /* Either a LT, LTE, or left shift */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return T_OP_LOGICLE; + } else if (secondChar == '<') { + shiftChars(1); + return T_OP_SHL; + } + return T_OP_LOGICLT; + + case '>': /* Either a GT, GTE, or right shift */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return T_OP_LOGICGE; + } else if (secondChar == '>') { + shiftChars(1); + return T_OP_SHR; + } + return T_OP_LOGICGT; + + case '!': /* Either a NEQ, or negation */ + secondChar = peek(0); + if (secondChar == '=') { + shiftChars(1); + return T_OP_LOGICNE; + } + return T_OP_LOGICNOT; + + /* Handle numbers */ + + case '$': + yylval.nConstValue = 0; + readHexNumber(); + return T_NUMBER; + + case '0': /* Decimal number */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + readNumber(10, c - '0'); + int perhapsPeriod = peek(0); + + if (perhapsPeriod == '.') { + shiftChars(1); + readFractionalPart(); + } + return T_NUMBER; + + case '&': + secondChar = peek(0); + if (secondChar == '&') { + shiftChars(1); + return T_OP_LOGICAND; + } else if (secondChar >= '0' && secondChar <= '7') { + readNumber(8, 0); + return T_NUMBER; + } + return T_OP_AND; + + case '%': /* Either a modulo, or a binary constant */ + secondChar = peek(0); + if (secondChar != '0' && secondChar != '1') + return T_OP_MOD; + + yylval.nConstValue = 0; + readBinaryNumber(); + return T_NUMBER; + + case '`': /* Gfx constant */ + readGfxConstant(); + return T_NUMBER; + + /* Handle strings */ + + case '"': + readString(); + return T_STRING; + + /* Handle newlines and EOF */ + + case '\r': + if (peek(0) == '\n') + shiftChars(1); /* Shift that EOL */ + /* fallthrough */ + case '\n': + if (lexerStateEOL) { + 
lexer_SetState(lexerStateEOL); + lexerStateEOL = NULL; + } + return '\n'; + case EOF: /* Captures end at their buffer's boundary no matter what */ if (!lexerState->capturing) { @@ -377,8 +1246,31 @@ static int yylex_NORMAL(void) } return 0; + /* Handle identifiers... or error out */ + default: - error("Unknown character '%c'\n"); + if ((c <= 'Z' && c >= 'A') + || (c <= 'z' && c >= 'a') + || c == '.' || c == '_') { + int tokenType = readIdentifier(c); + + /* If a keyword, don't try to expand */ + if (tokenType != T_ID && tokenType != T_LOCAL_ID) + return tokenType; + + /* TODO: attempt string expansion */ + + if (tokenType == T_ID && lexerState->atLineStart) + return T_LABEL; + + return tokenType; + } + + /* Do not report weird characters when capturing, it'll be done later */ + if (!lexerState->capturing) { + /* TODO: try to group reportings */ + error("Unknown character %s\n", reportGarbageChar(c)); + } } lexerState->atLineStart = false; } @@ -389,6 +1281,56 @@ static int yylex_RAW(void) fatalerror("LEXER_RAW not yet implemented\n"); } +/* + * This function uses the fact that `if`, etc. constructs are only valid when + * there's nothing before them on their lines. This enables filtering + * "meaningful" (= at line start) vs. "meaningless" (everything else) tokens. + * It's especially important due to macro args not being handled in this + * state, and lexing them in "normal" mode potentially producing such tokens. 
+ */ +static int skipIfBlock(bool toEndc) +{ + lexer_SetMode(LEXER_NORMAL); + int startingDepth = nIFDepth; + int token; + + /* Prevent expanding macro args in this state by enabling capture to nothing */ + lexerState->capturing = true; + lexerState->captureSize = 0; + lexerState->captureBuf = NULL; + + for (;;) { + bool atLineStart = lexerState->atLineStart; + + token = yylex(); + if (token == 0) { /* Pass EOF through */ + return token; + } else if (atLineStart && token == T_POP_IF) { /* Increase nesting */ + nIFDepth++; + } else if (atLineStart && nIFDepth == startingDepth) { /* An occasion to finish? */ + if (token == T_POP_ENDC || (!toEndc && (token == T_POP_ELIF + || token == T_POP_ELSE))) + break; + } else if (atLineStart && token == T_POP_ENDC) { /* Decrease nesting */ + nIFDepth--; + } + } + + lexerState->capturing = false; + + return token; +} + +static int yylex_SKIP_TO_ELIF(void) +{ + return skipIfBlock(false); +} + +static int yylex_SKIP_TO_ENDC(void) +{ + return skipIfBlock(true); +} + int yylex(void) { if (lexerState->atLineStart) { @@ -397,51 +1339,46 @@ int yylex(void) } static int (* const lexerModeFuncs[])(void) = { - [LEXER_NORMAL] = yylex_NORMAL, - [LEXER_RAW] = yylex_RAW, + [LEXER_NORMAL] = yylex_NORMAL, + [LEXER_RAW] = yylex_RAW, + [LEXER_SKIP_TO_ELIF] = yylex_SKIP_TO_ELIF, + [LEXER_SKIP_TO_ENDC] = yylex_SKIP_TO_ENDC }; int token = lexerModeFuncs[lexerState->mode](); + /* Make sure to terminate files with a line feed */ + if (token == 0 && lexerState->lastToken != '\n') + token = '\n'; + lexerState->lastToken = token; + + lexerState->atLineStart = false; if (token == '\n') lexerState->atLineStart = true; - else if (lexerState->atLineStart) - lexerState->atLineStart = false; return token; } -void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, - char const **capture, size_t *size, char const *name) +void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char const **capture, size_t *size, + char const *name) 
{ lexerState->capturing = true; lexerState->captureSize = 0; unsigned int level = 0; char *captureStart; - if (capture) { - if (lexerState->isMmapped) { - captureStart = lexerState->ptr; - } else { - lexerState->captureCapacity = 128; /* The initial size will be twice that */ - reallocCaptureBuf(); - captureStart = lexerState->captureBuf; - } + if (lexerState->isMmapped) { + captureStart = lexerState->ptr; + } else { + lexerState->captureCapacity = 128; /* The initial size will be twice that */ + reallocCaptureBuf(); + captureStart = lexerState->captureBuf; } for (;;) { int token = yylex(); - if (level == 0) { - if (token == endToken) - break; - /* - * Hack: skipping after a `if` requires stopping on three different tokens, - * which there is no simple way to make this function support. Instead, - * if ELIF is the end token, ELSE and ENDC are also checked for here. - */ - if (endToken == T_POP_ELIF && (token == T_POP_ELSE || token == T_POP_ENDC)) - break; - } + if (level == 0 && token == blockEndToken) + break; if (token == EOF) error("Unterminated %s\n", name); @@ -451,9 +1388,7 @@ void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken, level--; } - if (capture) { - *capture = captureStart; - *size = lexerState->captureSize; - } + *capture = captureStart; + *size = lexerState->captureSize; lexerState->captureBuf = NULL; } diff --git a/src/asm/main.c b/src/asm/main.c index 9f522d3ae..9c559e7a9 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -488,6 +488,7 @@ int main(int argc, char *argv[]) if (!state) fatalerror("Failed to open main file!\n"); + lexer_Init(); lexer_SetState(state); nStartClock = clock(); diff --git a/src/asm/symbol.c b/src/asm/symbol.c index cb0f29179..8253ffac7 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -210,8 +210,6 @@ void sym_Purge(char const *symName) labelScope = NULL; hash_RemoveElement(symbols, symbol->name); - if (symbol->type == SYM_MACRO) - free(symbol->macro); free(symbol); } } @@ -230,7 +228,22 
@@ uint32_t sym_GetPCValue(void) } /* - * Return a constant symbols value + * Return a constant symbol's value, assuming it's defined + */ +uint32_t sym_GetConstantSymValue(struct Symbol const *sym) +{ + if (sym == PCSymbol) + return sym_GetPCValue(); + else if (!sym_IsConstant(sym)) + error("\"%s\" does not have a constant value\n", sym->name); + else + return sym_GetValue(sym); + + return 0; +} + +/* + * Return a constant symbol's value */ uint32_t sym_GetConstantValue(char const *s) { @@ -238,12 +251,8 @@ uint32_t sym_GetConstantValue(char const *s) if (sym == NULL) error("'%s' not defined\n", s); - else if (sym == PCSymbol) - return sym_GetPCValue(); - else if (!sym_IsConstant(sym)) - error("\"%s\" does not have a constant value\n", s); else - return sym_GetValue(sym); + return sym_GetConstantSymValue(sym); return 0; } @@ -468,13 +477,13 @@ void sym_Export(char const *symName) /* * Add a macro definition */ -struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo) +struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size) { struct Symbol *sym = createNonrelocSymbol(symName); sym->type = SYM_MACRO; - sym->macroSize = ulNewMacroSize; - sym->macro = tzNewMacro; + sym->macroSize = size; + sym->macro = body; updateSymbolFilename(sym); /* * The symbol is created at the line after the `endm`, diff --git a/src/asm/util.c b/src/asm/util.c index 51f88ba88..ed5836c16 100644 --- a/src/asm/util.c +++ b/src/asm/util.c @@ -6,6 +6,7 @@ * SPDX-License-Identifier: MIT */ +#include #include #include "asm/main.h" @@ -27,6 +28,37 @@ uint32_t calchash(const char *s) return hash; } +char const *print(char c) +{ + static char buf[5]; /* '\xNN' + '\0' */ + + if (isprint(c)) { + buf[0] = c; + buf[1] = '\0'; + return buf; + } + + buf[0] = '\\'; + switch (c) { + case '\n': + buf[1] = 'n'; + break; + case '\r': + buf[1] = 'r'; + break; + case '\t': + buf[1] = 't'; + break; + + default: /* Print as hex */ + buf[1] = 'x'; + sprintf(&buf[2], 
"%02hhx", c); + return buf; + } + buf[2] = '\0'; + return buf; +} + size_t readUTF8Char(uint8_t *dest, char const *src) { uint32_t state = 0; diff --git a/src/asm/warning.c b/src/asm/warning.c index 2521baa96..094cbb7e5 100644 --- a/src/asm/warning.c +++ b/src/asm/warning.c @@ -198,14 +198,14 @@ void processWarningFlag(char const *flag) warnx("Unknown warning `%s`", flag); } -void verror(const char *fmt, va_list args, char const *flag) +void printDiag(const char *fmt, va_list args, char const *type, + char const *flagfmt, char const *flag) { - fputs("ERROR: ", stderr); + fputs(type, stderr); fstk_Dump(); - fprintf(stderr, flag ? ": [-Werror=%s]\n " : ":\n ", flag); + fprintf(stderr, flagfmt, flag); vfprintf(stderr, fmt, args); lexer_DumpStringExpansions(); - nbErrors++; } void error(const char *fmt, ...) @@ -213,8 +213,9 @@ void error(const char *fmt, ...) va_list args; va_start(args, fmt); - verror(fmt, args, NULL); + printDiag(fmt, args, "ERROR: ", "\n ", NULL); va_end(args); + nbErrors++; } noreturn_ void fatalerror(const char *fmt, ...) @@ -222,7 +223,7 @@ noreturn_ void fatalerror(const char *fmt, ...) va_list args; va_start(args, fmt); - verror(fmt, args, NULL); + printDiag(fmt, args, "FATAL: ", "\n ", NULL); va_end(args); exit(1); @@ -240,7 +241,7 @@ void warning(enum WarningID id, char const *fmt, ...) return; case WARNING_ERROR: - verror(fmt, args, flag); + printDiag(fmt, args, "ERROR: ", "[-Werror=%s]\n ", flag); va_end(args); return; @@ -252,11 +253,7 @@ void warning(enum WarningID id, char const *fmt, ...) 
break; } - fputs("warning: ", stderr); - fstk_Dump(); - fprintf(stderr, ": [-W%s]\n ", flag); - vfprintf(stderr, fmt, args); - lexer_DumpStringExpansions(); + printDiag(fmt, args, "warning: ", "[-W%s]\n ", flag); va_end(args); } From e56c6cc2918003f404edfd7c8575c0ab343ba2da Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 28 Jul 2020 21:34:39 +0200 Subject: [PATCH 04/59] Fix PC's name not being passed to parser --- src/asm/lexer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 1179a5732..a6a8b6934 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -1109,6 +1109,8 @@ static int yylex_NORMAL(void) return T_OP_NOT; case '@': + yylval.tzSym[0] = '@'; + yylval.tzSym[1] = '\0'; return T_ID; /* Handle accepted single chars */ From 2ec10012b6633f5e23012330fd5faddcfea490b8 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 28 Jul 2020 21:50:43 +0200 Subject: [PATCH 05/59] Fix mmap read offset not being initialized --- src/asm/lexer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index a6a8b6934..71171392a 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -324,6 +324,7 @@ struct LexerState *lexer_OpenFile(char const *path) state->isMmapped = true; state->ptr = pa; state->size = size; + state->offset = 0; if (verbose) printf("File %s successfully mmap()ped\n", path); From 5ad7a9375076f1d352a9825379e0abee43c2fdfc Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Wed, 29 Jul 2020 01:06:53 +0200 Subject: [PATCH 06/59] Add EQUS expansion --- src/asm/lexer.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 126 insertions(+), 5 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 71171392a..a3dfeb8ee 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -228,10 +228,12 @@ static struct KeywordMapping { static_assert(LEXER_BUF_SIZE <= SSIZE_MAX); struct Expansion { - uint8_t distance; /* How far the expansion's beginning is from the current position */ + struct 
Expansion *firstChild; + struct Expansion *next; char const *contents; size_t len; - struct Expansion *parent; + uint8_t distance; /* Distance between the beginning of this expansion and of its parent */ + uint8_t skip; /* How many extra characters to skip after the expansion is over */ }; struct LexerState { @@ -266,7 +268,8 @@ struct LexerState { size_t nbChars; /* Number of chars of lookahead, for processing expansions */ bool expandStrings; - struct Expansion *expansion; + struct Expansion *expansions; + size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */ }; struct LexerState *lexerState = NULL; @@ -349,7 +352,8 @@ struct LexerState *lexer_OpenFile(char const *path) state->nbChars = 0; state->expandStrings = true; - state->expansion = NULL; + state->expansions = NULL; + state->expansionOfs = 0; return state; } @@ -453,6 +457,67 @@ static void reallocCaptureBuf(void) fatalerror("realloc error while resizing capture buffer: %s\n", strerror(errno)); } +static struct Expansion *getExpansionAtDistance(size_t *distance) +{ + struct Expansion *expansion = lexerState->expansions; + struct Expansion *prevLevel = NULL; /* Top level has no "previous" level */ + + for (;;) { + /* Find the closest expansion whose end is after the target */ + while (expansion && expansion->len - expansion->distance <= *distance) { + *distance -= expansion->skip; + expansion = expansion->next; + } + + /* If there is none, or it begins after the target, return the previous level */ + if (!expansion || expansion->distance > *distance) + return prevLevel; + + /* We know we are inside of that expansion */ + *distance -= expansion->distance; /* Distances are relative to their parent */ + + if (!expansion->firstChild) /* If there are no children, this is it */ + return expansion; + /* Otherwise, register this expansion and repeat the process */ + prevLevel = expansion; + expansion = expansion->firstChild; + } +} + +static void beginExpansion(size_t distance, 
uint8_t skip, char const *str, size_t size) +{ + struct Expansion *parent = getExpansionAtDistance(&distance); + struct Expansion **insertPoint = parent ? &parent->firstChild : &lexerState->expansions; + + /* We cannot be *inside* of any of these expansions, so just keep the list sorted */ + while (*insertPoint && (*insertPoint)->distance < distance) + insertPoint = &(*insertPoint)->next; + + *insertPoint = malloc(sizeof(**insertPoint)); + if (!*insertPoint) + fatalerror("Unable to allocate new expansion: %s", strerror(errno)); + (*insertPoint)->firstChild = NULL; + (*insertPoint)->next = NULL; /* Expansions are always performed left to right */ + (*insertPoint)->contents = str; + (*insertPoint)->len = size; + (*insertPoint)->distance = distance; + (*insertPoint)->skip = skip; + + /* If expansion is the new closest one, update offset */ + if (insertPoint == &lexerState->expansions) + lexerState->expansionOfs = 0; +} + +static void freeExpansion(struct Expansion *expansion) +{ + do { + struct Expansion *next = expansion->next; + + free(expansion); + expansion = next; + } while (expansion); +} + /* If at any point we need more than 255 characters of lookahead, something went VERY wrong. 
*/ static int peek(uint8_t distance) { @@ -460,6 +525,16 @@ static int peek(uint8_t distance) fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n", distance, LEXER_BUF_SIZE); + size_t ofs = lexerState->expansionOfs + distance; + struct Expansion const *expansion = getExpansionAtDistance(&ofs); + + if (expansion) { + assert(distance < expansion->len); + return expansion->contents[ofs]; + } + + distance = ofs - lexerState->expansionOfs; + if (lexerState->isMmapped) { if (lexerState->offset + distance >= lexerState->size) return EOF; @@ -579,6 +654,42 @@ static void shiftChars(uint8_t distance) } } + /* + * The logic is as follows: + * - Any characters up to the expansion need to be consumed in the file + * - If some remain after that, advance the offset within the expansion + * - If that goes *past* the expansion, then leftovers shall be consumed in the file + * - If we went past the expansion, we're back to square one, and should re-do all + */ +nextExpansion: + if (lexerState->expansions) { + /* If the read cursor reaches into the expansion, update offset */ + if (distance > lexerState->expansions->distance) { + /* distance = + */ + lexerState->expansionOfs += distance - lexerState->expansions->distance; + distance = lexerState->expansions->distance; /* Nb chars to read in file */ + /* Now, check if the expansion finished being read */ + if (lexerState->expansionOfs >= lexerState->expansions->len) { + /* Add the leftovers to the distance */ + distance += lexerState->expansionOfs - lexerState->expansions->len; + /* Also add in the post-expansion skip */ + distance += lexerState->expansions->skip; + /* Move on to the next expansion */ + struct Expansion *next = lexerState->expansions->next; + + freeExpansion(lexerState->expansions); + lexerState->expansions = next; + /* Reset the offset for the next expansion */ + lexerState->expansionOfs = 0; + /* And repeat, in case we also go into or over the next expansion */ + goto 
nextExpansion; + } + } + /* Getting closer to the expansion */ + lexerState->expansions->distance -= distance; + /* Now, `distance` is how many bytes to move forward **in the file** */ + } + if (lexerState->isMmapped) { lexerState->offset += distance; } else { @@ -1261,7 +1372,17 @@ static int yylex_NORMAL(void) if (tokenType != T_ID && tokenType != T_LOCAL_ID) return tokenType; - /* TODO: attempt string expansion */ + if (lexerState->expandStrings) { + /* Attempt string expansion */ + struct Symbol const *sym = sym_FindSymbol(yylval.tzSym); + + if (sym && sym->type == SYM_EQUS) { + char const *s = sym_GetStringValue(sym); + + beginExpansion(0, 0, s, strlen(s)); + continue; /* Restart, reading from the new buffer */ + } + } if (tokenType == T_ID && lexerState->atLineStart) return T_LABEL; From 61b2fd98160f3aa9f7f66f00273090412623002b Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Wed, 29 Jul 2020 00:39:18 +0200 Subject: [PATCH 07/59] Add string expansion reporting And fix line counting with expansion-made newlines. This has the same bug as the old lexer (equs-newline's output does not print the second warning as being part of the expansion). Additionally, we regress equs-recursion, as we are no longer able to catch this specific EQUS recursion. Simply enough, the new expansion begins **after** the old one ends! I have found no way to handle that. 
--- src/asm/lexer.c | 51 ++++++++++++++++++++++++++++++++++++--- test/asm/equs-newline.asm | 4 +++ test/asm/equs-newline.err | 8 ++++++ test/asm/equs-newline.out | 0 test/asm/equs-purge.asm | 2 ++ test/asm/equs-purge.err | 3 +++ test/asm/equs-purge.out | 0 7 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 test/asm/equs-newline.asm create mode 100644 test/asm/equs-newline.err create mode 100644 test/asm/equs-newline.out create mode 100644 test/asm/equs-purge.asm create mode 100644 test/asm/equs-purge.err create mode 100644 test/asm/equs-purge.out diff --git a/src/asm/lexer.c b/src/asm/lexer.c index a3dfeb8ee..af101c1b1 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -25,6 +25,7 @@ #include "asm/asm.h" #include "asm/lexer.h" +#include "asm/fstack.h" #include "asm/macro.h" #include "asm/main.h" #include "asm/rpn.h" @@ -230,6 +231,7 @@ static_assert(LEXER_BUF_SIZE <= SSIZE_MAX); struct Expansion { struct Expansion *firstChild; struct Expansion *next; + char *name; char const *contents; size_t len; uint8_t distance; /* Distance between the beginning of this expansion and of its parent */ @@ -461,6 +463,7 @@ static struct Expansion *getExpansionAtDistance(size_t *distance) { struct Expansion *expansion = lexerState->expansions; struct Expansion *prevLevel = NULL; /* Top level has no "previous" level */ + unsigned int depth = 0; for (;;) { /* Find the closest expansion whose end is after the target */ @@ -481,10 +484,14 @@ static struct Expansion *getExpansionAtDistance(size_t *distance) /* Otherwise, register this expansion and repeat the process */ prevLevel = expansion; expansion = expansion->firstChild; + + if (depth++ > nMaxRecursionDepth) + fatalerror("Recursion limit (%u) exceeded", nMaxRecursionDepth); } } -static void beginExpansion(size_t distance, uint8_t skip, char const *str, size_t size) +static void beginExpansion(size_t distance, uint8_t skip, + char const *str, size_t size, char const *name) { struct Expansion *parent = 
getExpansionAtDistance(&distance); struct Expansion **insertPoint = parent ? &parent->firstChild : &lexerState->expansions; @@ -498,6 +505,7 @@ static void beginExpansion(size_t distance, uint8_t skip, char const *str, size_ fatalerror("Unable to allocate new expansion: %s", strerror(errno)); (*insertPoint)->firstChild = NULL; (*insertPoint)->next = NULL; /* Expansions are always performed left to right */ + (*insertPoint)->name = strdup(name); (*insertPoint)->contents = str; (*insertPoint)->len = size; (*insertPoint)->distance = distance; @@ -513,6 +521,7 @@ static void freeExpansion(struct Expansion *expansion) do { struct Expansion *next = expansion->next; + free(expansion->name); free(expansion); expansion = next; } while (expansion); @@ -654,6 +663,8 @@ static void shiftChars(uint8_t distance) } } + /* FIXME: this may not be too great, as only the top level is considered... */ + /* * The logic is as follows: * - Any characters up to the expansion need to be consumed in the file @@ -732,7 +743,37 @@ uint32_t lexer_GetColNo(void) void lexer_DumpStringExpansions(void) { - /* TODO */ + if (!lexerState) + return; + /* This is essentially a modified copy-paste of `getExpansionAtDistance(0)` */ + struct Expansion *stack[nMaxRecursionDepth]; + + struct Expansion *expansion = lexerState->expansions; + unsigned int depth = 0; + size_t distance = lexerState->expansionOfs; + + for (;;) { + /* Find the closest expansion whose end is after the target */ + while (expansion && expansion->len - expansion->distance <= distance) { + distance -= expansion->skip; + expansion = expansion->next; + } + + /* If there is none, or it begins after the target, return the previous level */ + if (!expansion || expansion->distance > distance) + break; + + /* We know we are inside of that expansion */ + distance -= expansion->distance; /* Distances are relative to their parent */ + + stack[depth++] = expansion; + if (!expansion->firstChild) + break; + expansion = expansion->firstChild; + } + 
+ while (depth--) + fprintf(stderr, "while expanding symbol \"%s\"\n", stack[depth]->name); } /* Function to discard all of a line's comments */ @@ -1379,7 +1420,7 @@ static int yylex_NORMAL(void) if (sym && sym->type == SYM_EQUS) { char const *s = sym_GetStringValue(sym); - beginExpansion(0, 0, s, strlen(s)); + beginExpansion(0, 0, s, strlen(s), sym->name); continue; /* Restart, reading from the new buffer */ } } @@ -1457,7 +1498,9 @@ static int yylex_SKIP_TO_ENDC(void) int yylex(void) { - if (lexerState->atLineStart) { + if (lexerState->atLineStart + /* Newlines read within an expansion should not increase the line count */ + && (!lexerState->expansions || lexerState->expansions->distance)) { lexerState->lineNo++; lexerState->colNo = 0; } diff --git a/test/asm/equs-newline.asm b/test/asm/equs-newline.asm new file mode 100644 index 000000000..571577ec5 --- /dev/null +++ b/test/asm/equs-newline.asm @@ -0,0 +1,4 @@ + +ACT equs "WARN \"First\"\nWARN \"Second\"" + ACT + WARN "Third" diff --git a/test/asm/equs-newline.err b/test/asm/equs-newline.err new file mode 100644 index 000000000..620f86313 --- /dev/null +++ b/test/asm/equs-newline.err @@ -0,0 +1,8 @@ +warning: test/asm/equs-newline.asm(2): [-Wuser] + First +while expanding symbol "ACT" +warning: test/asm/equs-newline.asm(3): [-Wuser] + Second +while expanding symbol "ACT" +warning: test/asm/equs-newline.asm(4): [-Wuser] + Third diff --git a/test/asm/equs-newline.out b/test/asm/equs-newline.out new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/equs-purge.asm b/test/asm/equs-purge.asm new file mode 100644 index 000000000..2cd7659c0 --- /dev/null +++ b/test/asm/equs-purge.asm @@ -0,0 +1,2 @@ +BYE equs "PURGE BYE\nWARN \"Crash?\"\n \n" +BYE diff --git a/test/asm/equs-purge.err b/test/asm/equs-purge.err new file mode 100644 index 000000000..6d834761b --- /dev/null +++ b/test/asm/equs-purge.err @@ -0,0 +1,3 @@ +warning: test/asm/equs-purge.asm(0): [-Wuser] + Crash? 
+while expanding symbol "BYE" diff --git a/test/asm/equs-purge.out b/test/asm/equs-purge.out new file mode 100644 index 000000000..e69de29bb From fed252bc4975f48116e49301a0b21266edcf6c35 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Thu, 30 Jul 2020 19:06:47 +0200 Subject: [PATCH 08/59] Fix nested expansions being incorrectly handled The biggest problem was simply that the length of children expansions was not accounted for when skipping over the parent... this took a lot of arduous debugging, but it finally works! --- src/asm/lexer.c | 121 +++++++++++++++++++++--------------- test/asm/equs-nest.asm | 4 ++ test/asm/equs-nest.err | 0 test/asm/equs-nest.out | 0 test/asm/equs-recursion.asm | 8 ++- 5 files changed, 80 insertions(+), 53 deletions(-) create mode 100644 test/asm/equs-nest.asm create mode 100644 test/asm/equs-nest.err create mode 100644 test/asm/equs-nest.out diff --git a/src/asm/lexer.c b/src/asm/lexer.c index af101c1b1..8ae6300e8 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -234,7 +234,8 @@ struct Expansion { char *name; char const *contents; size_t len; - uint8_t distance; /* Distance between the beginning of this expansion and of its parent */ + size_t totalLen; + size_t distance; /* Distance between the beginning of this expansion and of its parent */ uint8_t skip; /* How many extra characters to skip after the expansion is over */ }; @@ -459,44 +460,77 @@ static void reallocCaptureBuf(void) fatalerror("realloc error while resizing capture buffer: %s\n", strerror(errno)); } +/* + * The multiple evaluations of `retvar` causing side effects is INTENTIONAL, and + * required for example by `lexer_dumpStringExpansions`. It is however only + * evaluated once per level, and only then. + * + * This uses the concept of "X macros": you must #define LOOKUP_PRE_NEST and + * LOOKUP_POST_NEST before invoking this (and #undef them right after), and + * those macros will be expanded at the corresponding points in the loop. 
+ * This is necessary because there are at least 3 places which need to iterate + * through iterations while performing custom actions + */ +#define lookupExpansion(retvar, dist) do { \ + struct Expansion *exp = lexerState->expansions; \ + \ + for (;;) { \ + /* Find the closest expansion whose end is after the target */ \ + while (exp && exp->totalLen + exp->distance <= (dist)) { \ + (dist) -= exp->totalLen + exp->skip; \ + exp = exp->next; \ + } \ + \ + /* If there is none, or it begins after the target, return the previous level */ \ + if (!exp || exp->distance > (dist)) \ + break; \ + \ + /* We know we are inside of that expansion */ \ + (dist) -= exp->distance; /* Distances are relative to their parent */ \ + \ + /* Otherwise, register this expansion and repeat the process */ \ + LOOKUP_PRE_NEST(exp); \ + (retvar) = exp; \ + if (!exp->firstChild) /* If there are no children, this is it */ \ + break; \ + exp = exp->firstChild; \ + \ + LOOKUP_POST_NEST(exp); \ + } \ +} while (0) + static struct Expansion *getExpansionAtDistance(size_t *distance) { - struct Expansion *expansion = lexerState->expansions; - struct Expansion *prevLevel = NULL; /* Top level has no "previous" level */ unsigned int depth = 0; + struct Expansion *expansion = NULL; /* Top level has no "previous" level */ - for (;;) { - /* Find the closest expansion whose end is after the target */ - while (expansion && expansion->len - expansion->distance <= *distance) { - *distance -= expansion->skip; - expansion = expansion->next; - } - - /* If there is none, or it begins after the target, return the previous level */ - if (!expansion || expansion->distance > *distance) - return prevLevel; - - /* We know we are inside of that expansion */ - *distance -= expansion->distance; /* Distances are relative to their parent */ - - if (!expansion->firstChild) /* If there are no children, this is it */ - return expansion; - /* Otherwise, register this expansion and repeat the process */ - prevLevel = expansion; - 
expansion = expansion->firstChild; +#define LOOKUP_PRE_NEST(exp) +#define LOOKUP_POST_NEST(exp) do { \ + if (depth++ > nMaxRecursionDepth) \ + fatalerror("Recursion limit (%u) exceeded", nMaxRecursionDepth); \ +} while (0) + lookupExpansion(expansion, *distance); +#undef LOOKUP_PRE_NEST +#undef LOOKUP_POST_NEST - if (depth++ > nMaxRecursionDepth) - fatalerror("Recursion limit (%u) exceeded", nMaxRecursionDepth); - } + return expansion; } static void beginExpansion(size_t distance, uint8_t skip, char const *str, size_t size, char const *name) { - struct Expansion *parent = getExpansionAtDistance(&distance); + distance += lexerState->expansionOfs; /* Distance argument is relative to read offset! */ + /* Increase the total length of all parents, and return the topmost one */ + struct Expansion *parent = NULL; + +#define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size +#define LOOKUP_POST_NEST(exp) + lookupExpansion(parent, distance); +#undef LOOKUP_PRE_NEST +#undef LOOKUP_POST_NEST struct Expansion **insertPoint = parent ? 
&parent->firstChild : &lexerState->expansions; - /* We cannot be *inside* of any of these expansions, so just keep the list sorted */ + /* We know we are in none of the children expansions: add ourselves, keeping it sorted */ while (*insertPoint && (*insertPoint)->distance < distance) insertPoint = &(*insertPoint)->next; @@ -508,6 +542,7 @@ static void beginExpansion(size_t distance, uint8_t skip, (*insertPoint)->name = strdup(name); (*insertPoint)->contents = str; (*insertPoint)->len = size; + (*insertPoint)->totalLen = size; (*insertPoint)->distance = distance; (*insertPoint)->skip = skip; @@ -680,9 +715,10 @@ static void shiftChars(uint8_t distance) lexerState->expansionOfs += distance - lexerState->expansions->distance; distance = lexerState->expansions->distance; /* Nb chars to read in file */ /* Now, check if the expansion finished being read */ - if (lexerState->expansionOfs >= lexerState->expansions->len) { + if (lexerState->expansionOfs >= lexerState->expansions->totalLen) { /* Add the leftovers to the distance */ - distance += lexerState->expansionOfs - lexerState->expansions->len; + distance += lexerState->expansionOfs; + distance -= lexerState->expansions->totalLen; /* Also add in the post-expansion skip */ distance += lexerState->expansions->skip; /* Move on to the next expansion */ @@ -745,32 +781,15 @@ void lexer_DumpStringExpansions(void) { if (!lexerState) return; - /* This is essentially a modified copy-paste of `getExpansionAtDistance(0)` */ struct Expansion *stack[nMaxRecursionDepth]; - - struct Expansion *expansion = lexerState->expansions; unsigned int depth = 0; size_t distance = lexerState->expansionOfs; - for (;;) { - /* Find the closest expansion whose end is after the target */ - while (expansion && expansion->len - expansion->distance <= distance) { - distance -= expansion->skip; - expansion = expansion->next; - } - - /* If there is none, or it begins after the target, return the previous level */ - if (!expansion || expansion->distance 
> distance) - break; - - /* We know we are inside of that expansion */ - distance -= expansion->distance; /* Distances are relative to their parent */ - - stack[depth++] = expansion; - if (!expansion->firstChild) - break; - expansion = expansion->firstChild; - } +#define LOOKUP_PRE_NEST(exp) +#define LOOKUP_POST_NEST(exp) + lookupExpansion(stack[depth++], distance); +#undef LOOKUP_PRE_NEST +#undef LOOKUP_POST_NEST while (depth--) fprintf(stderr, "while expanding symbol \"%s\"\n", stack[depth]->name); diff --git a/test/asm/equs-nest.asm b/test/asm/equs-nest.asm new file mode 100644 index 000000000..f2f23a8a4 --- /dev/null +++ b/test/asm/equs-nest.asm @@ -0,0 +1,4 @@ +X1 equs "Y1 equs \"\\\"Success!\\\\n\\\"\"" +Y1 equs "Z1" +X1 + PRINTT Z1 diff --git a/test/asm/equs-nest.err b/test/asm/equs-nest.err new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/equs-nest.out b/test/asm/equs-nest.out new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/equs-recursion.asm b/test/asm/equs-recursion.asm index 794f49602..3aae90530 100644 --- a/test/asm/equs-recursion.asm +++ b/test/asm/equs-recursion.asm @@ -1,2 +1,6 @@ -recurse EQUS "recurse" -recurse \ No newline at end of file +recurse EQUS "recurse " +recurse + +; FIXME: also handle the following: +; recurse EQUS "recurse" +; recurse From 149db9a0228a08760fb0b8d080b359e1c36fed67 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Thu, 30 Jul 2020 19:57:45 +0200 Subject: [PATCH 09/59] Fix incorrect freeing of expansions Freeing an expansion should free its children, not its siblings... Fixes a use-after-free reported by scan-build. Nice catch! 
--- src/asm/lexer.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 8ae6300e8..6fcccb6f1 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -553,13 +553,16 @@ static void beginExpansion(size_t distance, uint8_t skip, static void freeExpansion(struct Expansion *expansion) { - do { - struct Expansion *next = expansion->next; + struct Expansion *child = expansion->firstChild; - free(expansion->name); - free(expansion); - expansion = next; - } while (expansion); + while (child) { + struct Expansion *next = child->next; + + freeExpansion(child); + child = next; + } + free(expansion->name); + free(expansion); } /* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */ From 38bda7e1bb91586e20f41cd52254a49b2a549d5e Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Fri, 31 Jul 2020 09:49:51 +0200 Subject: [PATCH 10/59] Fix string expansion reporting More expansions were allowed than the limit specified, and reporting code did not account for the extra one that caused overflow --- src/asm/lexer.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 6fcccb6f1..71c4a95c0 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -501,14 +501,10 @@ static void reallocCaptureBuf(void) static struct Expansion *getExpansionAtDistance(size_t *distance) { - unsigned int depth = 0; struct Expansion *expansion = NULL; /* Top level has no "previous" level */ #define LOOKUP_PRE_NEST(exp) -#define LOOKUP_POST_NEST(exp) do { \ - if (depth++ > nMaxRecursionDepth) \ - fatalerror("Recursion limit (%u) exceeded", nMaxRecursionDepth); \ -} while (0) +#define LOOKUP_POST_NEST(exp) lookupExpansion(expansion, *distance); #undef LOOKUP_PRE_NEST #undef LOOKUP_POST_NEST @@ -522,9 +518,13 @@ static void beginExpansion(size_t distance, uint8_t skip, distance += lexerState->expansionOfs; /* Distance argument is relative to read 
offset! */ /* Increase the total length of all parents, and return the topmost one */ struct Expansion *parent = NULL; + unsigned int depth = 0; #define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size -#define LOOKUP_POST_NEST(exp) +#define LOOKUP_POST_NEST(exp) do { \ + if (++depth >= nMaxRecursionDepth) \ + fatalerror("Recursion limit (%u) exceeded", nMaxRecursionDepth); \ +} while (0) lookupExpansion(parent, distance); #undef LOOKUP_PRE_NEST #undef LOOKUP_POST_NEST @@ -784,7 +784,7 @@ void lexer_DumpStringExpansions(void) { if (!lexerState) return; - struct Expansion *stack[nMaxRecursionDepth]; + struct Expansion *stack[nMaxRecursionDepth + 1]; unsigned int depth = 0; size_t distance = lexerState->expansionOfs; From 7c895f8a1b02e433f9790d43f1376ae163b3dbe8 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Fri, 31 Jul 2020 18:22:21 +0200 Subject: [PATCH 11/59] Fix diagnostic formatting Missing colon and space after the file stack --- src/asm/warning.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/asm/warning.c b/src/asm/warning.c index 094cbb7e5..cb58ff8de 100644 --- a/src/asm/warning.c +++ b/src/asm/warning.c @@ -213,7 +213,7 @@ void error(const char *fmt, ...) va_list args; va_start(args, fmt); - printDiag(fmt, args, "ERROR: ", "\n ", NULL); + printDiag(fmt, args, "ERROR: ", ":\n ", NULL); va_end(args); nbErrors++; } @@ -223,7 +223,7 @@ noreturn_ void fatalerror(const char *fmt, ...) va_list args; va_start(args, fmt); - printDiag(fmt, args, "FATAL: ", "\n ", NULL); + printDiag(fmt, args, "FATAL: ", ":\n ", NULL); va_end(args); exit(1); @@ -241,7 +241,7 @@ void warning(enum WarningID id, char const *fmt, ...) return; case WARNING_ERROR: - printDiag(fmt, args, "ERROR: ", "[-Werror=%s]\n ", flag); + printDiag(fmt, args, "ERROR: ", ": [-Werror=%s]\n ", flag); va_end(args); return; @@ -253,7 +253,7 @@ void warning(enum WarningID id, char const *fmt, ...) 
break; } - printDiag(fmt, args, "warning: ", "[-W%s]\n ", flag); + printDiag(fmt, args, "warning: ", ": [-W%s]\n ", flag); va_end(args); } From e11f25024e619c9c1fdd6d3a2cf577f937fa13f9 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Fri, 31 Jul 2020 18:24:44 +0200 Subject: [PATCH 12/59] Add test for built-in file symbol It's currently defined in fstack.c, making it more prone to accidental dropping. Let's not repeat the 0.3.9 scenario... --- Makefile | 2 +- include/asm/fstack.h | 20 +- include/asm/lexer.h | 5 +- include/asm/macro.h | 1 + include/asm/symbol.h | 4 +- src/asm/asmy.y | 8 +- src/asm/fstack.c | 584 ++++++++++++++++-------------------------- src/asm/lexer.c | 89 +++++-- src/asm/macro.c | 13 +- src/asm/main.c | 17 +- src/asm/output.c | 4 +- src/asm/symbol.c | 2 +- test/asm/file-sym.asm | 1 + test/asm/file-sym.err | 0 test/asm/file-sym.out | 1 + 15 files changed, 331 insertions(+), 420 deletions(-) create mode 100644 test/asm/file-sym.asm create mode 100644 test/asm/file-sym.err create mode 100644 test/asm/file-sym.out diff --git a/Makefile b/Makefile index ee2605f7d..cecb0a6ee 100644 --- a/Makefile +++ b/Makefile @@ -72,7 +72,7 @@ rgbasm_obj := \ src/hashmap.o \ src/linkdefs.o -src/asm/lexer.o: src/asm/asmy.h +src/asm/lexer.o src/asm/main.o: src/asm/asmy.h rgblink_obj := \ src/link/assign.o \ diff --git a/include/asm/fstack.h b/include/asm/fstack.h index d365b9545..bbf321c36 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -41,13 +41,7 @@ struct sContext { extern unsigned int nMaxRecursionDepth; -void fstk_RunInclude(char *tzFileName); -void fstk_Init(char *s); -void fstk_Dump(void); -void fstk_DumpToStr(char *buf, size_t len); -void fstk_AddIncludePath(char *s); -void fstk_RunMacro(char *s, struct MacroArgs *args); -void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size); +void fstk_AddIncludePath(char const *s); /** * @param path The user-provided file name * @param fullPath The address of a pointer, which will be 
made to point at the full path @@ -56,6 +50,16 @@ void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t * @return True if the file was found, false if no path worked */ bool fstk_FindFile(char const *path, char **fullPath, size_t *size); -int32_t fstk_GetLine(void); + +bool yywrap(void); +void fstk_RunInclude(char const *path); +void fstk_RunMacro(char *macroName, struct MacroArgs *args); +void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size); + +void fstk_Dump(void); +char *fstk_DumpToStr(void); +uint32_t fstk_GetLine(void); + +void fstk_Init(char *mainPath, uint32_t maxRecursionDepth); #endif /* RGBDS_ASM_FSTACK_H */ diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 842a91f36..d1cfd08bf 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -31,7 +31,8 @@ static inline void lexer_SetStateAtEOL(struct LexerState *state) } struct LexerState *lexer_OpenFile(char const *path); -struct LexerState *lexer_OpenFileView(void); +struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo); +void lexer_RestartRept(uint32_t lineNo); void lexer_DeleteState(struct LexerState *state); void lexer_Init(void); @@ -50,7 +51,7 @@ uint32_t lexer_GetLineNo(void); uint32_t lexer_GetColNo(void); void lexer_DumpStringExpansions(void); int yylex(void); -void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char const **capture, size_t *size, +void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size, char const *name); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/include/asm/macro.h b/include/asm/macro.h index 2142ea718..855133f8f 100644 --- a/include/asm/macro.h +++ b/include/asm/macro.h @@ -28,6 +28,7 @@ char const *macro_GetArg(uint32_t i); uint32_t macro_GetUniqueID(void); char const *macro_GetUniqueIDStr(void); void macro_SetUniqueID(uint32_t id); +uint32_t macro_UseNewUniqueID(void); void macro_ShiftCurrentArgs(void); uint32_t macro_NbArgs(void); diff 
--git a/include/asm/symbol.h b/include/asm/symbol.h index 8503490b1..22ae95abf 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -45,7 +45,7 @@ struct Symbol { }; struct { /* For SYM_MACRO */ size_t macroSize; - char const *macro; + char *macro; }; }; @@ -117,7 +117,7 @@ uint32_t sym_GetPCValue(void); uint32_t sym_GetConstantSymValue(struct Symbol const *sym); uint32_t sym_GetConstantValue(char const *s); struct Symbol *sym_FindSymbol(char const *symName); -struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size); +struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size); struct Symbol *sym_Ref(char const *symName); struct Symbol *sym_AddString(char const *symName, char const *value); uint32_t sym_GetDefinedValue(char const *s); diff --git a/src/asm/asmy.y b/src/asm/asmy.y index d10681f3f..ac08843dc 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -597,21 +597,21 @@ load : T_POP_LOAD string ',' sectiontype sectorg sectattrs { rept : T_POP_REPT uconst { uint32_t nDefinitionLineNo = lexer_GetLineNo(); - char const *body; + char *body; size_t size; lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size, "REPT block"); - fstk_RunRept($2, nDefinitionLineNo, body, size); + fstk_RunRept($2, nDefinitionLineNo, body, size - strlen("ENDR")); } ; macrodef : T_LABEL ':' T_POP_MACRO { int32_t nDefinitionLineNo = lexer_GetLineNo(); - char const *body; + char *body; size_t size; lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size, "macro definition"); - sym_AddMacro($1, nDefinitionLineNo, body, size); + sym_AddMacro($1, nDefinitionLineNo, body, size - strlen("ENDM")); } ; diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 2e3dc1bd2..3e02c936f 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -6,318 +6,82 @@ * SPDX-License-Identifier: MIT */ -/* - * FileStack routines - */ - +#include +#include #include #include -#include #include #include #include -#include -#include 
-#include #include "asm/fstack.h" -#include "asm/lexer.h" #include "asm/macro.h" #include "asm/main.h" -#include "asm/output.h" +#include "asm/symbol.h" #include "asm/warning.h" - -#include "extern/err.h" - -#include "platform.h" // S_ISDIR (stat macro) -#include "types.h" - -static struct sContext *pFileStack; -static unsigned int nFileStackDepth; +#include "platform.h" /* S_ISDIR (stat macro) */ + +struct Context { + struct Context *parent; + struct Context *child; + struct LexerState *lexerState; + uint32_t uniqueID; + char *fileName; + uint32_t lineNo; /* Line number at which the context was EXITED */ + struct Symbol const *macro; + uint32_t nbReptIters; /* If zero, this isn't a REPT block */ + size_t reptDepth; + uint32_t reptIters[]; +}; + +static struct Context *contextStack; +static struct Context *topLevelContext; +static unsigned int contextDepth = 0; unsigned int nMaxRecursionDepth; -static struct Symbol const *pCurrentMacro; -static uint32_t nCurrentStatus; -static char IncludePaths[MAXINCPATHS][_MAX_PATH + 1]; -static int32_t NextIncPath; -static uint32_t nMacroCount; - -static char const *pCurrentREPTBlock; -static uint32_t nCurrentREPTBlockSize; -static uint32_t nCurrentREPTBlockCount; -static int32_t nCurrentREPTBodyFirstLine; -static int32_t nCurrentREPTBodyLastLine; - -uint32_t ulMacroReturnValue; - -/* - * defines for nCurrentStatus - */ -#define STAT_isInclude 0 /* 'Normal' state as well */ -#define STAT_isMacro 1 -#define STAT_isMacroArg 2 -#define STAT_isREPTBlock 3 - -/* Max context stack size */ - -/* - * Context push and pop - */ -static void pushcontext(void) -{ - struct sContext **ppFileStack; - - if (++nFileStackDepth > nMaxRecursionDepth) - fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); - - ppFileStack = &pFileStack; - while (*ppFileStack) - ppFileStack = &((*ppFileStack)->next); - - *ppFileStack = malloc(sizeof(struct sContext)); - - if (*ppFileStack == NULL) - fatalerror("No memory for context\n"); - - 
(*ppFileStack)->next = NULL; - (*ppFileStack)->nLine = lexer_GetLineNo(); - - switch ((*ppFileStack)->nStatus = nCurrentStatus) { - case STAT_isMacroArg: - case STAT_isMacro: - (*ppFileStack)->macroArgs = macro_GetCurrentArgs(); - (*ppFileStack)->pMacro = pCurrentMacro; - break; - case STAT_isInclude: - break; - case STAT_isREPTBlock: - (*ppFileStack)->macroArgs = macro_GetCurrentArgs(); - (*ppFileStack)->pREPTBlock = pCurrentREPTBlock; - (*ppFileStack)->nREPTBlockSize = nCurrentREPTBlockSize; - (*ppFileStack)->nREPTBlockCount = nCurrentREPTBlockCount; - (*ppFileStack)->nREPTBodyFirstLine = nCurrentREPTBodyFirstLine; - (*ppFileStack)->nREPTBodyLastLine = nCurrentREPTBodyLastLine; - break; - default: - fatalerror("%s: Internal error.\n", __func__); - } - (*ppFileStack)->uniqueID = macro_GetUniqueID(); -} - -static int32_t popcontext(void) -{ - struct sContext *pLastFile, **ppLastFile; - - if (nCurrentStatus == STAT_isREPTBlock) { - if (--nCurrentREPTBlockCount) { - char *pREPTIterationWritePtr; - unsigned long nREPTIterationNo; - int nNbCharsWritten; - int nNbCharsLeft; - - macro_SetUniqueID(nMacroCount++); - - /* Increment REPT count in file path */ - pREPTIterationWritePtr = - strrchr(lexer_GetFileName(), '~') + 1; - nREPTIterationNo = - strtoul(pREPTIterationWritePtr, NULL, 10); - nNbCharsLeft = sizeof(lexer_GetFileName()) - - (pREPTIterationWritePtr - lexer_GetFileName()); - nNbCharsWritten = snprintf(pREPTIterationWritePtr, - nNbCharsLeft, "%lu", - nREPTIterationNo + 1); - if (nNbCharsWritten >= nNbCharsLeft) { - /* - * The string is probably corrupted somehow, - * revert the change to avoid a bad error - * output. 
- */ - sprintf(pREPTIterationWritePtr, "%lu", - nREPTIterationNo); - fatalerror("Cannot write REPT count to file path\n"); - } - - return 0; - } - } - - pLastFile = pFileStack; - if (pLastFile == NULL) - return 1; - - ppLastFile = &pFileStack; - while (pLastFile->next) { - ppLastFile = &(pLastFile->next); - pLastFile = *ppLastFile; - } - - lexer_DeleteState(lexer_GetState()); - lexer_SetState(pLastFile->lexerState); - - switch (pLastFile->nStatus) { - struct MacroArgs *args; - - case STAT_isMacroArg: - case STAT_isMacro: - args = macro_GetCurrentArgs(); - if (nCurrentStatus == STAT_isMacro) { - macro_FreeArgs(args); - free(args); - } - macro_UseNewArgs(pLastFile->macroArgs); - pCurrentMacro = pLastFile->pMacro; - break; - case STAT_isInclude: - break; - case STAT_isREPTBlock: - args = macro_GetCurrentArgs(); - if (nCurrentStatus == STAT_isMacro) { - macro_FreeArgs(args); - free(args); - } - macro_UseNewArgs(pLastFile->macroArgs); - pCurrentREPTBlock = pLastFile->pREPTBlock; - nCurrentREPTBlockSize = pLastFile->nREPTBlockSize; - nCurrentREPTBlockCount = pLastFile->nREPTBlockCount; - nCurrentREPTBodyFirstLine = pLastFile->nREPTBodyFirstLine; - break; - default: - fatalerror("%s: Internal error.\n", __func__); - } - macro_SetUniqueID(pLastFile->uniqueID); - - nCurrentStatus = pLastFile->nStatus; - - nFileStackDepth--; - - free(*ppLastFile); - *ppLastFile = NULL; - return 0; -} -int32_t fstk_GetLine(void) -{ - struct sContext *pLastFile, **ppLastFile; - - switch (nCurrentStatus) { - case STAT_isInclude: - /* This is the normal mode, also used when including a file. */ - return lexer_GetLineNo(); - case STAT_isMacro: - break; /* Peek top file of the stack */ - case STAT_isMacroArg: - return lexer_GetLineNo(); /* ??? 
*/ - case STAT_isREPTBlock: - break; /* Peek top file of the stack */ - default: - fatalerror("%s: Internal error.\n", __func__); - } - - pLastFile = pFileStack; - - if (pLastFile != NULL) { - while (pLastFile->next) { - ppLastFile = &(pLastFile->next); - pLastFile = *ppLastFile; - } - return pLastFile->nLine; - } - - /* - * This is only reached if the lexer is in REPT or MACRO mode but there - * are no saved contexts with the origin of said REPT or MACRO. - */ - fatalerror("%s: Internal error.\n", __func__); -} - -int yywrap(void) -{ - return popcontext(); -} +static unsigned int nbIncPaths = 0; +static char const *includePaths[MAXINCPATHS]; -/* - * Dump the context stack to stderr - */ -void fstk_Dump(void) +void fstk_AddIncludePath(char const *path) { - const struct sContext *pLastFile; - - pLastFile = pFileStack; - - while (pLastFile) { - fprintf(stderr, "%s(%" PRId32 ") -> ", pLastFile->tzFileName, - pLastFile->nLine); - pLastFile = pLastFile->next; + if (path[0] == '\0') + return; + if (nbIncPaths >= MAXINCPATHS) { + error("Too many include directories passed from command line\n"); + return; } - char const *fileName = lexer_GetFileName(); + size_t len = strlen(path); + size_t allocSize = len + (path[len - 1] != '/') + 1; + char *str = malloc(allocSize); - if (fileName) - fprintf(stderr, "%s(%" PRId32 ",%" PRId32 "): ", - fileName, lexer_GetLineNo(), lexer_GetColNo()); -} - -void fstk_DumpToStr(char *buf, size_t buflen) -{ - const struct sContext *pLastFile = pFileStack; - int retcode; - size_t len = buflen; - - while (pLastFile) { - retcode = snprintf(&buf[buflen - len], len, "%s(%" PRId32 ") -> ", - pLastFile->tzFileName, pLastFile->nLine); - if (retcode < 0) - fatalerror("Failed to dump file stack to string: %s\n", strerror(errno)); - else if (retcode >= len) - len = 0; - else - len -= retcode; - pLastFile = pLastFile->next; + if (!str) { + /* Attempt to continue without that path */ + error("Failed to allocate new include path: %s\n", strerror(errno)); + 
return; } + memcpy(str, path, len); + char *end = str + len - 1; - retcode = snprintf(&buf[buflen - len], len, "%s(%" PRId32 ")", - lexer_GetFileName(), lexer_GetLineNo()); - if (retcode < 0) - fatalerror("Failed to dump file stack to string: %s\n", strerror(errno)); - else if (retcode >= len) - len = 0; - else - len -= retcode; - - if (!len) - warning(WARNING_LONG_STR, "File stack dump too long, got truncated\n"); -} - -/* - * Extra includepath stuff - */ -void fstk_AddIncludePath(char *s) -{ - if (NextIncPath == MAXINCPATHS) - fatalerror("Too many include directories passed from command line\n"); - - // Find last occurrence of slash; is it at the end of the string? - char const *lastSlash = strrchr(s, '/'); - char const *pattern = lastSlash && *(lastSlash + 1) == 0 ? "%s" : "%s/"; - - if (snprintf(IncludePaths[NextIncPath++], _MAX_PATH, pattern, - s) >= _MAX_PATH) - fatalerror("Include path too long '%s'\n", s); + if (*end++ != '/') + *end++ = '/'; + *end = '\0'; + includePaths[nbIncPaths++] = str; } -static void printdep(const char *fileName) +static void printDep(char const *path) { if (dependfile) { - fprintf(dependfile, "%s: %s\n", tzTargetFileName, fileName); + fprintf(dependfile, "%s: %s\n", tzTargetFileName, path); if (oGeneratePhonyDeps) - fprintf(dependfile, "%s:\n", fileName); + fprintf(dependfile, "%s:\n", path); } } -static bool isPathValid(char const *pathname) +static bool isPathValid(char const *path) { struct stat statbuf; - if (stat(pathname, &statbuf) != 0) + if (stat(path, &statbuf) != 0) return false; /* Reject directories */ @@ -335,8 +99,8 @@ bool fstk_FindFile(char const *path, char **fullPath, size_t *size) } if (*fullPath) { - for (size_t i = 0; i <= NextIncPath; ++i) { - char *incPath = i ? IncludePaths[i - 1] : ""; + for (size_t i = 0; i <= nbIncPaths; ++i) { + char const *incPath = i ? includePaths[i - 1] : ""; int len = snprintf(*fullPath, *size, "%s%s", incPath, path); /* Oh how I wish `asnprintf` was standard... 
*/ @@ -355,7 +119,7 @@ bool fstk_FindFile(char const *path, char **fullPath, size_t *size) error("snprintf error during include path search: %s\n", strerror(errno)); } else if (isPathValid(*fullPath)) { - printdep(*fullPath); + printDep(*fullPath); return true; } } @@ -363,114 +127,210 @@ bool fstk_FindFile(char const *path, char **fullPath, size_t *size) errno = ENOENT; if (oGeneratedMissingIncludes) - printdep(path); + printDep(path); return false; } -/* - * Set up an include file for parsing - */ -void fstk_RunInclude(char *tzFileName) +bool yywrap(void) +{ + if (contextStack->nbReptIters) { /* The context is a REPT block, which may loop */ + contextStack->reptIters[contextStack->reptDepth - 1]++; + /* If this wasn't the last iteration, wrap instead of popping */ + if (contextStack->reptIters[contextStack->reptDepth - 1] + <= contextStack->nbReptIters) { + lexer_RestartRept(contextStack->parent->lineNo); + contextStack->uniqueID = macro_UseNewUniqueID(); + return false; + } + } else if (!contextStack->parent) { + return true; + } + contextStack = contextStack->parent; + contextDepth--; + + lexer_DeleteState(contextStack->child->lexerState); + /* If at top level (= not in macro or in REPT), free the file name */ + if (!contextStack->macro && contextStack->reptIters == 0) + free(contextStack->child->fileName); + /* Free the entry and make its parent the current entry */ + free(contextStack->child); + + contextStack->child = NULL; + lexer_SetState(contextStack->lexerState); + return false; +} + +static void newContext(uint32_t reptDepth) +{ + if (++contextDepth >= nMaxRecursionDepth) + fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); + contextStack->child = malloc(sizeof(*contextStack->child) + + reptDepth * sizeof(contextStack->reptIters[0])); + if (!contextStack->child) + fatalerror("Failed to allocate memory for new context: %s\n", strerror(errno)); + + contextStack->lineNo = lexer_GetLineNo(); + /* Link new entry to its parent so it's 
reachable later */ + contextStack->child->parent = contextStack; + contextStack = contextStack->child; + + contextStack->child = NULL; + contextStack->reptDepth = reptDepth; +} + +void fstk_RunInclude(char const *path) { char *fullPath = NULL; size_t size = 0; - if (!fstk_FindFile(tzFileName, &fullPath, &size)) { - if (oGeneratedMissingIncludes) - oFailedOnMissingInclude = true; - else - error("Unable to open included file '%s': %s\n", - tzFileName, strerror(errno)); + if (!fstk_FindFile(path, &fullPath, &size)) { free(fullPath); + error("Unable to open included file '%s': %s\n", path, strerror(errno)); return; } - pushcontext(); - nCurrentStatus = STAT_isInclude; - if (verbose) - printf("Assembling %s\n", fullPath); - - struct LexerState *state = lexer_OpenFile(fullPath); - - if (!state) - /* If lexer had an error, it already reported it */ - fatalerror("Failed to open file for INCLUDE\n"); /* TODO: make this non-fatal? */ - lexer_SetStateAtEOL(state); - free(fullPath); + newContext(0); + contextStack->lexerState = lexer_OpenFile(fullPath); + if (!contextStack->lexerState) + fatalerror("Failed to set up lexer for file include\n"); + lexer_SetStateAtEOL(contextStack->lexerState); + /* We're back at top-level, so most things are reset */ + contextStack->uniqueID = 0; + macro_SetUniqueID(0); + contextStack->fileName = fullPath; + contextStack->macro = NULL; + contextStack->nbReptIters = 0; } -/* - * Set up a macro for parsing - */ -void fstk_RunMacro(char *s, struct MacroArgs *args) +void fstk_RunMacro(char *macroName, struct MacroArgs *args) { - struct Symbol const *sym = sym_FindSymbol(s); + struct Symbol *macro = sym_FindSymbol(macroName); - if (sym == NULL) { - error("Macro \"%s\" not defined\n", s); + if (!macro) { + error("Macro \"%s\" not defined\n", macroName); return; } - if (sym->type != SYM_MACRO) { - error("\"%s\" is not a macro\n", s); + if (macro->type != SYM_MACRO) { + error("\"%s\" is not a macro\n", macroName); return; } - - pushcontext(); - 
macro_SetUniqueID(nMacroCount++); - /* Minus 1 because there is a newline at the beginning of the buffer */ macro_UseNewArgs(args); - nCurrentStatus = STAT_isMacro; - pCurrentMacro = sym; + newContext(0); + contextStack->lexerState = lexer_OpenFileView(macro->macro, + macro->macroSize, macro->fileLine); + if (!contextStack->lexerState) + fatalerror("Failed to set up lexer for macro invocation\n"); + lexer_SetStateAtEOL(contextStack->lexerState); + contextStack->uniqueID = macro_UseNewUniqueID(); + contextStack->fileName = macro->fileName; + contextStack->macro = macro; + contextStack->nbReptIters = 0; } -/* - * Set up a repeat block for parsing - */ -void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char const *body, size_t size) +void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size) { - if (count) { - pushcontext(); - macro_SetUniqueID(nMacroCount++); - nCurrentREPTBlockCount = count; - nCurrentStatus = STAT_isREPTBlock; - nCurrentREPTBlockSize = size; - pCurrentREPTBlock = body; - nCurrentREPTBodyFirstLine = nReptLineNo + 1; - } + uint32_t reptDepth = contextStack->reptDepth; + + newContext(reptDepth + 1); + contextStack->lexerState = lexer_OpenFileView(body, size, nReptLineNo); + if (!contextStack->lexerState) + fatalerror("Failed to set up lexer for macro invocation\n"); + lexer_SetStateAtEOL(contextStack->lexerState); + contextStack->uniqueID = macro_UseNewUniqueID(); + contextStack->fileName = contextStack->parent->fileName; + contextStack->macro = contextStack->parent->macro; /* Inherit */ + contextStack->nbReptIters = count; + /* Copy all of parent's iters, and add ours */ + if (reptDepth) + memcpy(contextStack->reptIters, contextStack->parent->reptIters, + sizeof(contextStack->reptIters[0]) * reptDepth); + contextStack->reptIters[reptDepth] = 1; + + /* Correct our parent's line number, which currently points to the `ENDR` line */ + contextStack->parent->lineNo = nReptLineNo; } -/* - * Initialize the filestack routines - */ 
-void fstk_Init(char *pFileName) +static void printContext(FILE *stream, struct Context const *context) { - char tzSymFileName[_MAX_PATH + 1 + 2]; - - char *c = pFileName; - int fileNameIndex = 0; - - tzSymFileName[fileNameIndex++] = '"'; + fprintf(stream, "%s", context->fileName); + if (context->macro) + fprintf(stream, "::%s", context->macro->name); + for (size_t i = 0; i < context->reptDepth; i++) + fprintf(stream, "::REPT~%" PRIu32, context->reptIters[i]); + fprintf(stream, "(%" PRId32 ")", context->lineNo); +} - // minus 2 to account for trailing "\"\0" - // minus 1 to avoid a buffer overflow in extreme cases - while (*c && fileNameIndex < sizeof(tzSymFileName) - 2 - 1) { - if (*c == '"') { - tzSymFileName[fileNameIndex++] = '\\'; - } +static void dumpToStream(FILE *stream) +{ + struct Context *context = topLevelContext; - tzSymFileName[fileNameIndex++] = *c; - ++c; + while (context != contextStack) { + printContext(stream, context); + fprintf(stream, " -> "); + context = context->child; } + contextStack->lineNo = lexer_GetLineNo(); + printContext(stream, contextStack); +} + +void fstk_Dump(void) +{ + dumpToStream(stderr); +} - tzSymFileName[fileNameIndex++] = '"'; - tzSymFileName[fileNameIndex] = '\0'; +char *fstk_DumpToStr(void) +{ + char *str; + size_t size; + /* `open_memstream` is specified to always include a '\0' at the end of the buffer! 
*/ + FILE *stream = open_memstream(&str, &size); - sym_AddString("__FILE__", tzSymFileName); + if (!stream) + fatalerror("Failed to dump file stack to string: %s\n", strerror(errno)); + dumpToStream(stream); + fclose(stream); + return str; +} - pFileStack = NULL; - nFileStackDepth = 0; +uint32_t fstk_GetLine(void) +{ + return lexer_GetLineNo(); +} - nMacroCount = 0; - nCurrentStatus = STAT_isInclude; +void fstk_Init(char *mainPath, uint32_t maxRecursionDepth) +{ + topLevelContext = malloc(sizeof(*topLevelContext)); + if (!topLevelContext) + fatalerror("Failed to allocate memory for initial context: %s\n", strerror(errno)); + topLevelContext->parent = NULL; + topLevelContext->child = NULL; + topLevelContext->lexerState = lexer_OpenFile(mainPath); + if (!topLevelContext->lexerState) + fatalerror("Failed to open main file!\n"); + lexer_SetState(topLevelContext->lexerState); + topLevelContext->uniqueID = 0; + macro_SetUniqueID(0); + topLevelContext->fileName = mainPath; + topLevelContext->macro = NULL; + topLevelContext->nbReptIters = 0; + topLevelContext->reptDepth = 0; + + contextStack = topLevelContext; + +#if 0 + if (maxRecursionDepth + > (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])) { +#else + /* If this holds, then GCC raises a warning about the `if` above being dead code */ + static_assert(UINT32_MAX + <= (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])); + if (0) { +#endif + error("Recursion depth may not be higher than %zu, defaulting to 64\n", + (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])); + nMaxRecursionDepth = 64; + } else { + nMaxRecursionDepth = maxRecursionDepth; + } } diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 71c4a95c0..6bfc11876 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -258,6 +258,8 @@ struct LexerState { }; /* Common state */ + bool isFile; + enum LexerMode mode; bool atLineStart; uint32_t lineNo; @@ -278,6 +280,21 @@ struct LexerState { struct 
LexerState *lexerState = NULL; struct LexerState *lexerStateEOL = NULL; +static void initState(struct LexerState *state) +{ + state->mode = LEXER_NORMAL; + state->atLineStart = true; /* yylex() will init colNo due to this */ + state->lastToken = 0; + + state->capturing = false; + state->captureBuf = NULL; + + state->nbChars = 0; + state->expandStrings = true; + state->expansions = NULL; + state->expansionOfs = 0; +} + struct LexerState *lexer_OpenFile(char const *path) { bool isStdin = !strcmp(path, "-"); @@ -292,6 +309,7 @@ struct LexerState *lexer_OpenFile(char const *path) } state->path = path; + state->isFile = true; state->fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY); if (state->fd == -1) { error("Failed to open file \"%s\": %s\n", path, strerror(errno)); @@ -345,32 +363,45 @@ struct LexerState *lexer_OpenFile(char const *path) state->index = 0; } - state->mode = LEXER_NORMAL; - state->atLineStart = true; /* yylex() will init colNo due to this */ - state->lineNo = 0; - state->lastToken = 0; + initState(state); + state->lineNo = 0; /* Will be incremented at first line start */ + return state; +} - state->capturing = false; - state->captureBuf = NULL; +struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo) +{ + struct LexerState *state = malloc(sizeof(*state)); - state->nbChars = 0; - state->expandStrings = true; - state->expansions = NULL; - state->expansionOfs = 0; + if (!state) { + error("Failed to allocate memory for lexer state: %s", strerror(errno)); + return NULL; + } + // TODO: init `path` + + state->isFile = false; + state->isMmapped = true; /* It's not *really* mmap()ed, but it behaves the same */ + state->ptr = buf; + state->size = size; + state->offset = 0; + + initState(state); + state->lineNo = lineNo; /* Will be incremented at first line start */ return state; } -struct LexerState *lexer_OpenFileView(void) +void lexer_RestartRept(uint32_t lineNo) { - return NULL; + lexerState->offset = 0; + initState(lexerState); + 
lexerState->lineNo = lineNo; } void lexer_DeleteState(struct LexerState *state) { - if (state->isMmapped) - munmap(state->ptr, state->size); - else + if (!state->isMmapped) close(state->fd); + else if (state->isFile) + munmap(state->ptr, state->size); free(state); } @@ -523,7 +554,7 @@ static void beginExpansion(size_t distance, uint8_t skip, #define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size #define LOOKUP_POST_NEST(exp) do { \ if (++depth >= nMaxRecursionDepth) \ - fatalerror("Recursion limit (%u) exceeded", nMaxRecursionDepth); \ + fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); \ } while (0) lookupExpansion(parent, distance); #undef LOOKUP_PRE_NEST @@ -536,7 +567,7 @@ static void beginExpansion(size_t distance, uint8_t skip, *insertPoint = malloc(sizeof(**insertPoint)); if (!*insertPoint) - fatalerror("Unable to allocate new expansion: %s", strerror(errno)); + fatalerror("Unable to allocate new expansion: %s\n", strerror(errno)); (*insertPoint)->firstChild = NULL; (*insertPoint)->next = NULL; /* Expansions are always performed left to right */ (*insertPoint)->name = strdup(name); @@ -1417,10 +1448,6 @@ static int yylex_NORMAL(void) return '\n'; case EOF: - /* Captures end at their buffer's boundary no matter what */ - if (!lexerState->capturing) { - /* TODO: use `yywrap()` */ - } return 0; /* Handle identifiers... 
or error out */ @@ -1520,6 +1547,7 @@ static int yylex_SKIP_TO_ENDC(void) int yylex(void) { +restart: if (lexerState->atLineStart /* Newlines read within an expansion should not increase the line count */ && (!lexerState->expansions || lexerState->expansions->distance)) { @@ -1536,8 +1564,17 @@ int yylex(void) int token = lexerModeFuncs[lexerState->mode](); /* Make sure to terminate files with a line feed */ - if (token == 0 && lexerState->lastToken != '\n') - token = '\n'; + if (token == 0) { + if (lexerState->lastToken != '\n') { + token = '\n'; + } else { /* Try to switch to new buffer; if it succeeds, scan again */ + /* Captures end at their buffer's boundary no matter what */ + if (!lexerState->capturing) { + if (!yywrap()) + goto restart; + } + } + } lexerState->lastToken = token; lexerState->atLineStart = false; @@ -1547,16 +1584,18 @@ int yylex(void) return token; } -void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char const **capture, size_t *size, +void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size, char const *name) { + assert(!lexerState->expansions); + lexerState->capturing = true; lexerState->captureSize = 0; unsigned int level = 0; char *captureStart; if (lexerState->isMmapped) { - captureStart = lexerState->ptr; + captureStart = &lexerState->ptr[lexerState->offset]; } else { lexerState->captureCapacity = 128; /* The initial size will be twice that */ reallocCaptureBuf(); diff --git a/src/asm/macro.c b/src/asm/macro.c index 713649430..ed0bdc646 100644 --- a/src/asm/macro.c +++ b/src/asm/macro.c @@ -29,7 +29,8 @@ struct MacroArgs { sizeof(((struct MacroArgs){0}).args[0]) * (nbArgs)) static struct MacroArgs *macroArgs = NULL; -static uint32_t uniqueID = -1; +static uint32_t uniqueID = 0; +static uint32_t maxUniqueID = 0; /* * The initialization is somewhat harmful, since it is never used, but it * guarantees the size of the buffer will be correct. 
I was unable to find a @@ -107,15 +108,23 @@ char const *macro_GetUniqueIDStr(void) void macro_SetUniqueID(uint32_t id) { uniqueID = id; - if (id == -1) { + if (id == 0) { uniqueIDPtr = NULL; } else { + if (uniqueID > maxUniqueID) + maxUniqueID = uniqueID; /* The buffer is guaranteed to be the correct size */ sprintf(uniqueIDBuf, "_%" PRIu32, id); uniqueIDPtr = uniqueIDBuf; } } +uint32_t macro_UseNewUniqueID(void) +{ + macro_SetUniqueID(++maxUniqueID); + return maxUniqueID; +} + void macro_ShiftCurrentArgs(void) { if (macroArgs->shift != macroArgs->nbArgs) diff --git a/src/asm/main.c b/src/asm/main.c index 9c559e7a9..4f264d369 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -23,8 +23,10 @@ #include "asm/lexer.h" #include "asm/main.h" #include "asm/output.h" +#include "asm/rpn.h" #include "asm/symbol.h" #include "asm/warning.h" +#include "asmy.h" #include "extern/err.h" #include "extern/getopt.h" @@ -32,8 +34,6 @@ #include "helpers.h" #include "version.h" -extern int yyparse(void); - size_t cldefines_index; size_t cldefines_numindices; size_t cldefines_bufsize; @@ -307,11 +307,11 @@ int main(int argc, char *argv[]) yydebug = 1; #endif - nMaxRecursionDepth = 64; oGeneratePhonyDeps = false; oGeneratedMissingIncludes = false; oFailedOnMissingInclude = false; tzTargetFileName = NULL; + uint32_t maxRecursionDepth = 64; size_t nTargetFileNameLen = 0; DefaultOptions.gbgfx[0] = '0'; @@ -390,7 +390,7 @@ int main(int argc, char *argv[]) break; case 'r': - nMaxRecursionDepth = strtoul(optarg, &ep, 0); + maxRecursionDepth = strtoul(optarg, &ep, 0); if (optarg[0] == '\0' || *ep != '\0') errx(1, "Invalid argument for option 'r'"); @@ -483,13 +483,9 @@ int main(int argc, char *argv[]) fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile); } - /* Init lexer; important to do first, since that's what provides the file name, line, etc */ - struct LexerState *state = lexer_OpenFile(tzMainfile); - - if (!state) - fatalerror("Failed to open main file!\n"); + /* Init file 
stack; important to do first, since it provides the file name, line, etc */ lexer_Init(); - lexer_SetState(state); + fstk_Init(tzMainfile, maxRecursionDepth); nStartClock = clock(); @@ -497,7 +493,6 @@ int main(int argc, char *argv[]) nIFDepth = 0; sym_Init(); sym_SetExportAll(exportall); - fstk_Init(tzMainfile); opt_ParseDefines(); charmap_New("main", NULL); diff --git a/src/asm/output.c b/src/asm/output.c index 45f2f1e17..b8919edd1 100644 --- a/src/asm/output.c +++ b/src/asm/output.c @@ -33,7 +33,7 @@ #include "platform.h" // strdup struct Patch { - char tzFilename[_MAX_PATH + 1]; + char *tzFilename; uint32_t nOffset; struct Section *pcSection; uint32_t pcOffset; @@ -318,7 +318,7 @@ static struct Patch *allocpatch(uint32_t type, struct Expression const *expr, fatalerror("No memory for patch's RPN expression: %s\n", strerror(errno)); patch->type = type; - fstk_DumpToStr(patch->tzFilename, sizeof(patch->tzFilename)); + patch->tzFilename = fstk_DumpToStr(); patch->nOffset = ofs; patch->pcSection = sect_GetSymbolSection(); patch->pcOffset = sect_GetSymbolOffset(); diff --git a/src/asm/symbol.c b/src/asm/symbol.c index 8253ffac7..bd790fe92 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -477,7 +477,7 @@ void sym_Export(char const *symName) /* * Add a macro definition */ -struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char const *body, size_t size) +struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size) { struct Symbol *sym = createNonrelocSymbol(symName); diff --git a/test/asm/file-sym.asm b/test/asm/file-sym.asm new file mode 100644 index 000000000..25782859a --- /dev/null +++ b/test/asm/file-sym.asm @@ -0,0 +1 @@ +PRINTT "{__FILE__}\n" diff --git a/test/asm/file-sym.err b/test/asm/file-sym.err new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/file-sym.out b/test/asm/file-sym.out new file mode 100644 index 000000000..61fc13a23 --- /dev/null +++ b/test/asm/file-sym.out @@ -0,0 +1 @@ 
+"test/asm/file-sym.asm" From 6e805cd3182390d756dc973dd61f2d31a476cdb1 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 1 Aug 2020 00:52:10 +0200 Subject: [PATCH 13/59] Implement macro args This finally allows running 90% of the test suite, debugging time! --- src/asm/lexer.c | 112 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 12 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 6bfc11876..26c4a1074 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -373,7 +373,7 @@ struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo) struct LexerState *state = malloc(sizeof(*state)); if (!state) { - error("Failed to allocate memory for lexer state: %s", strerror(errno)); + error("Failed to allocate memory for lexer state: %s\n", strerror(errno)); return NULL; } // TODO: init `path` @@ -1088,7 +1088,7 @@ static char const *readInterpolation(void) } } } else if (c == EOF || c == '\r' || c == '\n' || c == '"') { - error("Unterminated interpolation\n"); + error("Missing }\n"); break; } else if (c == '}') { shiftChars(1); @@ -1441,10 +1441,6 @@ static int yylex_NORMAL(void) shiftChars(1); /* Shift that EOL */ /* fallthrough */ case '\n': - if (lexerStateEOL) { - lexer_SetState(lexerStateEOL); - lexerStateEOL = NULL; - } return '\n'; case EOF: @@ -1490,9 +1486,95 @@ static int yylex_NORMAL(void) } } +static bool isWhitespace(int c) +{ + return c == ' ' || c == '\t'; +} + static int yylex_RAW(void) { - fatalerror("LEXER_RAW not yet implemented\n"); + /* This is essentially a modified `readString` */ + size_t i = 0; + + /* Trim left of string... */ + while (isWhitespace(peek(0))) + shiftChars(1); + + for (;;) { + int c = peek(0); + + switch (c) { + case ',': + shiftChars(1); + /* fallthrough */ + case '\r': + case '\n': /* Do not shift these! 
*/ + case EOF: + if (c != ',') + lexer_SetMode(LEXER_NORMAL); + if (i == sizeof(yylval.tzString)) { + i--; + warning(WARNING_LONG_STR, "Macro argument too long\n"); + } + /* Trim whitespace */ + while (i && isWhitespace(yylval.tzString[i - 1])) + i--; + yylval.tzString[i] = '\0'; + return T_STRING; + + case '\\': /* Character escape */ + c = peek(1); + switch (c) { + case ',': + case '\\': /* Return that character unchanged */ + case '"': + case '{': + case '}': + shiftChars(1); + break; + case 'n': + c = '\n'; + shiftChars(1); + break; + case 'r': + c = '\r'; + shiftChars(1); + break; + case 't': + c = '\t'; + shiftChars(1); + break; + + case EOF: /* Can't really print that one */ + error("Illegal character escape at end of input\n"); + c = '\\'; + break; + default: + error("Illegal character escape '%s'\n", print(c)); + c = '\\'; + break; + } + break; + + case '{': /* Symbol interpolation */ + shiftChars(1); + char const *ptr = readInterpolation(); + + if (ptr) { + while (*ptr) { + if (i == sizeof(yylval.tzString)) + break; + yylval.tzString[i++] = *ptr++; + } + } + continue; /* Do not copy an additional character */ + + /* Regular characters will just get copied */ + } + if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */ + yylval.tzString[i++] = c; + shiftChars(1); + } } /* @@ -1548,11 +1630,16 @@ static int yylex_SKIP_TO_ENDC(void) int yylex(void) { restart: - if (lexerState->atLineStart - /* Newlines read within an expansion should not increase the line count */ - && (!lexerState->expansions || lexerState->expansions->distance)) { - lexerState->lineNo++; - lexerState->colNo = 0; + if (lexerState->atLineStart) { + /* Newlines read within an expansion should not increase the line count */ + if (!lexerState->expansions || lexerState->expansions->distance) { + lexerState->lineNo++; + lexerState->colNo = 0; + } + if (lexerStateEOL) { + lexer_SetState(lexerStateEOL); + lexerStateEOL = NULL; + } } static int (* const lexerModeFuncs[])(void) = { @@ 
-1572,6 +1659,7 @@ int yylex(void) if (!lexerState->capturing) { if (!yywrap()) goto restart; + return 0; } } } From 81a77a9b884eb3208ff6c9a38062e184301ae1f6 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 1 Aug 2020 17:50:59 +0200 Subject: [PATCH 14/59] Re-implement block copy to avoid expanding macro args They were expanded during the capture, and there was no easy way to avoid expanding them (believe me, after three hours and somehow an OOM, I gave up trying). --- include/asm/lexer.h | 4 +- src/asm/asmy.y | 10 +-- src/asm/lexer.c | 189 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 164 insertions(+), 39 deletions(-) diff --git a/include/asm/lexer.h b/include/asm/lexer.h index d1cfd08bf..0f33c9160 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -51,7 +51,7 @@ uint32_t lexer_GetLineNo(void); uint32_t lexer_GetColNo(void); void lexer_DumpStringExpansions(void); int yylex(void); -void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size, - char const *name); +void lexer_CaptureRept(char **capture, size_t *size); +void lexer_CaptureMacroBody(char **capture, size_t *size); #endif /* RGBDS_ASM_LEXER_H */ diff --git a/src/asm/asmy.y b/src/asm/asmy.y index ac08843dc..81093c72b 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -599,9 +599,8 @@ rept : T_POP_REPT uconst { uint32_t nDefinitionLineNo = lexer_GetLineNo(); char *body; size_t size; - lexer_CaptureBlock(T_POP_REPT, T_POP_ENDR, &body, &size, - "REPT block"); - fstk_RunRept($2, nDefinitionLineNo, body, size - strlen("ENDR")); + lexer_CaptureRept(&body, &size); + fstk_RunRept($2, nDefinitionLineNo, body, size); } ; @@ -609,9 +608,8 @@ macrodef : T_LABEL ':' T_POP_MACRO { int32_t nDefinitionLineNo = lexer_GetLineNo(); char *body; size_t size; - lexer_CaptureBlock(T_POP_MACRO, T_POP_ENDM, &body, &size, - "macro definition"); - sym_AddMacro($1, nDefinitionLineNo, body, size - strlen("ENDM")); + lexer_CaptureMacroBody(&body, &size); + sym_AddMacro($1, 
nDefinitionLineNo, body, size); } ; diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 26c4a1074..cfb6ce51e 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -626,19 +626,19 @@ static int peek(uint8_t distance) /* Do not perform expansions while capturing */ if (!lexerState->capturing) { - /* Scan the newly-inserted chars for any macro args */ - bool escaped = false; - - while (lexerState->nbChars < distance && !escaped) { - char c = lexerState->ptr[lexerState->offset - + lexerState->nbChars++]; - - if (escaped) { - escaped = false; + /* Scan the new chars for any macro args */ +#define BUF_OFS (lexerState->offset + lexerState->nbChars) + while (lexerState->nbChars <= distance) { + char c = lexerState->ptr[BUF_OFS]; + + lexerState->nbChars++; + if (c == '\\') { + if (lexerState->size <= BUF_OFS) + break; /* This was the last char in the buffer */ + c = lexerState->ptr[BUF_OFS]; + lexerState->nbChars++; if ((c >= '1' && c <= '9') || c == '@') fatalerror("Macro arg expansion is not implemented yet\n"); - } else if (c == '\\') { - escaped = true; } } } @@ -774,13 +774,13 @@ static void shiftChars(uint8_t distance) if (lexerState->isMmapped) { lexerState->offset += distance; } else { - lexerState->nbChars -= distance; lexerState->index += distance; /* Wrap around if necessary */ if (lexerState->index >= LEXER_BUF_SIZE) lexerState->index %= LEXER_BUF_SIZE; } + lexerState->nbChars -= distance; lexerState->colNo += distance; } @@ -975,6 +975,11 @@ static void readGfxConstant(void) /* Function to read identifiers & keywords */ +static bool startsIdentifier(int c) +{ + return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'; +} + static int readIdentifier(char firstChar) { /* Lex while checking for a keyword */ @@ -1449,9 +1454,7 @@ static int yylex_NORMAL(void) /* Handle identifiers... or error out */ default: - if ((c <= 'Z' && c >= 'A') - || (c <= 'z' && c >= 'a') - || c == '.' 
|| c == '_') { + if (startsIdentifier(c)) { int tokenType = readIdentifier(c); /* If a keyword, don't try to expand */ @@ -1672,39 +1675,163 @@ int yylex(void) return token; } -void lexer_CaptureBlock(int blockStartToken, int blockEndToken, char **capture, size_t *size, - char const *name) +static char *startCapture(void) { assert(!lexerState->expansions); lexerState->capturing = true; lexerState->captureSize = 0; - unsigned int level = 0; - char *captureStart; if (lexerState->isMmapped) { - captureStart = &lexerState->ptr[lexerState->offset]; + return &lexerState->ptr[lexerState->offset]; } else { lexerState->captureCapacity = 128; /* The initial size will be twice that */ reallocCaptureBuf(); - captureStart = lexerState->captureBuf; + return lexerState->captureBuf; } +} +void lexer_CaptureRept(char **capture, size_t *size) +{ + char *captureStart = startCapture(); + unsigned int level = 0; + int c; + + /* + * Due to parser internals, it reads the EOL after the expression before calling this. + * Thus, we don't need to keep one in the buffer afterwards. + * The following assertion checks that. 
+ */ + assert(lexerState->atLineStart); for (;;) { - int token = yylex(); + /* We're at line start, so attempt to match a `REPT` or `ENDR` token */ + do { /* Discard initial whitespace */ + c = nextChar(); + } while (isWhitespace(c)); + /* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */ + if (startsIdentifier(c)) { + switch (readIdentifier(c)) { + case T_POP_REPT: + level++; + /* Ignore the rest of that line */ + break; - if (level == 0 && token == blockEndToken) - break; + case T_POP_ENDR: + if (!level) { + /* Read (but don't capture) until EOL or EOF */ + lexerState->capturing = false; + do { + c = nextChar(); + } while (c != EOF && c != '\r' && c != '\n'); + /* Handle Windows CRLF */ + if (c == '\r' && peek(0) == '\n') + shiftChars(1); + goto finish; + } + level--; + } + } + lexerState->lineNo++; + + /* Just consume characters until EOL or EOF */ + for (;;) { + if (c == EOF) { + error("Unterminated REPT block\n"); + goto finish; + } else if (c == '\n') { + break; + } else if (c == '\r') { + if (peek(0) == '\n') + shiftChars(1); + break; + } + c = nextChar(); + } + } + +finish: + *capture = captureStart; + *size = lexerState->captureSize - strlen("ENDR"); + lexerState->captureBuf = NULL; +} + +void lexer_CaptureMacroBody(char **capture, size_t *size) +{ + char *captureStart = startCapture(); + unsigned int level = 0; + int c = peek(0); + + /* + * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling + * this. Thus, we need to keep one in the buffer afterwards. + * (Note that this also means the captured buffer begins with a newline and maybe comment) + * The following assertion checks that. 
+ */ + assert(!lexerState->atLineStart); + for (;;) { + /* Just consume characters until EOL or EOF */ + for (;;) { + if (c == EOF) { + error("Unterminated macro definition\n"); + goto finish; + } else if (c == '\n') { + break; + } else if (c == '\r') { + if (peek(0) == '\n') + shiftChars(1); + break; + } + c = nextChar(); + } - if (token == EOF) - error("Unterminated %s\n", name); - else if (token == blockStartToken) - level++; - else if (token == blockEndToken) - level--; + /* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */ + do { /* Discard initial whitespace */ + c = nextChar(); + } while (isWhitespace(c)); + /* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */ + if (startsIdentifier(c)) { + switch (readIdentifier(c)) { + case T_ID: + /* We have an initial label, look for a single colon */ + do { + c = nextChar(); + } while (isWhitespace(c)); + if (c != ':') /* If not a colon, give up */ + break; + /* And finally, a `MACRO` token */ + do { + c = nextChar(); + } while (isWhitespace(c)); + if (!startsIdentifier(c)) + break; + if (readIdentifier(c) != T_POP_MACRO) + break; + level++; + break; + + case T_POP_ENDM: + if (!level) { + /* Read (but don't capture) until EOL or EOF */ + lexerState->capturing = false; + do { + c = peek(0); + if (c == EOF || c == '\r' || c == '\n') + break; + shiftChars(1); + } while (c != EOF && c != '\r' && c != '\n'); + /* Handle Windows CRLF */ + if (c == '\r' && peek(1) == '\n') + shiftChars(1); + goto finish; + } + level--; + } + } + lexerState->lineNo++; } +finish: *capture = captureStart; - *size = lexerState->captureSize; + *size = lexerState->captureSize - strlen("ENDM"); lexerState->captureBuf = NULL; } From adcaf4cd46615d424ea328431fbd4feb60d84cf0 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 2 Aug 2020 14:08:26 +0200 Subject: [PATCH 15/59] Fix crash when no macro args are being used --- src/asm/lexer.c | 117 ++++++++++++++++++++++++------------------------ src/asm/macro.c 
| 3 ++ 2 files changed, 62 insertions(+), 58 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index cfb6ce51e..291cd232b 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -553,7 +553,7 @@ static void beginExpansion(size_t distance, uint8_t skip, #define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size #define LOOKUP_POST_NEST(exp) do { \ - if (++depth >= nMaxRecursionDepth) \ + if (name && ++depth >= nMaxRecursionDepth) \ fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); \ } while (0) lookupExpansion(parent, distance); @@ -570,7 +570,7 @@ static void beginExpansion(size_t distance, uint8_t skip, fatalerror("Unable to allocate new expansion: %s\n", strerror(errno)); (*insertPoint)->firstChild = NULL; (*insertPoint)->next = NULL; /* Expansions are always performed left to right */ - (*insertPoint)->name = strdup(name); + (*insertPoint)->name = name ? strdup(name) : NULL; (*insertPoint)->contents = str; (*insertPoint)->len = size; (*insertPoint)->totalLen = size; @@ -596,6 +596,21 @@ static void freeExpansion(struct Expansion *expansion) free(expansion); } +static char const *expandMacroArg(char name, size_t distance) +{ + char const *str; + + if (name == '@') + str = macro_GetUniqueIDStr(); + else + str = macro_GetArg(name - '0'); + if (!str) + fatalerror("Macro argument '\\%c' not defined\n", name); + + beginExpansion(distance, 2, str, strlen(str), NULL); + return str; +} + /* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */ static int peek(uint8_t distance) { @@ -623,27 +638,23 @@ static int peek(uint8_t distance) * avoid that duplication. If you have any ideas, please discuss them in an issue or * pull request. Thank you! 
*/ - - /* Do not perform expansions while capturing */ - if (!lexerState->capturing) { - /* Scan the new chars for any macro args */ -#define BUF_OFS (lexerState->offset + lexerState->nbChars) - while (lexerState->nbChars <= distance) { - char c = lexerState->ptr[BUF_OFS]; - - lexerState->nbChars++; - if (c == '\\') { - if (lexerState->size <= BUF_OFS) - break; /* This was the last char in the buffer */ - c = lexerState->ptr[BUF_OFS]; - lexerState->nbChars++; - if ((c >= '1' && c <= '9') || c == '@') - fatalerror("Macro arg expansion is not implemented yet\n"); - } + unsigned char c = lexerState->ptr[lexerState->offset + distance]; + + /* If not capturing and character is a backslash, check for a macro arg */ + if (!lexerState->capturing && c == '\\') { + /* We need to read the following character, so check if that's possible */ + if (lexerState->offset + distance + 1 < lexerState->size) { + c = lexerState->ptr[lexerState->offset + distance + 1]; + if (c == '@' || (c >= '1' && c <= '9')) + /* Expand the argument and return its first character */ + c = expandMacroArg(c, distance)[0]; + /* WARNING: this assumes macro args can't be empty!! 
*/ + else + c = '\\'; } } - return (unsigned char)lexerState->ptr[lexerState->offset + distance]; + return c; } if (lexerState->nbChars <= distance) { @@ -677,45 +688,29 @@ static int peek(uint8_t distance) #undef readChars - /* Do not perform expansions when capturing */ - if (!lexerState->capturing) { - /* Scan the newly-inserted chars for any macro args */ - bool escaped = false; - size_t index = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE; - - for (ssize_t i = 0; i < totalCharsRead; i++) { - char c = lexerState->buf[index++]; - - if (escaped) { - escaped = false; - if ((c >= '1' && c <= '9') || c == '@') - fatalerror("Macro arg expansion is not implemented yet\n"); - } else if (c == '\\') { - escaped = true; - } - if (index == LEXER_BUF_SIZE) /* Wrap around buffer */ - index = 0; - } - - /* - * If last char read was a backslash, pretend we didn't read it; this is - * important, otherwise we may miss an expansion that straddles refills - */ - if (escaped) { - totalCharsRead--; - /* However, if that prevents having enough characters, error out */ - if (lexerState->nbChars + totalCharsRead <= distance) - fatalerror("Internal lexer error: cannot read far enough due to backslash\n"); - } - } - lexerState->nbChars += totalCharsRead; /* If there aren't enough chars even after refilling, give up */ if (lexerState->nbChars <= distance) return EOF; } - return (unsigned char)lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; + unsigned char c = lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; + + /* If not capturing and character is a backslash, check for a macro arg */ + if (!lexerState->capturing && c == '\\') { + /* We need to read the character at `distance + 1`, so check if that's possible */ + if (lexerState->nbChars == distance + 1) /* We know that ...->nbChars > distance */ + fatalerror("Internal lexer error: not enough lookahead for macro arg check\n"); + c = lexerState->buf[(lexerState->index + distance + 1) % 
LEXER_BUF_SIZE]; + if (c == '@' || (c >= '1' && c <= '9')) + /* Expand the argument and return its first character */ + c = expandMacroArg(c, distance)[0]; + /* WARNING: this assumes macro args can't be empty!! */ + else + c = '\\'; + } + + return c; } static void shiftChars(uint8_t distance) @@ -775,13 +770,13 @@ static void shiftChars(uint8_t distance) lexerState->offset += distance; } else { lexerState->index += distance; + lexerState->colNo += distance; /* Wrap around if necessary */ if (lexerState->index >= LEXER_BUF_SIZE) lexerState->index %= LEXER_BUF_SIZE; } lexerState->nbChars -= distance; - lexerState->colNo += distance; } static int nextChar(void) @@ -816,12 +811,17 @@ void lexer_DumpStringExpansions(void) if (!lexerState) return; struct Expansion *stack[nMaxRecursionDepth + 1]; + struct Expansion *expansion; unsigned int depth = 0; size_t distance = lexerState->expansionOfs; -#define LOOKUP_PRE_NEST(exp) +#define LOOKUP_PRE_NEST(exp) do { \ + /* Only register EQUS expansions, not string args */ \ + if (expansion->name) \ + stack[depth++] = expansion; \ +} while (0) #define LOOKUP_POST_NEST(exp) - lookupExpansion(stack[depth++], distance); + lookupExpansion(expansion, distance); #undef LOOKUP_PRE_NEST #undef LOOKUP_POST_NEST @@ -1513,8 +1513,9 @@ static int yylex_RAW(void) case '\r': case '\n': /* Do not shift these! */ case EOF: - if (c != ',') - lexer_SetMode(LEXER_NORMAL); + /* Empty macro args break their expansion, so prevent that */ + if (i == 0) + return c; if (i == sizeof(yylval.tzString)) { i--; warning(WARNING_LONG_STR, "Macro argument too long\n"); diff --git a/src/asm/macro.c b/src/asm/macro.c index ed0bdc646..310372842 100644 --- a/src/asm/macro.c +++ b/src/asm/macro.c @@ -89,6 +89,9 @@ void macro_FreeArgs(struct MacroArgs *args) char const *macro_GetArg(uint32_t i) { + if (!macroArgs) + return NULL; + uint32_t realIndex = i + macroArgs->shift - 1; return realIndex >= macroArgs->nbArgs ? 
NULL From df75fd2ec2d959e8c69e54b491eb6c1de4157cd3 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 2 Aug 2020 15:48:37 +0200 Subject: [PATCH 16/59] Fix expansion reporting being incorrect --- src/asm/lexer.c | 80 +++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 46 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 291cd232b..027aaf6ea 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -254,6 +254,7 @@ struct LexerState { int fd; size_t index; /* Read index into the buffer */ char buf[LEXER_BUF_SIZE]; /* Circular buffer */ + size_t nbChars; /* Number of "fresh" chars in the buffer */ }; }; @@ -271,7 +272,7 @@ struct LexerState { char *captureBuf; /* Buffer to send the captured text to if non-NULL */ size_t captureCapacity; /* Size of the buffer above */ - size_t nbChars; /* Number of chars of lookahead, for processing expansions */ + size_t expansionDistance; /* Distance already considered for expansions */ bool expandStrings; struct Expansion *expansions; size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */ @@ -289,7 +290,7 @@ static void initState(struct LexerState *state) state->capturing = false; state->captureBuf = NULL; - state->nbChars = 0; + state->expansionDistance = 0; state->expandStrings = true; state->expansions = NULL; state->expansionOfs = 0; @@ -361,6 +362,7 @@ struct LexerState *lexer_OpenFile(char const *path) path, strerror(errno)); lseek(state->fd, 0, SEEK_SET); state->index = 0; + state->nbChars = 0; } initState(state); @@ -612,7 +614,7 @@ static char const *expandMacroArg(char name, size_t distance) } /* If at any point we need more than 255 characters of lookahead, something went VERY wrong. 
*/ -static int peek(uint8_t distance) +static int peekInternal(uint8_t distance) { if (distance >= LEXER_BUF_SIZE) fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n", @@ -632,29 +634,7 @@ static int peek(uint8_t distance) if (lexerState->offset + distance >= lexerState->size) return EOF; - /* - * Note: the following block is also duplicated for the non-mmap() path. This sucks. - * However, due to subtle handling differences, I haven't found a clean way to - * avoid that duplication. If you have any ideas, please discuss them in an issue or - * pull request. Thank you! - */ - unsigned char c = lexerState->ptr[lexerState->offset + distance]; - - /* If not capturing and character is a backslash, check for a macro arg */ - if (!lexerState->capturing && c == '\\') { - /* We need to read the following character, so check if that's possible */ - if (lexerState->offset + distance + 1 < lexerState->size) { - c = lexerState->ptr[lexerState->offset + distance + 1]; - if (c == '@' || (c >= '1' && c <= '9')) - /* Expand the argument and return its first character */ - c = expandMacroArg(c, distance)[0]; - /* WARNING: this assumes macro args can't be empty!! 
*/ - else - c = '\\'; - } - } - - return c; + return (unsigned char)lexerState->ptr[lexerState->offset + distance]; } if (lexerState->nbChars <= distance) { @@ -694,22 +674,28 @@ static int peek(uint8_t distance) if (lexerState->nbChars <= distance) return EOF; } - unsigned char c = lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; - - /* If not capturing and character is a backslash, check for a macro arg */ - if (!lexerState->capturing && c == '\\') { - /* We need to read the character at `distance + 1`, so check if that's possible */ - if (lexerState->nbChars == distance + 1) /* We know that ...->nbChars > distance */ - fatalerror("Internal lexer error: not enough lookahead for macro arg check\n"); - c = lexerState->buf[(lexerState->index + distance + 1) % LEXER_BUF_SIZE]; - if (c == '@' || (c >= '1' && c <= '9')) - /* Expand the argument and return its first character */ - c = expandMacroArg(c, distance)[0]; - /* WARNING: this assumes macro args can't be empty!! */ - else - c = '\\'; - } + return (unsigned char)lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE]; +} +static int peek(uint8_t distance) +{ + int c = peekInternal(distance); + + if (distance >= lexerState->expansionDistance) { + /* If not capturing and character is a backslash, check for a macro arg */ + if (!lexerState->capturing && c == '\\') { + distance++; + c = peekInternal(distance); + if (c == '@' || (c >= '1' && c <= '9')) { + /* Expand the argument and return its first character */ + c = expandMacroArg(c, distance - 1)[0]; + /* WARNING: this assumes macro args can't be empty!! */ + } else { + c = '\\'; + } + } + lexerState->expansionDistance = distance + 1; /* Do not consider again */ + } return c; } @@ -727,6 +713,8 @@ static void shiftChars(uint8_t distance) } } + lexerState->expansionDistance -= distance; + /* FIXME: this may not be too great, as only the top level is considered... 
*/ /* @@ -774,9 +762,8 @@ static void shiftChars(uint8_t distance) /* Wrap around if necessary */ if (lexerState->index >= LEXER_BUF_SIZE) lexerState->index %= LEXER_BUF_SIZE; + lexerState->nbChars -= distance; } - - lexerState->nbChars -= distance; } static int nextChar(void) @@ -811,17 +798,18 @@ void lexer_DumpStringExpansions(void) if (!lexerState) return; struct Expansion *stack[nMaxRecursionDepth + 1]; - struct Expansion *expansion; + struct Expansion *expansion; /* Temp var for `lookupExpansion` */ unsigned int depth = 0; size_t distance = lexerState->expansionOfs; #define LOOKUP_PRE_NEST(exp) do { \ /* Only register EQUS expansions, not string args */ \ - if (expansion->name) \ - stack[depth++] = expansion; \ + if ((exp)->name) \ + stack[depth++] = (exp); \ } while (0) #define LOOKUP_POST_NEST(exp) lookupExpansion(expansion, distance); + (void)expansion; #undef LOOKUP_PRE_NEST #undef LOOKUP_POST_NEST From cd747d81750f6a04867782e3581a19b66272380b Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Fri, 14 Aug 2020 21:11:44 +0200 Subject: [PATCH 17/59] Fix many lexer bugs More to come... 
--- Makefile | 2 +- include/asm/util.h | 2 +- src/asm/fstack.c | 13 +- src/asm/lexer.c | 164 ++++++++++++++-------- src/asm/util.c | 5 +- test/asm/assert.err | 2 +- test/asm/divzero-instr.err | 2 +- test/asm/divzero-section-bank.err | 2 +- test/asm/equs-newline.err | 7 +- test/asm/equs-purge.err | 2 +- test/asm/equs-recursion.err | 3 +- test/asm/garbage_char.asm | 2 +- test/asm/garbage_char.err | 3 +- test/asm/if@-no-sect.asm | 2 +- test/asm/include-recursion.err | 2 +- test/asm/line-continuation-rept.asm | 6 +- test/asm/line-continuation-whitespace.asm | 3 +- test/asm/load-overflow.err | 2 +- test/asm/local-purge.err | 2 +- test/asm/local-ref-without-parent.err | 2 +- test/asm/macro-line-no.asm | 8 ++ test/asm/macro-line-no.err | 8 ++ test/asm/macro-line-no.out | 0 test/asm/macro-recursion.err | 2 +- test/asm/nested-brackets.err | 1 + test/asm/nested-brackets.out | 1 + test/asm/null-in-macro.asm | Bin 15 -> 23 bytes test/asm/null-in-macro.err | 5 +- test/asm/pops-no-pushed-sections.err | 2 +- test/asm/pops-restore-no-section.err | 2 +- test/asm/remote-local-noexist.err | 2 +- test/asm/rept-shift.err | 2 +- test/asm/section-union.err | 2 +- test/asm/sym-collision.err | 2 +- test/asm/sym-collision.out | 2 +- test/asm/symbol-invalid-macro-arg.err | 4 +- test/asm/test.sh | 6 +- test/asm/unique-id.err | 18 +-- 38 files changed, 188 insertions(+), 107 deletions(-) create mode 100644 test/asm/macro-line-no.asm create mode 100644 test/asm/macro-line-no.err create mode 100644 test/asm/macro-line-no.out diff --git a/Makefile b/Makefile index cecb0a6ee..84d162569 100644 --- a/Makefile +++ b/Makefile @@ -198,7 +198,7 @@ develop: -fsanitize=unreachable -fsanitize=vla-bound \ -fsanitize=signed-integer-overflow -fsanitize=bounds \ -fsanitize=object-size -fsanitize=bool -fsanitize=enum \ - -fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-g -O0" + -fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-ggdb3 -O0" # Targets for the project maintainer to easily create 
Windows exes. # This is not for Windows users! diff --git a/include/asm/util.h b/include/asm/util.h index d0c12bfb4..ca2e43e03 100644 --- a/include/asm/util.h +++ b/include/asm/util.h @@ -12,7 +12,7 @@ #include uint32_t calchash(const char *s); -char const *print(char c); +char const *print(int c); size_t readUTF8Char(uint8_t *dest, char const *src); #endif /* RGBDS_UTIL_H */ diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 3e02c936f..65cee70f1 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -26,7 +26,7 @@ struct Context { struct Context *child; struct LexerState *lexerState; uint32_t uniqueID; - char *fileName; + char const *fileName; uint32_t lineNo; /* Line number at which the context was EXITED */ struct Symbol const *macro; uint32_t nbReptIters; /* If zero, this isn't a REPT block */ @@ -149,14 +149,12 @@ bool yywrap(void) contextDepth--; lexer_DeleteState(contextStack->child->lexerState); - /* If at top level (= not in macro or in REPT), free the file name */ - if (!contextStack->macro && contextStack->reptIters == 0) - free(contextStack->child->fileName); /* Free the entry and make its parent the current entry */ free(contextStack->child); contextStack->child = NULL; lexer_SetState(contextStack->lexerState); + macro_SetUniqueID(contextStack->uniqueID); return false; } @@ -197,7 +195,7 @@ void fstk_RunInclude(char const *path) /* We're back at top-level, so most things are reset */ contextStack->uniqueID = 0; macro_SetUniqueID(0); - contextStack->fileName = fullPath; + contextStack->fileName = lexer_GetFileName(); contextStack->macro = NULL; contextStack->nbReptIters = 0; } @@ -217,8 +215,9 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) macro_UseNewArgs(args); newContext(0); + /* Line minus 1 because buffer begins with a newline */ contextStack->lexerState = lexer_OpenFileView(macro->macro, - macro->macroSize, macro->fileLine); + macro->macroSize, macro->fileLine - 1); if (!contextStack->lexerState) fatalerror("Failed to set up 
lexer for macro invocation\n"); lexer_SetStateAtEOL(contextStack->lexerState); @@ -311,7 +310,7 @@ void fstk_Init(char *mainPath, uint32_t maxRecursionDepth) lexer_SetState(topLevelContext->lexerState); topLevelContext->uniqueID = 0; macro_SetUniqueID(0); - topLevelContext->fileName = mainPath; + topLevelContext->fileName = lexer_GetFileName(); topLevelContext->macro = NULL; topLevelContext->nbReptIters = 0; topLevelContext->reptDepth = 0; diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 027aaf6ea..54bc0adba 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -272,7 +272,6 @@ struct LexerState { char *captureBuf; /* Buffer to send the captured text to if non-NULL */ size_t captureCapacity; /* Size of the buffer above */ - size_t expansionDistance; /* Distance already considered for expansions */ bool expandStrings; struct Expansion *expansions; size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */ @@ -290,7 +289,6 @@ static void initState(struct LexerState *state) state->capturing = false; state->captureBuf = NULL; - state->expansionDistance = 0; state->expandStrings = true; state->expansions = NULL; state->expansionOfs = 0; @@ -538,7 +536,31 @@ static struct Expansion *getExpansionAtDistance(size_t *distance) #define LOOKUP_PRE_NEST(exp) #define LOOKUP_POST_NEST(exp) - lookupExpansion(expansion, *distance); + struct Expansion *exp = lexerState->expansions; + + for (;;) { + /* Find the closest expansion whose end is after the target */ + while (exp && exp->totalLen + exp->distance <= *distance) { + *distance -= exp->totalLen - exp->skip; + exp = exp->next; + } + + /* If there is none, or it begins after the target, return the previous level */ + if (!exp || exp->distance > *distance) + break; + + /* We know we are inside of that expansion */ + *distance -= exp->distance; /* Distances are relative to their parent */ + + /* Otherwise, register this expansion and repeat the process */ + LOOKUP_PRE_NEST(exp); + expansion = 
exp; + if (!exp->firstChild) /* If there are no children, this is it */ + break; + exp = exp->firstChild; + + LOOKUP_POST_NEST(exp); + } #undef LOOKUP_PRE_NEST #undef LOOKUP_POST_NEST @@ -553,7 +575,7 @@ static void beginExpansion(size_t distance, uint8_t skip, struct Expansion *parent = NULL; unsigned int depth = 0; -#define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size +#define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size - skip #define LOOKUP_POST_NEST(exp) do { \ if (name && ++depth >= nMaxRecursionDepth) \ fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); \ @@ -604,6 +626,8 @@ static char const *expandMacroArg(char name, size_t distance) if (name == '@') str = macro_GetUniqueIDStr(); + else if (name == '0') + fatalerror("Invalid macro argument '\\0'\n"); else str = macro_GetArg(name - '0'); if (!str) @@ -624,11 +648,11 @@ static int peekInternal(uint8_t distance) struct Expansion const *expansion = getExpansionAtDistance(&ofs); if (expansion) { - assert(distance < expansion->len); + assert(ofs < expansion->len); return expansion->contents[ofs]; } - distance = ofs - lexerState->expansionOfs; + distance = ofs; if (lexerState->isMmapped) { if (lexerState->offset + distance >= lexerState->size) @@ -681,20 +705,17 @@ static int peek(uint8_t distance) { int c = peekInternal(distance); - if (distance >= lexerState->expansionDistance) { - /* If not capturing and character is a backslash, check for a macro arg */ - if (!lexerState->capturing && c == '\\') { - distance++; - c = peekInternal(distance); - if (c == '@' || (c >= '1' && c <= '9')) { - /* Expand the argument and return its first character */ - c = expandMacroArg(c, distance - 1)[0]; - /* WARNING: this assumes macro args can't be empty!! 
*/ - } else { - c = '\\'; - } + /* If not capturing and character is a backslash, check for a macro arg */ + if (!lexerState->capturing && c == '\\') { + distance++; + c = peekInternal(distance); + if (c == '@' || (c >= '0' && c <= '9')) { + /* Expand the argument and return its first character */ + c = expandMacroArg(c, distance - 1)[0]; + /* WARNING: this assumes macro args can't be empty!! */ + } else { + c = '\\'; } - lexerState->expansionDistance = distance + 1; /* Do not consider again */ } return c; } @@ -713,8 +734,6 @@ static void shiftChars(uint8_t distance) } } - lexerState->expansionDistance -= distance; - /* FIXME: this may not be too great, as only the top level is considered... */ /* @@ -830,6 +849,35 @@ static void discardComment(void) } } +/* Function to read a line continuation */ + +static bool isWhitespace(int c) +{ + return c == ' ' || c == '\t'; +} + +static void readLineContinuation(void) +{ + for (;;) { + int c = peek(0); + + if (isWhitespace(c)) { + shiftChars(1); + } else if (c == '\r' || c == '\n') { + shiftChars(1); + if (!lexerState->expansions + || lexerState->expansions->distance) { + lexerState->lineNo++; + } + return; + } else { + error("Begun line continuation, but encountered character %s\n", + print(c)); + return; + } + } +} + /* Functions to lex numbers of various radixes */ static void readNumber(int radix, int32_t baseValue) @@ -1190,6 +1238,13 @@ static void readString(void) shiftChars(1); break; + case ' ': + case '\r': + case '\n': + shiftChars(1); /* Shift the backslash */ + readLineContinuation(); + continue; + case EOF: /* Can't really print that one */ error("Illegal character escape at end of input\n"); c = '\\'; @@ -1477,15 +1532,11 @@ static int yylex_NORMAL(void) } } -static bool isWhitespace(int c) -{ - return c == ' ' || c == '\t'; -} - static int yylex_RAW(void) { /* This is essentially a modified `readString` */ size_t i = 0; + bool insideString = false; /* Trim left of string... 
*/ while (isWhitespace(peek(0))) @@ -1495,15 +1546,23 @@ static int yylex_RAW(void) int c = peek(0); switch (c) { - case ',': - shiftChars(1); + case '"': + insideString = !insideString; + /* Other than that, just process quotes normally */ + break; + + case ';': /* Comments inside macro args */ + if (insideString) + break; + do { + shiftChars(1); + c = peek(0); + } while (c != EOF && c != '\r' && c != '\n'); /* fallthrough */ + case ',': case '\r': - case '\n': /* Do not shift these! */ + case '\n': case EOF: - /* Empty macro args break their expansion, so prevent that */ - if (i == 0) - return c; if (i == sizeof(yylval.tzString)) { i--; warning(WARNING_LONG_STR, "Macro argument too long\n"); @@ -1511,6 +1570,11 @@ static int yylex_RAW(void) /* Trim whitespace */ while (i && isWhitespace(yylval.tzString[i - 1])) i--; + /* Empty macro args break their expansion, so prevent that */ + if (i == 0) { + shiftChars(1); + return c == EOF ? 0 : c; + } yylval.tzString[i] = '\0'; return T_STRING; @@ -1518,31 +1582,21 @@ static int yylex_RAW(void) c = peek(1); switch (c) { case ',': - case '\\': /* Return that character unchanged */ - case '"': - case '{': - case '}': - shiftChars(1); - break; - case 'n': - c = '\n'; - shiftChars(1); - break; - case 'r': - c = '\r'; - shiftChars(1); - break; - case 't': - c = '\t'; shiftChars(1); break; + case ' ': + case '\r': + case '\n': + shiftChars(1); /* Shift the backslash */ + readLineContinuation(); + continue; + case EOF: /* Can't really print that one */ error("Illegal character escape at end of input\n"); c = '\\'; break; - default: - error("Illegal character escape '%s'\n", print(c)); + default: /* Pass the rest as-is */ c = '\\'; break; } @@ -1622,16 +1676,16 @@ static int yylex_SKIP_TO_ENDC(void) int yylex(void) { restart: + if (lexerState->atLineStart && lexerStateEOL) { + lexer_SetState(lexerStateEOL); + lexerStateEOL = NULL; + } if (lexerState->atLineStart) { /* Newlines read within an expansion should not increase the line 
count */ if (!lexerState->expansions || lexerState->expansions->distance) { lexerState->lineNo++; lexerState->colNo = 0; } - if (lexerStateEOL) { - lexer_SetState(lexerStateEOL); - lexerStateEOL = NULL; - } } static int (* const lexerModeFuncs[])(void) = { @@ -1693,6 +1747,7 @@ void lexer_CaptureRept(char **capture, size_t *size) */ assert(lexerState->atLineStart); for (;;) { + lexerState->lineNo++; /* We're at line start, so attempt to match a `REPT` or `ENDR` token */ do { /* Discard initial whitespace */ c = nextChar(); @@ -1720,7 +1775,6 @@ void lexer_CaptureRept(char **capture, size_t *size) level--; } } - lexerState->lineNo++; /* Just consume characters until EOL or EOF */ for (;;) { diff --git a/src/asm/util.c b/src/asm/util.c index ed5836c16..9af73ec07 100644 --- a/src/asm/util.c +++ b/src/asm/util.c @@ -28,10 +28,13 @@ uint32_t calchash(const char *s) return hash; } -char const *print(char c) +char const *print(int c) { static char buf[5]; /* '\xNN' + '\0' */ + if (c == EOF) + return "EOF"; + if (isprint(c)) { buf[0] = c; buf[1] = '\0'; diff --git a/test/asm/assert.err b/test/asm/assert.err index e4ef868de..8ce4402f9 100644 --- a/test/asm/assert.err +++ b/test/asm/assert.err @@ -6,5 +6,5 @@ ERROR: assert.asm(18): Expected constant expression: 'FloatingBase' is not constant at assembly time ERROR: assert.asm(18): Assertion failed -ERROR: assert.asm(21): +FATAL: assert.asm(21): Assertion failed diff --git a/test/asm/divzero-instr.err b/test/asm/divzero-instr.err index e1e0e3e2d..6a7ed04c4 100644 --- a/test/asm/divzero-instr.err +++ b/test/asm/divzero-instr.err @@ -1,2 +1,2 @@ -ERROR: divzero-instr.asm(2): +FATAL: divzero-instr.asm(2): Division by zero diff --git a/test/asm/divzero-section-bank.err b/test/asm/divzero-section-bank.err index b382e48f2..31044dc06 100644 --- a/test/asm/divzero-section-bank.err +++ b/test/asm/divzero-section-bank.err @@ -1,2 +1,2 @@ -ERROR: divzero-section-bank.asm(1): +FATAL: divzero-section-bank.asm(1): Division by zero diff 
--git a/test/asm/equs-newline.err b/test/asm/equs-newline.err index 620f86313..04ceb7beb 100644 --- a/test/asm/equs-newline.err +++ b/test/asm/equs-newline.err @@ -1,8 +1,7 @@ -warning: test/asm/equs-newline.asm(2): [-Wuser] +warning: equs-newline.asm(3): [-Wuser] First while expanding symbol "ACT" -warning: test/asm/equs-newline.asm(3): [-Wuser] +warning: equs-newline.asm(3): [-Wuser] Second -while expanding symbol "ACT" -warning: test/asm/equs-newline.asm(4): [-Wuser] +warning: equs-newline.asm(4): [-Wuser] Third diff --git a/test/asm/equs-purge.err b/test/asm/equs-purge.err index 6d834761b..c42c2bac3 100644 --- a/test/asm/equs-purge.err +++ b/test/asm/equs-purge.err @@ -1,3 +1,3 @@ -warning: test/asm/equs-purge.asm(0): [-Wuser] +warning: equs-purge.asm(2): [-Wuser] Crash? while expanding symbol "BYE" diff --git a/test/asm/equs-recursion.err b/test/asm/equs-recursion.err index c6180ed3b..002d04903 100644 --- a/test/asm/equs-recursion.err +++ b/test/asm/equs-recursion.err @@ -1,4 +1,4 @@ -ERROR: equs-recursion.asm(2): +FATAL: equs-recursion.asm(2): Recursion limit (64) exceeded while expanding symbol "recurse" while expanding symbol "recurse" @@ -64,3 +64,4 @@ while expanding symbol "recurse" while expanding symbol "recurse" while expanding symbol "recurse" while expanding symbol "recurse" +while expanding symbol "recurse" diff --git a/test/asm/garbage_char.asm b/test/asm/garbage_char.asm index ca5f132c4..29e181ebb 100644 --- a/test/asm/garbage_char.asm +++ b/test/asm/garbage_char.asm @@ -1 +1 @@ -x \ No newline at end of file + diff --git a/test/asm/garbage_char.err b/test/asm/garbage_char.err index b2a30cb7b..7f51a3e4f 100644 --- a/test/asm/garbage_char.err +++ b/test/asm/garbage_char.err @@ -1,2 +1,3 @@ ERROR: garbage_char.asm(1): - Found garbage character: 0xFF + Unknown character 0xFF +error: Assembly aborted (1 errors)! 
diff --git a/test/asm/if@-no-sect.asm b/test/asm/if@-no-sect.asm index 7c6072d97..5ea733f7f 100644 --- a/test/asm/if@-no-sect.asm +++ b/test/asm/if@-no-sect.asm @@ -1,2 +1,2 @@ -if {@} +if "{@}" endc diff --git a/test/asm/include-recursion.err b/test/asm/include-recursion.err index 8a256e6b7..1d598df30 100644 --- a/test/asm/include-recursion.err +++ b/test/asm/include-recursion.err @@ -1,2 +1,2 @@ -ERROR: include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> 
include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1): +FATAL: include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> 
include-recursion.asm(1) -> include-recursion.asm(1): Recursion limit (64) exceeded diff --git a/test/asm/line-continuation-rept.asm b/test/asm/line-continuation-rept.asm index 90a354ada..e62fddef8 100644 --- a/test/asm/line-continuation-rept.asm +++ b/test/asm/line-continuation-rept.asm @@ -2,7 +2,9 @@ m: MACRO ENDM REPT 1 - m ENDR + m +ENDR REPT 1 - m \ ENDR + m \ +ENDR diff --git a/test/asm/line-continuation-whitespace.asm b/test/asm/line-continuation-whitespace.asm index 43b1fa33b..52b6b465b 100644 --- a/test/asm/line-continuation-whitespace.asm +++ b/test/asm/line-continuation-whitespace.asm @@ -2,6 +2,7 @@ ; file doesn't cause a segfault. bar: MACRO + WARN "" ENDM -foo: bar baz\ +foo: bar baz\ diff --git a/test/asm/load-overflow.err b/test/asm/load-overflow.err index be3e50b9f..68bcfc5a7 100644 --- a/test/asm/load-overflow.err +++ b/test/asm/load-overflow.err @@ -1,2 +1,2 @@ -ERROR: load-overflow.asm(4): +FATAL: load-overflow.asm(4): Section 'Overflow' grew too big (max size = 0x8000 bytes, reached 0x8001). diff --git a/test/asm/local-purge.err b/test/asm/local-purge.err index 4fb44b6ec..3daacd320 100644 --- a/test/asm/local-purge.err +++ b/test/asm/local-purge.err @@ -1,3 +1,3 @@ ERROR: local-purge.asm(8): - '.loc' not defined + Interpolated symbol ".loc" does not exist error: Assembly aborted (1 errors)! 
diff --git a/test/asm/local-ref-without-parent.err b/test/asm/local-ref-without-parent.err index fd784ef32..d332bdf97 100644 --- a/test/asm/local-ref-without-parent.err +++ b/test/asm/local-ref-without-parent.err @@ -1,2 +1,2 @@ -ERROR: local-ref-without-parent.asm(3): +FATAL: local-ref-without-parent.asm(3): Local label reference '.test' in main scope diff --git a/test/asm/macro-line-no.asm b/test/asm/macro-line-no.asm new file mode 100644 index 000000000..89adebbb9 --- /dev/null +++ b/test/asm/macro-line-no.asm @@ -0,0 +1,8 @@ + +WARN "Line 2" +m: macro + WARN "Line 4" +endm +WARN "Line 6" + m +WARN "Line 8" diff --git a/test/asm/macro-line-no.err b/test/asm/macro-line-no.err new file mode 100644 index 000000000..947639e55 --- /dev/null +++ b/test/asm/macro-line-no.err @@ -0,0 +1,8 @@ +warning: macro-line-no.asm(2): [-Wuser] + Line 2 +warning: macro-line-no.asm(6): [-Wuser] + Line 6 +warning: macro-line-no.asm(7) -> macro-line-no.asm::m(4): [-Wuser] + Line 4 +warning: macro-line-no.asm(8): [-Wuser] + Line 8 diff --git a/test/asm/macro-line-no.out b/test/asm/macro-line-no.out new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/macro-recursion.err b/test/asm/macro-recursion.err index e69c4c5f9..d515ed61e 100644 --- a/test/asm/macro-recursion.err +++ b/test/asm/macro-recursion.err @@ -1,2 +1,2 @@ -ERROR: macro-recursion.asm(4) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> 
macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2): +FATAL: macro-recursion.asm(4) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> 
macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2): Recursion limit (64) exceeded diff --git a/test/asm/nested-brackets.err b/test/asm/nested-brackets.err index b3e006431..24035a4f9 100644 --- 
a/test/asm/nested-brackets.err +++ b/test/asm/nested-brackets.err @@ -1,2 +1,3 @@ ERROR: nested-brackets.asm(5): Missing } +error: Assembly aborted (1 errors)! diff --git a/test/asm/nested-brackets.out b/test/asm/nested-brackets.out index d86bac9de..bcaba8626 100644 --- a/test/asm/nested-brackets.out +++ b/test/asm/nested-brackets.out @@ -1 +1,2 @@ OK +OK \ No newline at end of file diff --git a/test/asm/null-in-macro.asm b/test/asm/null-in-macro.asm index c9f87a3db649b187390074c2dd3139012bb8803e..944922e8d78e4c4b242bfc9ae2c3e1fc4c15b35b 100644 GIT binary patch literal 23 ecmYex&$m*@O-wGz=i+4GO3h2j null-in-macro.asm::foo(2): + Unknown character 0x00 +error: Assembly aborted (1 errors)! diff --git a/test/asm/pops-no-pushed-sections.err b/test/asm/pops-no-pushed-sections.err index 4f8c06c19..7dfcea483 100644 --- a/test/asm/pops-no-pushed-sections.err +++ b/test/asm/pops-no-pushed-sections.err @@ -1,2 +1,2 @@ -ERROR: pops-no-pushed-sections.asm(1): +FATAL: pops-no-pushed-sections.asm(1): No entries in the section stack diff --git a/test/asm/pops-restore-no-section.err b/test/asm/pops-restore-no-section.err index 4eac44422..c00efd30e 100644 --- a/test/asm/pops-restore-no-section.err +++ b/test/asm/pops-restore-no-section.err @@ -1,4 +1,4 @@ ERROR: pops-restore-no-section.asm(9): Label "DisallowedContent" created outside of a SECTION -ERROR: pops-restore-no-section.asm(10): +FATAL: pops-restore-no-section.asm(10): Code generation before SECTION directive diff --git a/test/asm/remote-local-noexist.err b/test/asm/remote-local-noexist.err index f1fbac287..3193664b3 100644 --- a/test/asm/remote-local-noexist.err +++ b/test/asm/remote-local-noexist.err @@ -1,2 +1,2 @@ -ERROR: remote-local-noexist.asm(7): +FATAL: remote-local-noexist.asm(7): 'Parent.child.fail' is a nonsensical reference to a nested local symbol diff --git a/test/asm/rept-shift.err b/test/asm/rept-shift.err index 531d3cd3e..30fd34900 100644 --- a/test/asm/rept-shift.err +++ b/test/asm/rept-shift.err @@ -1,2 
+1,2 @@ -ERROR: rept-shift.asm(17) -> rept-shift.asm::m(14): +FATAL: rept-shift.asm(17) -> rept-shift.asm::m(14): Macro argument '\1' not defined diff --git a/test/asm/section-union.err b/test/asm/section-union.err index e50f5ceb1..c0c2921f0 100644 --- a/test/asm/section-union.err +++ b/test/asm/section-union.err @@ -6,5 +6,5 @@ ERROR: section-union.asm(37): Section "test" already declared as fixed at $c000 ERROR: section-union.asm(37): Section "test" already declared as aligned to 256 bytes -ERROR: section-union.asm(37): +FATAL: section-union.asm(37): Cannot create section "test" (3 errors) diff --git a/test/asm/sym-collision.err b/test/asm/sym-collision.err index 53a25652c..29a82c43f 100644 --- a/test/asm/sym-collision.err +++ b/test/asm/sym-collision.err @@ -1,3 +1,3 @@ ERROR: sym-collision.asm(26): - 'dork' not defined + Interpolated symbol "dork" does not exist error: Assembly aborted (1 errors)! diff --git a/test/asm/sym-collision.out b/test/asm/sym-collision.out index 5f7cb0108..cab13a559 100644 --- a/test/asm/sym-collision.out +++ b/test/asm/sym-collision.out @@ -1,7 +1,7 @@ aqfj: $FE00 cxje: $FE01 dgsd: $FE02 -dork: $0 +dork: lxok: $FE04 psgp: $FE05 sfly: $FE06 diff --git a/test/asm/symbol-invalid-macro-arg.err b/test/asm/symbol-invalid-macro-arg.err index ff94c3c24..2e03eab64 100644 --- a/test/asm/symbol-invalid-macro-arg.err +++ b/test/asm/symbol-invalid-macro-arg.err @@ -1,2 +1,2 @@ -ERROR: symbol-invalid-macro-arg.asm(1): - Invalid macro argument '\0' in symbol +FATAL: symbol-invalid-macro-arg.asm(1): + Invalid macro argument '\0' diff --git a/test/asm/test.sh b/test/asm/test.sh index a4ad6a232..678bb0dbe 100755 --- a/test/asm/test.sh +++ b/test/asm/test.sh @@ -12,6 +12,7 @@ rc=0 bold=$(tput bold) resbold=$(tput sgr0) red=$(tput setaf 1) +green=$(tput setaf 2) rescolors=$(tput op) tryDiff () { diff -u --strip-trailing-cr $1 $2 || (echo "${bold}${red}${i%.asm}${variant}.$3 mismatch!${rescolors}${resbold}"; false) @@ -36,6 +37,7 @@ fi for i in *.asm; do 
for variant in '' '.pipe'; do + echo -e "${bold}${green}${i%.asm}${variant}...${rescolors}${resbold}" if [ -z "$variant" ]; then ../../rgbasm -Weverything -o $o $i > $output 2> $errput desired_output=${i%.asm}.out @@ -59,8 +61,8 @@ for i in *.asm; do # Escape regex metacharacters subst="$(printf '%s\n' "$i" | sed 's:[][\/.^$*]:\\&:g')" # Replace the file name with a dash to match changed output - sed "s/$subst/-/g" ${i%.asm}.out > $desired_output - sed "s/$subst/-/g" ${i%.asm}.err > $desired_errput + sed "s/$subst//g" ${i%.asm}.out > $desired_output + sed "s/$subst//g" ${i%.asm}.err > $desired_errput fi tryDiff $desired_output $output out diff --git a/test/asm/unique-id.err b/test/asm/unique-id.err index a39a3fa8e..9c5fdd472 100644 --- a/test/asm/unique-id.err +++ b/test/asm/unique-id.err @@ -1,19 +1,19 @@ warning: unique-id.asm(12) -> unique-id.asm::m(4): [-Wuser] - _0 -warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _1 -warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] +warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _2 +warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] + _3 warning: unique-id.asm(12) -> unique-id.asm::m(8): [-Wuser] - _0 + _1 warning: unique-id.asm(14) -> unique-id.asm::m(4): [-Wuser] - _3 -warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _4 -warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] +warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser] _5 +warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser] + _6 warning: unique-id.asm(14) -> unique-id.asm::m(8): [-Wuser] - _3 -ERROR: unique-id.asm(15): + _4 +FATAL: unique-id.asm(15): Macro argument '\@' not defined while expanding symbol "print" From 
d9ecaabac1f49ebf074d1170777135bd6359b3a7 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 15 Aug 2020 14:33:01 +0200 Subject: [PATCH 18/59] Add debug tracing code to lexer Hidden behind a #define, like YYDEBUG --- src/asm/fstack.c | 15 +++++++++++++++ src/asm/lexer.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 65cee70f1..927ddc4a9 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -21,6 +21,12 @@ #include "asm/warning.h" #include "platform.h" /* S_ISDIR (stat macro) */ +#ifdef LEXER_DEBUG + #define dbgPrint(...) fprintf(stderr, "[lexer] " __VA_ARGS__) +#else + #define dbgPrint(...) +#endif + struct Context { struct Context *parent; struct Context *child; @@ -145,6 +151,8 @@ bool yywrap(void) } else if (!contextStack->parent) { return true; } + dbgPrint("Popping context\n"); + contextStack = contextStack->parent; contextDepth--; @@ -178,6 +186,8 @@ static void newContext(uint32_t reptDepth) void fstk_RunInclude(char const *path) { + dbgPrint("Including path \"%s\"\n", path); + char *fullPath = NULL; size_t size = 0; @@ -186,6 +196,7 @@ void fstk_RunInclude(char const *path) error("Unable to open included file '%s': %s\n", path, strerror(errno)); return; } + dbgPrint("Full path: \"%s\"\n", fullPath); newContext(0); contextStack->lexerState = lexer_OpenFile(fullPath); @@ -202,6 +213,8 @@ void fstk_RunInclude(char const *path) void fstk_RunMacro(char *macroName, struct MacroArgs *args) { + dbgPrint("Running macro \"%s\"\n", macroName); + struct Symbol *macro = sym_FindSymbol(macroName); if (!macro) { @@ -229,6 +242,8 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size) { + dbgPrint("Running REPT(%" PRIu32 ")\n", count); + uint32_t reptDepth = contextStack->reptDepth; newContext(reptDepth + 1); diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 54bc0adba..d3259dcc8 100644 --- a/src/asm/lexer.c 
+++ b/src/asm/lexer.c @@ -35,6 +35,12 @@ /* Include this last so it gets all type & constant definitions */ #include "asmy.h" /* For token definitions, generated from asmy.y */ +#ifdef LEXER_DEBUG + #define dbgPrint(...) fprintf(stderr, "[lexer] " __VA_ARGS__) +#else + #define dbgPrint(...) +#endif + /* * Identifiers that are also keywords are listed here. This ONLY applies to ones * that would normally be matched as identifiers! Check out `yylex_NORMAL` to @@ -296,6 +302,8 @@ static void initState(struct LexerState *state) struct LexerState *lexer_OpenFile(char const *path) { + dbgPrint("Opening file \"%s\"\n", path); + bool isStdin = !strcmp(path, "-"); struct LexerState *state = malloc(sizeof(*state)); @@ -370,6 +378,8 @@ struct LexerState *lexer_OpenFile(char const *path) struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo) { + dbgPrint("Opening view on buffer \"%.*s\"[...]\n", size < 16 ? (int)size : 16, buf); + struct LexerState *state = malloc(sizeof(*state)); if (!state) { @@ -391,6 +401,7 @@ struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo) void lexer_RestartRept(uint32_t lineNo) { + dbgPrint("Restarting REPT\n"); lexerState->offset = 0; initState(lexerState); lexerState->lineNo = lineNo; @@ -840,6 +851,7 @@ void lexer_DumpStringExpansions(void) static void discardComment(void) { + dbgPrint("Discarding comment\n"); for (;;) { int c = peek(0); @@ -858,6 +870,7 @@ static bool isWhitespace(int c) static void readLineContinuation(void) { + dbgPrint("Beginning line continuation\n"); for (;;) { int c = peek(0); @@ -903,6 +916,7 @@ static void readFractionalPart(void) { uint32_t value = 0, divisor = 1; + dbgPrint("Reading fractional part\n"); for (;;) { int c = peek(0); @@ -933,6 +947,7 @@ static void readBinaryNumber(void) { uint32_t value = 0; + dbgPrint("Reading binary number\n"); for (;;) { int c = peek(0); @@ -952,6 +967,7 @@ static void readHexNumber(void) uint32_t value = 0; bool empty = true; + 
dbgPrint("Reading hex number\n"); for (;;) { int c = peek(0); @@ -983,6 +999,7 @@ static void readGfxConstant(void) uint32_t bp0 = 0, bp1 = 0; uint8_t width = 0; + dbgPrint("Reading gfx constant\n"); for (;;) { int c = peek(0); @@ -1018,6 +1035,7 @@ static bool startsIdentifier(int c) static int readIdentifier(char firstChar) { + dbgPrint("Reading identifier or keyword\n"); /* Lex while checking for a keyword */ yylval.tzSym[0] = firstChar; uint16_t nodeID = keywordDict[0].children[dictIndex(firstChar)]; @@ -1053,6 +1071,7 @@ static int readIdentifier(char firstChar) i = sizeof(yylval.tzSym) - 1; } yylval.tzSym[i] = '\0'; /* Terminate the string */ + dbgPrint("Ident/keyword = \"%s\"\n", yylval.tzSym); if (keywordDict[nodeID].keyword) return keywordDict[nodeID].keyword->token; @@ -1193,6 +1212,7 @@ static void readString(void) { size_t i = 0; + dbgPrint("Reading string\n"); for (;;) { int c = peek(0); @@ -1204,6 +1224,7 @@ static void readString(void) warning(WARNING_LONG_STR, "String constant too long\n"); } yylval.tzString[i] = '\0'; + dbgPrint("Read string \"%s\"\n", yylval.tzString); return; case '\r': case '\n': /* Do not shift these! */ @@ -1214,6 +1235,7 @@ static void readString(void) } yylval.tzString[i] = '\0'; error("Unterminated string\n"); + dbgPrint("Read string \"%s\"\n", yylval.tzString); return; case '\\': /* Character escape */ @@ -1330,6 +1352,7 @@ static char const *reportGarbageChar(unsigned char firstByte) static int yylex_NORMAL(void) { + dbgPrint("Lexing in normal mode\n"); for (;;) { int c = nextChar(); @@ -1534,6 +1557,8 @@ static int yylex_NORMAL(void) static int yylex_RAW(void) { + dbgPrint("Lexing in raw mode\n"); + /* This is essentially a modified `readString` */ size_t i = 0; bool insideString = false; @@ -1576,6 +1601,7 @@ static int yylex_RAW(void) return c == EOF ? 
0 : c; } yylval.tzString[i] = '\0'; + dbgPrint("Read raw string \"%s\"\n", yylval.tzString); return T_STRING; case '\\': /* Character escape */ @@ -1699,12 +1725,15 @@ int yylex(void) /* Make sure to terminate files with a line feed */ if (token == 0) { if (lexerState->lastToken != '\n') { + dbgPrint("Forcing EOL at EOF\n"); token = '\n'; } else { /* Try to switch to new buffer; if it succeeds, scan again */ + dbgPrint("Reached EOF!\n"); /* Captures end at their buffer's boundary no matter what */ if (!lexerState->capturing) { if (!yywrap()) goto restart; + dbgPrint("Reached end of input."); return 0; } } From b27b821e7f7bf1c3e0ce8c07cb047cca6cf09434 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 15 Aug 2020 14:34:47 +0200 Subject: [PATCH 19/59] Fix RAW lexer length underflow Also added an assertion to check against more such overflows --- src/asm/lexer.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index d3259dcc8..f11453e8c 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -792,6 +792,7 @@ static void shiftChars(uint8_t distance) /* Wrap around if necessary */ if (lexerState->index >= LEXER_BUF_SIZE) lexerState->index %= LEXER_BUF_SIZE; + assert(lexerState->nbChars >= distance); lexerState->nbChars -= distance; } } @@ -1467,9 +1468,7 @@ static int yylex_NORMAL(void) case '8': case '9': readNumber(10, c - '0'); - int perhapsPeriod = peek(0); - - if (perhapsPeriod == '.') { + if (peek(0) == '.') { shiftChars(1); readFractionalPart(); } @@ -1597,8 +1596,11 @@ static int yylex_RAW(void) i--; /* Empty macro args break their expansion, so prevent that */ if (i == 0) { + /* Return the EOF token, and don't shift a non-existent char! */ + if (c == EOF) + return 0; shiftChars(1); - return c == EOF ? 
0 : c; + return c; } yylval.tzString[i] = '\0'; dbgPrint("Read raw string \"%s\"\n", yylval.tzString); From cf992164f79a0e5c0fd49b445d698be2361a9b67 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 15 Aug 2020 15:10:32 +0200 Subject: [PATCH 20/59] Fix lexer capture sometimes not being reset --- src/asm/lexer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index f11453e8c..e143745d9 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -1674,7 +1674,7 @@ static int skipIfBlock(bool toEndc) token = yylex(); if (token == 0) { /* Pass EOF through */ - return token; + break; } else if (atLineStart && token == T_POP_IF) { /* Increase nesting */ nIFDepth++; } else if (atLineStart && nIFDepth == startingDepth) { /* An occasion to finish? */ @@ -1811,6 +1811,7 @@ void lexer_CaptureRept(char **capture, size_t *size) for (;;) { if (c == EOF) { error("Unterminated REPT block\n"); + lexerState->capturing = false; goto finish; } else if (c == '\n') { break; @@ -1824,6 +1825,7 @@ void lexer_CaptureRept(char **capture, size_t *size) } finish: + assert(!lexerState->capturing); *capture = captureStart; *size = lexerState->captureSize - strlen("ENDR"); lexerState->captureBuf = NULL; @@ -1847,6 +1849,7 @@ void lexer_CaptureMacroBody(char **capture, size_t *size) for (;;) { if (c == EOF) { error("Unterminated macro definition\n"); + lexerState->capturing = false; goto finish; } else if (c == '\n') { break; @@ -1905,6 +1908,7 @@ void lexer_CaptureMacroBody(char **capture, size_t *size) } finish: + assert(!lexerState->capturing); *capture = captureStart; *size = lexerState->captureSize - strlen("ENDM"); lexerState->captureBuf = NULL; From 9081feab51c8727b81f938d4eeb0c324c18b7a0f Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 15 Aug 2020 15:15:18 +0200 Subject: [PATCH 21/59] Reinstate macro arg scan distance Used to be broken, so it was removed, but doing so prevents escaping them. 
So it was instead put back in, but with corrected behavior --- src/asm/lexer.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index e143745d9..fb8adb859 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -278,6 +278,7 @@ struct LexerState { char *captureBuf; /* Buffer to send the captured text to if non-NULL */ size_t captureCapacity; /* Size of the buffer above */ + size_t macroArgScanDistance; /* Max distance already scanned for macro args */ bool expandStrings; struct Expansion *expansions; size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */ @@ -295,6 +296,7 @@ static void initState(struct LexerState *state) state->capturing = false; state->captureBuf = NULL; + state->macroArgScanDistance = 0; state->expandStrings = true; state->expansions = NULL; state->expansionOfs = 0; @@ -716,16 +718,29 @@ static int peek(uint8_t distance) { int c = peekInternal(distance); - /* If not capturing and character is a backslash, check for a macro arg */ - if (!lexerState->capturing && c == '\\') { - distance++; - c = peekInternal(distance); - if (c == '@' || (c >= '0' && c <= '9')) { - /* Expand the argument and return its first character */ - c = expandMacroArg(c, distance - 1)[0]; - /* WARNING: this assumes macro args can't be empty!! 
*/ - } else { - c = '\\'; + if (distance >= lexerState->macroArgScanDistance) { + lexerState->macroArgScanDistance = distance + 1; /* Do not consider again */ + /* If not capturing and character is a backslash, check for a macro arg */ + if (!lexerState->capturing && c == '\\') { + distance++; + lexerState->macroArgScanDistance++; + c = peekInternal(distance); + if (c == '@' || (c >= '1' && c <= '9')) { + /* Expand the argument and return its first character */ + char const *str = expandMacroArg(c, distance - 1); + + /* + * Assuming macro args can't be recursive (I'll be damned if a way + * is found...), then we mark the entire macro arg as scanned; + * however, the two macro arg characters (\1) will be ignored, + * so they shouldn't be counted in the scan distance! + */ + lexerState->macroArgScanDistance += strlen(str) - 2; + /* WARNING: this assumes macro args can't be empty!! */ + c = str[0]; + } else { + c = '\\'; + } } } return c; @@ -745,6 +760,8 @@ static void shiftChars(uint8_t distance) } } + lexerState->macroArgScanDistance -= distance; + /* FIXME: this may not be too great, as only the top level is considered... 
*/ /* From 08867b3cec6fbc8bc215a1049055e3c65281751c Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sat, 15 Aug 2020 15:26:22 +0200 Subject: [PATCH 22/59] Enable catching invalid macro arg 0 --- src/asm/lexer.c | 70 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index fb8adb859..d2e4970ad 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -725,7 +725,7 @@ static int peek(uint8_t distance) distance++; lexerState->macroArgScanDistance++; c = peekInternal(distance); - if (c == '@' || (c >= '1' && c <= '9')) { + if (c == '@' || (c >= '0' && c <= '9')) { /* Expand the argument and return its first character */ char const *str = expandMacroArg(c, distance - 1); @@ -1677,9 +1677,11 @@ static int yylex_RAW(void) */ static int skipIfBlock(bool toEndc) { + dbgPrint("Skipping IF block (toEndc = %s)\n", toEndc ? "true" : "false"); lexer_SetMode(LEXER_NORMAL); int startingDepth = nIFDepth; int token; + bool atLineStart = lexerState->atLineStart; /* Prevent expanding macro args in this state by enabling capture to nothing */ lexerState->capturing = true; @@ -1687,23 +1689,65 @@ static int skipIfBlock(bool toEndc) lexerState->captureBuf = NULL; for (;;) { - bool atLineStart = lexerState->atLineStart; + if (atLineStart) { + int c; - token = yylex(); - if (token == 0) { /* Pass EOF through */ - break; - } else if (atLineStart && token == T_POP_IF) { /* Increase nesting */ - nIFDepth++; - } else if (atLineStart && nIFDepth == startingDepth) { /* An occasion to finish? 
*/ - if (token == T_POP_ENDC || (!toEndc && (token == T_POP_ELIF - || token == T_POP_ELSE))) - break; - } else if (atLineStart && token == T_POP_ENDC) { /* Decrease nesting */ - nIFDepth--; + for (;;) { + c = peek(0); + if (!isWhitespace(c)) + break; + shiftChars(1); + } + + if (startsIdentifier(c)) { + shiftChars(1); + token = readIdentifier(c); + switch (token) { + case T_POP_IF: + nIFDepth++; + break; + + case T_POP_ELIF: + case T_POP_ELSE: + if (toEndc) /* Ignore ELIF and ELSE, go to ENDC */ + break; + /* fallthrough */ + case T_POP_ENDC: + if (nIFDepth == startingDepth) + goto finish; + if (token == T_POP_ENDC) + nIFDepth--; + } + } + atLineStart = false; } + + /* Read chars until EOL */ + do { + int c = nextChar(); + + if (c == EOF) { + token = 0; + goto finish; + } else if (c == '\\') { + /* Unconditionally skip the next char, including line conts */ + c = nextChar(); + } else if (c == '\r' || c == '\n') { + atLineStart = true; + } + + if (c == '\r' || c == '\n') + /* Do this both on line continuations and plain EOLs */ + lexerState->lineNo++; + /* Handle CRLF */ + if (c == '\r' && peek(0) == '\n') + shiftChars(1); + } while (!atLineStart); } +finish: lexerState->capturing = false; + lexerState->atLineStart = false; return token; } From 3f5f9bcaf01c0f6b3382f6fb85daa3ac7857ebee Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 16 Aug 2020 02:48:41 +0200 Subject: [PATCH 23/59] Fix numeric constant overflow checks --- src/asm/lexer.c | 17 ++++++++++------- test/asm/equs-nest.out | 1 + test/asm/line-continuation-whitespace.asm | 1 - test/asm/local-purge.out | 2 +- test/asm/overflow.err | 4 ++-- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index d2e4970ad..12f9ea1a0 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -920,7 +920,7 @@ static void readNumber(int radix, int32_t baseValue) if (c < '0' || c > '0' + radix - 1) break; - if (value > UINT32_MAX / radix) + if (value > (UINT32_MAX - (c - '0')) / radix) 
warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); value = value * radix + (c - '0'); @@ -940,7 +940,7 @@ static void readFractionalPart(void) if (c < '0' || c > '9') break; - if (divisor > UINT32_MAX / 10) { + if (divisor > (UINT32_MAX - (c - '0')) / 10) { warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large\n"); /* Discard any additional digits */ @@ -948,6 +948,7 @@ static void readFractionalPart(void) shiftChars(1); break; } + value = value * 10 + (c - '0'); } if (yylval.nConstValue > INT16_MAX || yylval.nConstValue < INT16_MIN) @@ -972,6 +973,8 @@ static void readBinaryNumber(void) /* TODO: handle `-b`'s dynamic chars */ if (c != '0' && c != '1') break; + if (value > (UINT32_MAX - (c - '0')) / 2) + warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); value = value * 2 + (c - '0'); shiftChars(1); @@ -998,7 +1001,7 @@ static void readHexNumber(void) else break; - if (value > UINT32_MAX / 16) + if (value > (UINT32_MAX - c) / 16) warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); value = value * 16 + c; @@ -1030,16 +1033,16 @@ static void readGfxConstant(void) bp0 = bp0 << 1 | (pixel & 1); bp1 = bp1 << 1 | (pixel >> 1); } - if (width <= 8) + if (width < 9) width++; shiftChars(1); } if (width == 0) - error("Invalid gfx constant, no digits after '`'\n"); - else if (width == 8) + error("Invalid graphics constant, no digits after '`'\n"); + else if (width == 9) warning(WARNING_LARGE_CONSTANT, - "Gfx constant is too large, only 8 first pixels considered\n"); + "Graphics constant is too long, only 8 first pixels considered\n"); yylval.nConstValue = bp1 << 8 | bp0; } diff --git a/test/asm/equs-nest.out b/test/asm/equs-nest.out index e69de29bb..f985b46af 100644 --- a/test/asm/equs-nest.out +++ b/test/asm/equs-nest.out @@ -0,0 +1 @@ +Success! 
diff --git a/test/asm/line-continuation-whitespace.asm b/test/asm/line-continuation-whitespace.asm index 52b6b465b..404e536af 100644 --- a/test/asm/line-continuation-whitespace.asm +++ b/test/asm/line-continuation-whitespace.asm @@ -2,7 +2,6 @@ ; file doesn't cause a segfault. bar: MACRO - WARN "" ENDM foo: bar baz\ diff --git a/test/asm/local-purge.out b/test/asm/local-purge.out index ebfabbe4b..8b1378917 100644 --- a/test/asm/local-purge.out +++ b/test/asm/local-purge.out @@ -1 +1 @@ -$0 + diff --git a/test/asm/overflow.err b/test/asm/overflow.err index b51baa5eb..a3b449427 100644 --- a/test/asm/overflow.err +++ b/test/asm/overflow.err @@ -3,6 +3,6 @@ warning: overflow.asm(24): [-Wdiv] warning: overflow.asm(25): [-Wdiv] Division of -2147483648 by -1 yields -2147483648 warning: overflow.asm(39): [-Wlarge-constant] - Integer constant '4294967296' is too large + Integer constant is too large warning: overflow.asm(42): [-Wlarge-constant] - Graphics constant '`333333333' is too long + Graphics constant is too long, only 8 first pixels considered From e4f2fad215650efd9155adc5312bb76c6c0c3747 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 16 Aug 2020 03:08:31 +0200 Subject: [PATCH 24/59] Support line continuations in main scope --- src/asm/lexer.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 12f9ea1a0..bef7397d6 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -896,13 +896,14 @@ static void readLineContinuation(void) shiftChars(1); } else if (c == '\r' || c == '\n') { shiftChars(1); + if (c == '\r' && peek(0) == '\n') + shiftChars(1); if (!lexerState->expansions - || lexerState->expansions->distance) { + || lexerState->expansions->distance) lexerState->lineNo++; - } return; } else { - error("Begun line continuation, but encountered character %s\n", + error("Begun line continuation, but encountered character '%s'\n", print(c)); return; } @@ -1536,7 +1537,28 @@ static int 
yylex_NORMAL(void) case EOF: return 0; - /* Handle identifiers... or error out */ + /* Handle escapes */ + + case '\\': + c = peek(0); + + switch (c) { + case ' ': + case '\r': + case '\n': + readLineContinuation(); + break; + + case EOF: + error("Illegal character escape at end of input\n"); + break; + default: + shiftChars(1); + error("Illegal character escape '%s'\n", print(c)); + } + break; + + /* Handle identifiers and escapes... or error out */ default: if (startsIdentifier(c)) { From 62ecdce0b0db976894f88336f1fa7f257a77e950 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 16 Aug 2020 03:11:14 +0200 Subject: [PATCH 25/59] Fix line-continuation-macro test --- test/asm/line-continuation-macro.asm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/asm/line-continuation-macro.asm b/test/asm/line-continuation-macro.asm index b6b409076..4ea1f6bf6 100644 --- a/test/asm/line-continuation-macro.asm +++ b/test/asm/line-continuation-macro.asm @@ -2,6 +2,7 @@ m: MACRO ENDM m2: MACRO - m \ ENDM + m \ +ENDM m2 From fd02ffb7bd6e41d310dd1c6f0652fcee82152437 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 16 Aug 2020 13:33:06 +0200 Subject: [PATCH 26/59] Implement __FILE__ symbol Also clean up built-in symbol creation This is not great, but currently okay. Should be fixed later, like the rest... 
--- include/asm/fstack.h | 1 + include/asm/symbol.h | 19 ++++++--- src/asm/fstack.c | 9 +++- src/asm/symbol.c | 95 +++++++++++++++++++++++++++++++------------ test/asm/file-sym.out | 2 +- 5 files changed, 91 insertions(+), 35 deletions(-) diff --git a/include/asm/fstack.h b/include/asm/fstack.h index bbf321c36..52466a118 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -58,6 +58,7 @@ void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size); void fstk_Dump(void); char *fstk_DumpToStr(void); +char const *fstk_GetFileName(void); uint32_t fstk_GetLine(void); void fstk_Init(char *mainPath, uint32_t maxRecursionDepth); diff --git a/include/asm/symbol.h b/include/asm/symbol.h index 22ae95abf..e2beda470 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -38,14 +38,19 @@ struct Symbol { char fileName[_MAX_PATH + 1]; /* File where the symbol was defined. */ uint32_t fileLine; /* Line where the symbol was defined. */ + bool hasCallback; union { - struct { /* If sym_IsNumeric */ + union { /* Otherwise */ + /* If sym_IsNumeric */ int32_t value; - int32_t (*callback)(void); - }; - struct { /* For SYM_MACRO */ - size_t macroSize; - char *macro; + int32_t (*numCallback)(void); + /* For SYM_MACRO */ + struct { + size_t macroSize; + char *macro; + }; + /* For SYM_EQUS, TODO: separate "base" fields from SYM_MACRO */ + char const *(*strCallback)(void); /* For SYM_EQUS */ }; }; @@ -101,6 +106,8 @@ static inline bool sym_IsExported(struct Symbol const *sym) */ static inline char const *sym_GetStringValue(struct Symbol const *sym) { + if (sym->hasCallback) + return sym->strCallback(); return sym->macro; } diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 927ddc4a9..99eb96626 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -229,8 +229,8 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) newContext(0); /* Line minus 1 because buffer begins with a newline */ - contextStack->lexerState = 
lexer_OpenFileView(macro->macro, - macro->macroSize, macro->fileLine - 1); + contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize, + macro->fileLine - 1); if (!contextStack->lexerState) fatalerror("Failed to set up lexer for macro invocation\n"); lexer_SetStateAtEOL(contextStack->lexerState); @@ -307,6 +307,11 @@ char *fstk_DumpToStr(void) return str; } +char const *fstk_GetFileName(void) +{ + return contextStack->fileName; +} + uint32_t fstk_GetLine(void) { return lexer_GetLineNo(); diff --git a/src/asm/symbol.c b/src/asm/symbol.c index bd790fe92..45bbb9678 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -85,6 +85,45 @@ static int32_t Callback__LINE__(void) return lexer_GetLineNo(); } +static char const *Callback__FILE__(void) +{ + /* + * FIXME: this is dangerous, and here's why this is CURRENTLY okay. It's still bad, fix it. + * There are only two call sites for this; one copies the contents directly, the other is + * EQUS expansions, which cannot straddle file boundaries. So this should be fine. + */ + static char *buf = NULL; + static size_t bufsize = 0; + char const *fileName = fstk_GetFileName(); + size_t j = 1; + + /* TODO: is there a way for a file name to be empty? */ + assert(strlen(fileName) != 0); + /* The assertion above ensures the loop runs at least once */ + for (size_t i = 0; fileName[i]; i++, j++) { + /* Account for the extra backslash inserted below */ + if (fileName[i] == '"') + j++; + /* Ensure there will be enough room; DO NOT PRINT ANYTHING ABOVE THIS!! */ + if (j + 2 >= bufsize) { /* Always keep room for 2 tail chars */ + bufsize = bufsize ? 
bufsize * 2 : 64; + buf = realloc(buf, bufsize); + if (!buf) + fatalerror("Failed to grow buffer for file name: %s\n", + strerror(errno)); + } + /* Escape quotes, since we're returning a string */ + if (fileName[i] == '"') + buf[j - 1] = '\\'; + buf[j] = fileName[i]; + } + /* Write everything after the loop, to ensure everything has been allocated */ + buf[0] = '"'; + buf[j++] = '"'; + buf[j] = '\0'; + return buf; +} + static int32_t CallbackPC(void) { struct Section const *section = sect_GetSymbolSection(); @@ -97,8 +136,8 @@ static int32_t CallbackPC(void) */ int32_t sym_GetValue(struct Symbol const *sym) { - if (sym_IsNumeric(sym) && sym->callback) - return sym->callback(); + if (sym_IsNumeric(sym) && sym->hasCallback) + return sym->numCallback(); if (sym->type == SYM_LABEL) /* TODO: do not use section's org directly */ @@ -113,9 +152,8 @@ int32_t sym_GetValue(struct Symbol const *sym) static void updateSymbolFilename(struct Symbol *sym) { if (snprintf(sym->fileName, _MAX_PATH + 1, "%s", - lexer_GetFileName()) > _MAX_PATH) - fatalerror("%s: File name is too long: '%s'\n", __func__, - lexer_GetFileName()); + fstk_GetFileName()) > _MAX_PATH) + fatalerror("%s: File name is too long: '%s'\n", __func__, fstk_GetFileName()); sym->fileLine = fstk_GetLine(); } @@ -134,6 +172,7 @@ static struct Symbol *createsymbol(char const *s) symbol->isExported = false; symbol->isBuiltin = false; + symbol->hasCallback = false; symbol->section = NULL; updateSymbolFilename(symbol); symbol->ID = -1; @@ -310,7 +349,6 @@ struct Symbol *sym_AddEqu(char const *symName, int32_t value) struct Symbol *sym = createNonrelocSymbol(symName); sym->type = SYM_EQU; - sym->callback = NULL; sym->value = value; return sym; @@ -364,7 +402,6 @@ struct Symbol *sym_AddSet(char const *symName, int32_t value) updateSymbolFilename(sym); sym->type = SYM_SET; - sym->callback = NULL; sym->value = value; return sym; @@ -375,7 +412,7 @@ struct Symbol *sym_AddSet(char const *symName, int32_t value) * @param name The 
label's full name (so `.name` is invalid) * @return The created symbol */ -static struct Symbol *addSectionlessLabel(char const *name) +static struct Symbol *addLabel(char const *name) { assert(name[0] != '.'); /* The symbol name must have been expanded prior */ struct Symbol *sym = findsymbol(name, NULL); /* Due to this, don't look for expansions */ @@ -389,20 +426,12 @@ static struct Symbol *addSectionlessLabel(char const *name) } /* If the symbol already exists as a ref, just "take over" it */ sym->type = SYM_LABEL; - sym->callback = NULL; sym->value = sect_GetSymbolOffset(); if (exportall) sym->isExported = true; sym->section = sect_GetSymbolSection(); updateSymbolFilename(sym); - return sym; -} - -static struct Symbol *addLabel(char const *name) -{ - struct Symbol *sym = addSectionlessLabel(name); - if (sym && !sym->section) error("Label \"%s\" created outside of a SECTION\n", name); return sym; @@ -538,21 +567,35 @@ static inline char const *removeLeadingZeros(char const *ptr) return ptr; } +static inline struct Symbol *createBuiltinSymbol(char const *name) +{ + struct Symbol *sym = createsymbol(name); + + sym->isBuiltin = true; + sym->hasCallback = true; + strcpy(sym->fileName, ""); + sym->fileLine = 0; + return sym; +} /* * Initialize the symboltable */ void sym_Init(void) { - struct Symbol *_NARGSymbol = sym_AddEqu("_NARG", 0); - struct Symbol *__LINE__Symbol = sym_AddEqu("__LINE__", 0); - - PCSymbol = addSectionlessLabel("@"); - PCSymbol->isBuiltin = true; - PCSymbol->callback = CallbackPC; - _NARGSymbol->isBuiltin = true; - _NARGSymbol->callback = Callback_NARG; - __LINE__Symbol->isBuiltin = true; - __LINE__Symbol->callback = Callback__LINE__; + PCSymbol = createBuiltinSymbol("@"); + struct Symbol *_NARGSymbol = createBuiltinSymbol("_NARG"); + struct Symbol *__LINE__Symbol = createBuiltinSymbol("__LINE__"); + struct Symbol *__FILE__Symbol = createBuiltinSymbol("__FILE__"); + + PCSymbol->type = SYM_LABEL; + PCSymbol->section = NULL; + 
PCSymbol->numCallback = CallbackPC; + _NARGSymbol->type = SYM_EQU; + _NARGSymbol->numCallback = Callback_NARG; + __LINE__Symbol->type = SYM_EQU; + __LINE__Symbol->numCallback = Callback__LINE__; + __FILE__Symbol->type = SYM_EQUS; + __FILE__Symbol->strCallback = Callback__FILE__; sym_AddSet("_RS", 0)->isBuiltin = true; sym_AddEqu("__RGBDS_MAJOR__", PACKAGE_VERSION_MAJOR)->isBuiltin = true; diff --git a/test/asm/file-sym.out b/test/asm/file-sym.out index 61fc13a23..c3259a8be 100644 --- a/test/asm/file-sym.out +++ b/test/asm/file-sym.out @@ -1 +1 @@ -"test/asm/file-sym.asm" +"file-sym.asm" From baeb180acdfa7d4f49d7afd53035e8f707fe6551 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 16 Aug 2020 22:17:18 +0200 Subject: [PATCH 27/59] Apply error reporting changes to tests --- test/asm/label-redefinition.err | 2 +- test/link/section-union/align-conflict.out | 4 ++-- test/link/section-union/align-ofs-conflict.out | 4 ++-- test/link/section-union/assert.out | 2 +- test/link/section-union/bad-types.out | 4 ++-- test/link/section-union/bank-conflict.out | 4 ++-- test/link/section-union/data-overlay.out | 4 ++-- test/link/section-union/different-data.out | 4 ++-- test/link/section-union/different-ofs.out | 4 ++-- test/link/section-union/different-size.out | 4 ++-- test/link/section-union/different-syntaxes.out | 4 ++-- test/link/section-union/org-conflict.out | 4 ++-- test/link/section-union/split-data.out | 4 ++-- 13 files changed, 24 insertions(+), 24 deletions(-) diff --git a/test/asm/label-redefinition.err b/test/asm/label-redefinition.err index 09eb79b3f..1880de77d 100644 --- a/test/asm/label-redefinition.err +++ b/test/asm/label-redefinition.err @@ -1,3 +1,3 @@ ERROR: label-redefinition.asm(7): - 'Sym' already defined in label-redefinition.asm::m(6) + 'Sym' already defined in label-redefinition.asm::m(4) error: Assembly aborted (1 errors)! 
diff --git a/test/link/section-union/align-conflict.out b/test/link/section-union/align-conflict.out index d29b7b973..0a9a98eb8 100644 --- a/test/link/section-union/align-conflict.out +++ b/test/link/section-union/align-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting alignment" is defined with conflicting 4-byte alignment (offset 0) and address $cafe --- -ERROR: -(18): +ERROR: (18): Section "conflicting alignment" already declared as aligned to 4 bytes (offset 0) -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting alignment" (1 errors) diff --git a/test/link/section-union/align-ofs-conflict.out b/test/link/section-union/align-ofs-conflict.out index 08cc5929b..6a545f34b 100644 --- a/test/link/section-union/align-ofs-conflict.out +++ b/test/link/section-union/align-ofs-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting alignment" is defined with conflicting 8-byte alignment (offset 7) and 16-byte alignment (offset 14) --- -ERROR: -(18): +ERROR: (18): Section "conflicting alignment" already declared with incompatible 3-byte alignment (offset 7) -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting alignment" (1 errors) diff --git a/test/link/section-union/assert.out b/test/link/section-union/assert.out index c50272d99..fa3c90e51 100644 --- a/test/link/section-union/assert.out +++ b/test/link/section-union/assert.out @@ -1,6 +1,6 @@ error: section-union/assert.asm(11): Force failing the build Linking failed with 1 error --- -ERROR: -(30): +ERROR: (30): Assertion failed: Force failing the build error: Assembly aborted (1 errors)! 
diff --git a/test/link/section-union/bad-types.out b/test/link/section-union/bad-types.out index bd5fdfcff..1fcd01a17 100644 --- a/test/link/section-union/bad-types.out +++ b/test/link/section-union/bad-types.out @@ -1,6 +1,6 @@ error: Section "conflicting types" is defined with conflicting types HRAM and WRAM0 --- -ERROR: -(18): +ERROR: (18): Section "conflicting types" already exists but with type HRAM -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting types" (1 errors) diff --git a/test/link/section-union/bank-conflict.out b/test/link/section-union/bank-conflict.out index c806274f9..2d16cff80 100644 --- a/test/link/section-union/bank-conflict.out +++ b/test/link/section-union/bank-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting banks" is defined with conflicting banks 4 and 1 --- -ERROR: -(14): +ERROR: (14): Section "conflicting banks" already declared with different bank 4 -ERROR: -(14): +FATAL: (14): Cannot create section "conflicting banks" (1 errors) diff --git a/test/link/section-union/data-overlay.out b/test/link/section-union/data-overlay.out index b2cfb039d..d504ce95e 100644 --- a/test/link/section-union/data-overlay.out +++ b/test/link/section-union/data-overlay.out @@ -1,6 +1,6 @@ error: Section "overlaid data" is of type ROM0, which cannot be unionized --- -ERROR: -(18): +ERROR: (18): Cannot declare ROM sections as UNION -ERROR: -(18): +FATAL: (18): Cannot create section "overlaid data" (1 errors) diff --git a/test/link/section-union/different-data.out b/test/link/section-union/different-data.out index 46619e1cd..4357cd596 100644 --- a/test/link/section-union/different-data.out +++ b/test/link/section-union/different-data.out @@ -1,6 +1,6 @@ error: Section "different data" is of type ROM0, which cannot be unionized --- -ERROR: -(16): +ERROR: (16): Cannot declare ROM sections as UNION -ERROR: -(16): +FATAL: (16): Cannot create section "different data" (1 errors) diff --git a/test/link/section-union/different-ofs.out 
b/test/link/section-union/different-ofs.out index c897549f2..ee86d2855 100644 --- a/test/link/section-union/different-ofs.out +++ b/test/link/section-union/different-ofs.out @@ -1,6 +1,6 @@ error: Section "conflicting alignment" is defined with conflicting 8-byte alignment (offset 7) and 8-byte alignment (offset 6) --- -ERROR: -(18): +ERROR: (18): Section "conflicting alignment" already declared with incompatible 3-byte alignment (offset 7) -ERROR: -(18): +FATAL: (18): Cannot create section "conflicting alignment" (1 errors) diff --git a/test/link/section-union/different-size.out b/test/link/section-union/different-size.out index bc6131f62..16089d5e6 100644 --- a/test/link/section-union/different-size.out +++ b/test/link/section-union/different-size.out @@ -1,6 +1,6 @@ error: Section "different section sizes" is of type ROM0, which cannot be unionized --- -ERROR: -(16): +ERROR: (16): Cannot declare ROM sections as UNION -ERROR: -(16): +FATAL: (16): Cannot create section "different section sizes" (1 errors) diff --git a/test/link/section-union/different-syntaxes.out b/test/link/section-union/different-syntaxes.out index 52dd77093..8e5b8084f 100644 --- a/test/link/section-union/different-syntaxes.out +++ b/test/link/section-union/different-syntaxes.out @@ -1,6 +1,6 @@ error: Section "different syntaxes" is of type ROM0, which cannot be unionized --- -ERROR: -(18): +ERROR: (18): Cannot declare ROM sections as UNION -ERROR: -(18): +FATAL: (18): Cannot create section "different syntaxes" (1 errors) diff --git a/test/link/section-union/org-conflict.out b/test/link/section-union/org-conflict.out index b5a332eca..7c0140da8 100644 --- a/test/link/section-union/org-conflict.out +++ b/test/link/section-union/org-conflict.out @@ -1,6 +1,6 @@ error: Section "conflicting address" is defined with conflicting addresses $beef and $babe --- -ERROR: -(16): +ERROR: (16): Section "conflicting address" already declared as fixed at different address $beef -ERROR: -(16): +FATAL: (16): 
Cannot create section "conflicting address" (1 errors) diff --git a/test/link/section-union/split-data.out b/test/link/section-union/split-data.out index 36db397f4..8bd671932 100644 --- a/test/link/section-union/split-data.out +++ b/test/link/section-union/split-data.out @@ -1,6 +1,6 @@ error: Section "mutually-overlaid data" is of type ROM0, which cannot be unionized --- -ERROR: -(18): +ERROR: (18): Cannot declare ROM sections as UNION -ERROR: -(18): +FATAL: (18): Cannot create section "mutually-overlaid data" (1 errors) From ae77893021c9c728d1ded0eda78aa13ab419dd1f Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 16 Aug 2020 22:18:34 +0200 Subject: [PATCH 28/59] Fix file name reporting As noted in the function's code, this is very error-prone, but will do the job; this needs rewriting due to #491 anyways, so, temporary. --- src/asm/fstack.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 99eb96626..45082ee87 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -309,7 +309,30 @@ char *fstk_DumpToStr(void) char const *fstk_GetFileName(void) { - return contextStack->fileName; + /* FIXME: this is awful, but all callees copy the buffer anyways */ + static char fileName[_MAX_PATH + 1]; + size_t remainingChars = _MAX_PATH + 1; + char *dest = fileName; + char const *src = contextStack->fileName; + +#define append(...) 
do { \ + int nbChars = snprintf(dest, remainingChars, __VA_ARGS__); \ + \ + if (nbChars >= remainingChars) \ + fatalerror("File stack entry too large"); \ + remainingChars -= nbChars; \ + dest += nbChars; \ +} while (0) + + while (*src && --remainingChars) /* Leave room for terminator */ + *dest++ = *src++; + if (remainingChars && contextStack->macro) + append("::%s", contextStack->macro->name); + for (size_t i = 0; i < contextStack->reptDepth; i++) + append("::REPT~%" PRIu32, contextStack->reptIters[i]); + + *dest = '\0'; + return fileName; } uint32_t fstk_GetLine(void) From 8d18b39eeed5188216483e21675911c046586b72 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 16 Aug 2020 23:11:12 +0200 Subject: [PATCH 29/59] Support missing register tokens Made possible by #491 --- src/asm/lexer.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index bef7397d6..aacdb9387 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -115,6 +115,8 @@ static struct KeywordMapping { {"DE", T_MODE_DE}, {"HL", T_MODE_HL}, {"SP", T_MODE_SP}, + {"HLD", T_MODE_HL_DEC}, + {"HLI", T_MODE_HL_INC}, {"A", T_TOKEN_A}, {"B", T_TOKEN_B}, @@ -429,7 +431,7 @@ struct KeywordDictNode { uint16_t children[0x60 - ' ']; struct KeywordMapping const *keyword; /* Since the keyword structure is invariant, the min number of nodes is known at compile time */ -} keywordDict[336] = {0}; /* Make sure to keep this correct when adding keywords! */ +} keywordDict[338] = {0}; /* Make sure to keep this correct when adding keywords! 
*/ /* Convert a char into its index into the dict */ static inline uint8_t dictIndex(char c) @@ -1476,6 +1478,25 @@ static int yylex_NORMAL(void) case '$': yylval.nConstValue = 0; readHexNumber(); + /* Attempt to match `$ff00+c` */ + if (yylval.nConstValue == 0xff00) { + /* Whitespace is ignored anyways */ + while (isWhitespace(c = peek(0))) + shiftChars(1); + if (c == '+') { + /* FIXME: not great due to large lookahead */ + uint8_t distance = 1; + + do { + c = peek(distance++); + } while (isWhitespace(c)); + + if (c == 'c' || c == 'C') { + shiftChars(distance); + return T_MODE_HW_C; + } + } + } return T_NUMBER; case '0': /* Decimal number */ From 35396e6410e471626db52c917f405fcf65034373 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 17 Aug 2020 00:06:02 +0200 Subject: [PATCH 30/59] Fix files being unmapped when still referenced by macros --- src/asm/lexer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index aacdb9387..89c528805 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -257,6 +257,7 @@ struct LexerState { char *ptr; /* Technically `const` during the lexer's execution */ off_t size; off_t offset; + bool isReferenced; /* If a macro in this file requires not unmapping it*/ }; struct { /* Otherwise */ int fd; @@ -415,7 +416,7 @@ void lexer_DeleteState(struct LexerState *state) { if (!state->isMmapped) close(state->fd); - else if (state->isFile) + else if (state->isFile && !state->isReferenced) munmap(state->ptr, state->size); free(state); } @@ -1944,6 +1945,11 @@ void lexer_CaptureMacroBody(char **capture, size_t *size) unsigned int level = 0; int c = peek(0); + /* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */ + if (lexerState->isMmapped) + /* FIXME: this is godawful, but RGBASM doesn't even clean up anything anyways. */ + lexerState->isReferenced = true; + /* * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling * this. 
Thus, we need to keep one in the buffer afterwards. From 4d1333e124369a63f69ee9ef4bc94bd6073f9b41 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 17 Aug 2020 12:00:28 +0200 Subject: [PATCH 31/59] Fix incorrect error reporting of `INCLUDE`d files --- src/asm/fstack.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 45082ee87..adbd2c6b8 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -33,6 +33,7 @@ struct Context { struct LexerState *lexerState; uint32_t uniqueID; char const *fileName; + char *fileNameBuf; uint32_t lineNo; /* Line number at which the context was EXITED */ struct Symbol const *macro; uint32_t nbReptIters; /* If zero, this isn't a REPT block */ @@ -153,6 +154,10 @@ bool yywrap(void) } dbgPrint("Popping context\n"); + /* Free an `INCLUDE`'s path */ + if (contextStack->fileNameBuf) + free(contextStack->fileNameBuf); + contextStack = contextStack->parent; contextDepth--; @@ -206,7 +211,8 @@ void fstk_RunInclude(char const *path) /* We're back at top-level, so most things are reset */ contextStack->uniqueID = 0; macro_SetUniqueID(0); - contextStack->fileName = lexer_GetFileName(); + contextStack->fileName = fullPath; + contextStack->fileNameBuf = fullPath; contextStack->macro = NULL; contextStack->nbReptIters = 0; } @@ -236,6 +242,7 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) lexer_SetStateAtEOL(contextStack->lexerState); contextStack->uniqueID = macro_UseNewUniqueID(); contextStack->fileName = macro->fileName; + contextStack->fileNameBuf = NULL; contextStack->macro = macro; contextStack->nbReptIters = 0; } @@ -253,6 +260,7 @@ void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size) lexer_SetStateAtEOL(contextStack->lexerState); contextStack->uniqueID = macro_UseNewUniqueID(); contextStack->fileName = contextStack->parent->fileName; + contextStack->fileNameBuf = NULL; contextStack->macro = contextStack->parent->macro; /* Inherit */ 
contextStack->nbReptIters = count; /* Copy all of parent's iters, and add ours */ @@ -354,6 +362,7 @@ void fstk_Init(char *mainPath, uint32_t maxRecursionDepth) topLevelContext->uniqueID = 0; macro_SetUniqueID(0); topLevelContext->fileName = lexer_GetFileName(); + topLevelContext->fileNameBuf = NULL; topLevelContext->macro = NULL; topLevelContext->nbReptIters = 0; topLevelContext->reptDepth = 0; From d641972cded4339c8763d55115760c58c9b78c18 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 17 Aug 2020 17:01:16 +0200 Subject: [PATCH 32/59] Fix macro args not being restored when exiting macros --- src/asm/fstack.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/asm/fstack.c b/src/asm/fstack.c index adbd2c6b8..c1f8a0848 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -36,6 +36,7 @@ struct Context { char *fileNameBuf; uint32_t lineNo; /* Line number at which the context was EXITED */ struct Symbol const *macro; + struct MacroArgs *macroArgs; /* Macro args are *saved* here */ uint32_t nbReptIters; /* If zero, this isn't a REPT block */ size_t reptDepth; uint32_t reptIters[]; @@ -162,6 +163,11 @@ bool yywrap(void) contextDepth--; lexer_DeleteState(contextStack->child->lexerState); + /* Restore args if a macro (not REPT) saved them */ + if (contextStack->child->nbReptIters == 0 && contextStack->child->macro) { + dbgPrint("Restoring macro args %p\n", contextStack->macroArgs); + macro_UseNewArgs(contextStack->macroArgs); + } /* Free the entry and make its parent the current entry */ free(contextStack->child); @@ -231,7 +237,7 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) error("\"%s\" is not a macro\n", macroName); return; } - macro_UseNewArgs(args); + contextStack->macroArgs = macro_GetCurrentArgs(); newContext(0); /* Line minus 1 because buffer begins with a newline */ @@ -245,6 +251,7 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) contextStack->fileNameBuf = NULL; contextStack->macro = macro; 
contextStack->nbReptIters = 0; + macro_UseNewArgs(args); } void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size) From b83b9825f81e99414df95b000df0fd47d55004aa Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 17 Aug 2020 17:04:23 +0200 Subject: [PATCH 33/59] Fix _NARG crashing outside of macros And add a test for it --- src/asm/symbol.c | 4 ++++ test/asm/narg-nosect.asm | 1 + test/asm/narg-nosect.err | 3 +++ test/asm/narg-nosect.out | 1 + 4 files changed, 9 insertions(+) create mode 100644 test/asm/narg-nosect.asm create mode 100644 test/asm/narg-nosect.err create mode 100644 test/asm/narg-nosect.out diff --git a/src/asm/symbol.c b/src/asm/symbol.c index 45bbb9678..f1f622ddc 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -77,6 +77,10 @@ void sym_ForEach(void (*func)(struct Symbol *, void *), void *arg) static int32_t Callback_NARG(void) { + if (!macro_GetCurrentArgs()) { + yyerror("_NARG does not make sense outside of a macro"); + return 0; + } return macro_NbArgs(); } diff --git a/test/asm/narg-nosect.asm b/test/asm/narg-nosect.asm new file mode 100644 index 000000000..540994340 --- /dev/null +++ b/test/asm/narg-nosect.asm @@ -0,0 +1 @@ + PRINTT "{_NARG}\n" diff --git a/test/asm/narg-nosect.err b/test/asm/narg-nosect.err new file mode 100644 index 000000000..918c3b11e --- /dev/null +++ b/test/asm/narg-nosect.err @@ -0,0 +1,3 @@ +ERROR: narg-nosect.asm(1): + _NARG does not make sense outside of a macro +error: Assembly aborted (1 errors)! 
diff --git a/test/asm/narg-nosect.out b/test/asm/narg-nosect.out new file mode 100644 index 000000000..ebfabbe4b --- /dev/null +++ b/test/asm/narg-nosect.out @@ -0,0 +1 @@ +$0 From aa76603da9a1c04835f4a431251f77d481c26bdb Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 17 Aug 2020 18:12:21 +0200 Subject: [PATCH 34/59] Add line+col trace info to lexer --- src/asm/lexer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 89c528805..15483226a 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -1377,7 +1377,8 @@ static char const *reportGarbageChar(unsigned char firstByte) static int yylex_NORMAL(void) { - dbgPrint("Lexing in normal mode\n"); + dbgPrint("Lexing in normal mode, line=%" PRIu32 ", col=%" PRIu32 "\n", + lexer_GetLineNo(), lexer_GetColNo()); for (;;) { int c = nextChar(); @@ -1620,7 +1621,8 @@ static int yylex_NORMAL(void) static int yylex_RAW(void) { - dbgPrint("Lexing in raw mode\n"); + dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n", + lexer_GetLineNo(), lexer_GetColNo()); /* This is essentially a modified `readString` */ size_t i = 0; From f9b48c0cad60a45625180e71ced52179b07c3d94 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 12:27:37 +0200 Subject: [PATCH 35/59] Fix else working incorrectly from macros Since the "skip ELSE blocks" variable is global, it used to get carried over from the macro's `if` to the outer's. 
--- src/asm/asmy.y | 15 ++++++++------- test/asm/if-macro.asm | 13 +++++++++++++ test/asm/if-macro.err | 2 ++ test/asm/if-macro.out | 0 4 files changed, 23 insertions(+), 7 deletions(-) create mode 100644 test/asm/if-macro.asm create mode 100644 test/asm/if-macro.err create mode 100644 test/asm/if-macro.out diff --git a/src/asm/asmy.y b/src/asm/asmy.y index 81093c72b..653c001e1 100644 --- a/src/asm/asmy.y +++ b/src/asm/asmy.y @@ -39,7 +39,7 @@ uint32_t nListCountEmpty; char *tzNewMacro; uint32_t ulNewMacroSize; int32_t nPCOffset; -bool executedIfBlock; /* If this is set, ELIFs cannot be executed anymore */ +bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */ static uint32_t str2int2(uint8_t *s, int32_t length) { @@ -360,8 +360,8 @@ conditional : if if : T_POP_IF const '\n' { nIFDepth++; - executedIfBlock = !!$2; - if (!executedIfBlock) + executeElseBlock = !$2; + if (executeElseBlock) lexer_SetMode(LEXER_SKIP_TO_ELIF); } ; @@ -370,11 +370,11 @@ elif : T_POP_ELIF const '\n' { if (nIFDepth <= 0) fatalerror("Found ELIF outside an IF construct\n"); - if (executedIfBlock) { + if (!executeElseBlock) { lexer_SetMode(LEXER_SKIP_TO_ENDC); } else { - executedIfBlock = !!$2; - if (!executedIfBlock) + executeElseBlock = !$2; + if (executeElseBlock) lexer_SetMode(LEXER_SKIP_TO_ELIF); } } @@ -384,7 +384,7 @@ else : T_POP_ELSE '\n' { if (nIFDepth <= 0) fatalerror("Found ELSE outside an IF construct\n"); - if (executedIfBlock) + if (!executeElseBlock) lexer_SetMode(LEXER_SKIP_TO_ENDC); } ; @@ -394,6 +394,7 @@ endc : T_POP_ENDC '\n' { fatalerror("Found ENDC outside an IF construct\n"); nIFDepth--; + executeElseBlock = false; } ; diff --git a/test/asm/if-macro.asm b/test/asm/if-macro.asm new file mode 100644 index 000000000..427e5eb3a --- /dev/null +++ b/test/asm/if-macro.asm @@ -0,0 +1,13 @@ +m: macro + if 0 + WARN "3" + else + WARN "5" + endc +endm + +if 1 + m +else + WARN "12" +endc diff --git a/test/asm/if-macro.err b/test/asm/if-macro.err new file mode 
100644 index 000000000..23463fe9a --- /dev/null +++ b/test/asm/if-macro.err @@ -0,0 +1,2 @@ +warning: if-macro.asm(10) -> if-macro.asm::m(5): [-Wuser] + 5 diff --git a/test/asm/if-macro.out b/test/asm/if-macro.out new file mode 100644 index 000000000..e69de29bb From b7b03ee4510f1dd3f8b7dfdbe4a21d5c0d9bfeb2 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 12:34:05 +0200 Subject: [PATCH 36/59] Fix "REPT 0" not being a no-op --- src/asm/fstack.c | 2 ++ test/asm/rept-0.asm | 3 +++ test/asm/rept-0.err | 0 test/asm/rept-0.out | 0 4 files changed, 5 insertions(+) create mode 100644 test/asm/rept-0.asm create mode 100644 test/asm/rept-0.err create mode 100644 test/asm/rept-0.out diff --git a/src/asm/fstack.c b/src/asm/fstack.c index c1f8a0848..0a8cf5f10 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -258,6 +258,8 @@ void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size) { dbgPrint("Running REPT(%" PRIu32 ")\n", count); + if (count == 0) + return; uint32_t reptDepth = contextStack->reptDepth; newContext(reptDepth + 1); diff --git a/test/asm/rept-0.asm b/test/asm/rept-0.asm new file mode 100644 index 000000000..13a52a69c --- /dev/null +++ b/test/asm/rept-0.asm @@ -0,0 +1,3 @@ +REPT 0 + WARN "2" +ENDR diff --git a/test/asm/rept-0.err b/test/asm/rept-0.err new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/rept-0.out b/test/asm/rept-0.out new file mode 100644 index 000000000..e69de29bb From ece6853e0f5d206d5b2159aa300ce2a81030d130 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 13:05:55 +0200 Subject: [PATCH 37/59] Implement `opt b` and `opt g` --- include/asm/lexer.h | 13 +++++++++++++ src/asm/fstack.c | 2 +- src/asm/lexer.c | 38 +++++++++++++++++++++++++++----------- src/asm/main.c | 7 ++++--- test/asm/opt-b.asm | 2 ++ test/asm/opt-b.err | 0 test/asm/opt-b.out | 1 + test/asm/opt-g.asm | 2 ++ test/asm/opt-g.err | 0 test/asm/opt-g.out | 1 + 10 files changed, 51 insertions(+), 15 deletions(-) create 
mode 100644 test/asm/opt-b.asm create mode 100644 test/asm/opt-b.err create mode 100644 test/asm/opt-b.out create mode 100644 test/asm/opt-g.asm create mode 100644 test/asm/opt-g.err create mode 100644 test/asm/opt-g.out diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 0f33c9160..6f9d1fa17 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -30,6 +30,19 @@ static inline void lexer_SetStateAtEOL(struct LexerState *state) lexerStateEOL = state; } +extern char const *binDigits; +extern char const *gfxDigits; + +static inline void lexer_SetBinDigits(char const *digits) +{ + binDigits = digits; +} + +static inline void lexer_SetGfxDigits(char const *digits) +{ + gfxDigits = digits; +} + struct LexerState *lexer_OpenFile(char const *path); struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo); void lexer_RestartRept(uint32_t lineNo); diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 0a8cf5f10..a464f436a 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -336,7 +336,7 @@ char const *fstk_GetFileName(void) int nbChars = snprintf(dest, remainingChars, __VA_ARGS__); \ \ if (nbChars >= remainingChars) \ - fatalerror("File stack entry too large"); \ + fatalerror("File stack entry too large\n"); \ remainingChars -= nbChars; \ dest += nbChars; \ } while (0) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 15483226a..762fa1487 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -966,20 +966,26 @@ static void readFractionalPart(void) yylval.nConstValue |= fractional * (yylval.nConstValue >= 0 ? 
1 : -1); } +char const *binDigits; + static void readBinaryNumber(void) { uint32_t value = 0; - dbgPrint("Reading binary number\n"); + dbgPrint("Reading binary number with digits [%c,%c]\n", binDigits[0], binDigits[1]); for (;;) { int c = peek(0); + int bit; - /* TODO: handle `-b`'s dynamic chars */ - if (c != '0' && c != '1') + if (c == binDigits[0]) + bit = 0; + else if (c == binDigits[1]) + bit = 1; + else break; - if (value > (UINT32_MAX - (c - '0')) / 2) + if (value > (UINT32_MAX - bit) / 2) warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n"); - value = value * 2 + (c - '0'); + value = value * 2 + bit; shiftChars(1); } @@ -1019,19 +1025,29 @@ static void readHexNumber(void) yylval.nConstValue = value; } +char const *gfxDigits; + static void readGfxConstant(void) { uint32_t bp0 = 0, bp1 = 0; uint8_t width = 0; - dbgPrint("Reading gfx constant\n"); + dbgPrint("Reading gfx constant with digits [%c,%c,%c,%c]\n", + gfxDigits[0], gfxDigits[1], gfxDigits[2], gfxDigits[3]); for (;;) { int c = peek(0); - - /* TODO: handle `-g`'s dynamic chars */ - if (c < '0' || c > '3') + uint32_t pixel; + + if (c == gfxDigits[0]) + pixel = 0; + else if (c == gfxDigits[1]) + pixel = 1; + else if (c == gfxDigits[2]) + pixel = 2; + else if (c == gfxDigits[3]) + pixel = 3; + else break; - uint8_t pixel = c - '0'; if (width < 8) { bp0 = bp0 << 1 | (pixel & 1); @@ -1531,7 +1547,7 @@ static int yylex_NORMAL(void) case '%': /* Either a modulo, or a binary constant */ secondChar = peek(0); - if (secondChar != '0' && secondChar != '1') + if (secondChar != binDigits[0] && secondChar != binDigits[1]) return T_OP_MOD; yylval.nConstValue = 0; diff --git a/src/asm/main.c b/src/asm/main.c index 4f264d369..c57b7fbaf 100644 --- a/src/asm/main.c +++ b/src/asm/main.c @@ -71,10 +71,11 @@ struct sOptionStackEntry { struct sOptionStackEntry *pOptionStack; -void opt_SetCurrentOptions(struct sOptions *pOpt) +void opt_SetCurrentOptions(struct sOptions *opt) { - /* TODO */ - (void)pOpt; + 
CurrentOptions = *opt; + lexer_SetGfxDigits(CurrentOptions.gbgfx); + lexer_SetBinDigits(CurrentOptions.binary); } void opt_Parse(char *s) diff --git a/test/asm/opt-b.asm b/test/asm/opt-b.asm new file mode 100644 index 000000000..cd16ab382 --- /dev/null +++ b/test/asm/opt-b.asm @@ -0,0 +1,2 @@ +OPT b.X +PRINTV %..X.X.X. diff --git a/test/asm/opt-b.err b/test/asm/opt-b.err new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/opt-b.out b/test/asm/opt-b.out new file mode 100644 index 000000000..fce2ec6e5 --- /dev/null +++ b/test/asm/opt-b.out @@ -0,0 +1 @@ +$2A \ No newline at end of file diff --git a/test/asm/opt-g.asm b/test/asm/opt-g.asm new file mode 100644 index 000000000..5c1658175 --- /dev/null +++ b/test/asm/opt-g.asm @@ -0,0 +1,2 @@ +OPT g.x0X +PRINTV `.x.x0X0X diff --git a/test/asm/opt-g.err b/test/asm/opt-g.err new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/opt-g.out b/test/asm/opt-g.out new file mode 100644 index 000000000..e0c24b67e --- /dev/null +++ b/test/asm/opt-g.out @@ -0,0 +1 @@ +$F55 \ No newline at end of file From f7b7a97407c473c246546905e8ed4123503b579b Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 13:16:56 +0200 Subject: [PATCH 38/59] Prevent expanding macro args in comments Also use a cleaner way, instead of hardcoding to capture --- src/asm/lexer.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 762fa1487..6a5d91985 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -281,6 +281,7 @@ struct LexerState { char *captureBuf; /* Buffer to send the captured text to if non-NULL */ size_t captureCapacity; /* Size of the buffer above */ + bool disableMacroArgs; size_t macroArgScanDistance; /* Max distance already scanned for macro args */ bool expandStrings; struct Expansion *expansions; @@ -299,6 +300,7 @@ static void initState(struct LexerState *state) state->capturing = false; state->captureBuf = NULL; + 
state->disableMacroArgs = false; state->macroArgScanDistance = 0; state->expandStrings = true; state->expansions = NULL; @@ -723,8 +725,8 @@ static int peek(uint8_t distance) if (distance >= lexerState->macroArgScanDistance) { lexerState->macroArgScanDistance = distance + 1; /* Do not consider again */ - /* If not capturing and character is a backslash, check for a macro arg */ - if (!lexerState->capturing && c == '\\') { + /* If enabled and character is a backslash, check for a macro arg */ + if (!lexerState->disableMacroArgs && c == '\\') { distance++; lexerState->macroArgScanDistance++; c = peekInternal(distance); @@ -873,6 +875,7 @@ void lexer_DumpStringExpansions(void) static void discardComment(void) { dbgPrint("Discarding comment\n"); + lexerState->disableMacroArgs = true; for (;;) { int c = peek(0); @@ -880,6 +883,7 @@ static void discardComment(void) break; shiftChars(1); } + lexerState->disableMacroArgs = false; } /* Function to read a line continuation */ @@ -1748,10 +1752,8 @@ static int skipIfBlock(bool toEndc) int token; bool atLineStart = lexerState->atLineStart; - /* Prevent expanding macro args in this state by enabling capture to nothing */ - lexerState->capturing = true; - lexerState->captureSize = 0; - lexerState->captureBuf = NULL; + /* Prevent expanding macro args in this state */ + lexerState->disableMacroArgs = true; for (;;) { if (atLineStart) { @@ -1811,7 +1813,7 @@ static int skipIfBlock(bool toEndc) } finish: - lexerState->capturing = false; + lexerState->disableMacroArgs = false; lexerState->atLineStart = false; return token; @@ -1881,6 +1883,7 @@ static char *startCapture(void) lexerState->capturing = true; lexerState->captureSize = 0; + lexerState->disableMacroArgs = true; if (lexerState->isMmapped) { return &lexerState->ptr[lexerState->offset]; @@ -1955,6 +1958,7 @@ void lexer_CaptureRept(char **capture, size_t *size) *capture = captureStart; *size = lexerState->captureSize - strlen("ENDR"); lexerState->captureBuf = NULL; + 
lexerState->disableMacroArgs = false; } void lexer_CaptureMacroBody(char **capture, size_t *size) @@ -2043,4 +2047,5 @@ void lexer_CaptureMacroBody(char **capture, size_t *size) *capture = captureStart; *size = lexerState->captureSize - strlen("ENDM"); lexerState->captureBuf = NULL; + lexerState->disableMacroArgs = false; } From 615f1072d95d5d382012b2f0b2b55bc5ec61c436 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 13:31:59 +0200 Subject: [PATCH 39/59] Fix `readFractionalPart` never shifting characters --- src/asm/lexer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 6a5d91985..1fd5d2b00 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -948,6 +948,7 @@ static void readFractionalPart(void) if (c < '0' || c > '9') break; + shiftChars(1); if (divisor > (UINT32_MAX - (c - '0')) / 10) { warning(WARNING_LARGE_CONSTANT, "Precision of fixed-point constant is too large\n"); From e33c2ad6a2b5b04903c1ebbce5bed8f547898451 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 13:47:26 +0200 Subject: [PATCH 40/59] Fix `INCLUDE` ignoring `-MG` --- src/asm/fstack.c | 5 ++++- src/asm/symbol.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/asm/fstack.c b/src/asm/fstack.c index a464f436a..51caf249a 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -204,7 +204,10 @@ void fstk_RunInclude(char const *path) if (!fstk_FindFile(path, &fullPath, &size)) { free(fullPath); - error("Unable to open included file '%s': %s\n", path, strerror(errno)); + if (oGeneratedMissingIncludes) + oFailedOnMissingInclude = true; + else + error("Unable to open included file '%s': %s\n", path, strerror(errno)); return; } dbgPrint("Full path: \"%s\"\n", fullPath); diff --git a/src/asm/symbol.c b/src/asm/symbol.c index f1f622ddc..8bf37b97a 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -78,7 +78,7 @@ void sym_ForEach(void (*func)(struct Symbol *, void *), void *arg) static int32_t Callback_NARG(void) { 
if (!macro_GetCurrentArgs()) { - yyerror("_NARG does not make sense outside of a macro"); + error("_NARG does not make sense outside of a macro\n"); return 0; } return macro_NbArgs(); From 9e3d7a50e6a93152f60872dd03080c2c1942a97e Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 16:37:01 +0200 Subject: [PATCH 41/59] Handle comments in line continuations --- src/asm/lexer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 1fd5d2b00..14d693ce2 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -909,6 +909,8 @@ static void readLineContinuation(void) || lexerState->expansions->distance) lexerState->lineNo++; return; + } else if (c == ';') { + discardComment(); } else { error("Begun line continuation, but encountered character '%s'\n", print(c)); From ac011fe69f009152eb3bc87707d6140620995547 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 16:49:21 +0200 Subject: [PATCH 42/59] Use common function to discard comments in macro args --- src/asm/lexer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 14d693ce2..9f369f9d8 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -1667,10 +1667,8 @@ static int yylex_RAW(void) case ';': /* Comments inside macro args */ if (insideString) break; - do { - shiftChars(1); - c = peek(0); - } while (c != EOF && c != '\r' && c != '\n'); + discardComment(); + c = peek(0); /* fallthrough */ case ',': case '\r': From 71a0a42cfb482929834973fd206f5dcc1bdbdaae Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 18 Aug 2020 16:53:31 +0200 Subject: [PATCH 43/59] Fix C2x use of `static_assert` --- src/asm/fstack.c | 3 ++- src/asm/lexer.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 51caf249a..e89e3c92b 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -387,7 +387,8 @@ void fstk_Init(char *mainPath, uint32_t maxRecursionDepth) #else /* If this holds, 
then GCC raises a warning about the `if` above being dead code */ static_assert(UINT32_MAX - <= (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])); + <= (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0]), + "Please enable recursion depth capping"); if (0) { #endif error("Recursion depth may not be higher than %zu, defaulting to 64\n", diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 9f369f9d8..bdc3e877f 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -234,7 +234,7 @@ static struct KeywordMapping { #define LEXER_BUF_SIZE 42 /* TODO: determine a sane value for this */ /* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */ -static_assert(LEXER_BUF_SIZE <= SSIZE_MAX); +static_assert(LEXER_BUF_SIZE <= SSIZE_MAX, "Lexer buffer size is too large"); struct Expansion { struct Expansion *firstChild; From 542b5d18f1307f23104bdc52fed03ad249895c1c Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 23 Aug 2020 00:51:36 +0200 Subject: [PATCH 44/59] Fix possible capture buffer size overflow Attempt to grow it to the max size first. 
Seriously, if this triggers, *how* --- src/asm/lexer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index bdc3e877f..e2a0636ec 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -503,7 +503,12 @@ void lexer_ToggleStringExpansion(bool enable) static void reallocCaptureBuf(void) { - lexerState->captureCapacity *= 2; + if (lexerState->captureCapacity == SIZE_MAX) + fatalerror("Cannot grow capture buffer past %zu bytes", SIZE_MAX); + else if (lexerState->captureCapacity > SIZE_MAX / 2) + lexerState->captureCapacity = SIZE_MAX; + else + lexerState->captureCapacity *= 2; lexerState->captureBuf = realloc(lexerState->captureBuf, lexerState->captureCapacity); if (!lexerState->captureBuf) fatalerror("realloc error while resizing capture buffer: %s\n", strerror(errno)); From b65ea64a583855ae69b685e3c094d0697d86b2c3 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 23 Aug 2020 01:31:56 +0200 Subject: [PATCH 45/59] Add newlines to all test output MacOS treats them differently, for some reason. --- test/asm/nested-brackets.asm | 1 + test/asm/nested-brackets.out | 2 +- test/asm/opt-b.asm | 1 + test/asm/opt-b.out | 2 +- test/asm/opt-g.asm | 1 + test/asm/opt-g.out | 2 +- 6 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/asm/nested-brackets.asm b/test/asm/nested-brackets.asm index 0823286ac..344d9f8c5 100644 --- a/test/asm/nested-brackets.asm +++ b/test/asm/nested-brackets.asm @@ -3,3 +3,4 @@ WRAPPER equs "TRIN" PRINTT "{S{WRAPPER}G}\n" PRINTT "{S{WRAPPER}G" + PRINTT "\n" diff --git a/test/asm/nested-brackets.out b/test/asm/nested-brackets.out index bcaba8626..2c94e4837 100644 --- a/test/asm/nested-brackets.out +++ b/test/asm/nested-brackets.out @@ -1,2 +1,2 @@ OK -OK \ No newline at end of file +OK diff --git a/test/asm/opt-b.asm b/test/asm/opt-b.asm index cd16ab382..293421edc 100644 --- a/test/asm/opt-b.asm +++ b/test/asm/opt-b.asm @@ -1,2 +1,3 @@ OPT b.X PRINTV %..X.X.X. 
+PRINTT "\n" diff --git a/test/asm/opt-b.out b/test/asm/opt-b.out index fce2ec6e5..725558707 100644 --- a/test/asm/opt-b.out +++ b/test/asm/opt-b.out @@ -1 +1 @@ -$2A \ No newline at end of file +$2A diff --git a/test/asm/opt-g.asm b/test/asm/opt-g.asm index 5c1658175..a4cd3a381 100644 --- a/test/asm/opt-g.asm +++ b/test/asm/opt-g.asm @@ -1,2 +1,3 @@ OPT g.x0X PRINTV `.x.x0X0X +PRINTT "\n" diff --git a/test/asm/opt-g.out b/test/asm/opt-g.out index e0c24b67e..78f6c0a73 100644 --- a/test/asm/opt-g.out +++ b/test/asm/opt-g.out @@ -1 +1 @@ -$F55 \ No newline at end of file +$F55 From c952dd8a6e1837ebfbcb5ade04f7d66cefa38cbd Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 23 Aug 2020 02:08:04 +0200 Subject: [PATCH 46/59] Fix fixed-point constants not working correctly And added a test to check their behavior --- src/asm/lexer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index e2a0636ec..b5a881294 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -965,6 +965,7 @@ static void readFractionalPart(void) break; } value = value * 10 + (c - '0'); + divisor *= 10; } if (yylval.nConstValue > INT16_MAX || yylval.nConstValue < INT16_MIN) From dbef51ba052ff9d98fbed87f36b53e5a2d4af074 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 23 Aug 2020 02:09:40 +0200 Subject: [PATCH 47/59] Move `isWhitespace` to a place where it makes more sense --- src/asm/lexer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index b5a881294..0361e8836 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -232,6 +232,11 @@ static struct KeywordMapping { {"OPT", T_POP_OPT} }; +static bool isWhitespace(int c) +{ + return c == ' ' || c == '\t'; +} + #define LEXER_BUF_SIZE 42 /* TODO: determine a sane value for this */ /* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */ static_assert(LEXER_BUF_SIZE <= SSIZE_MAX, "Lexer buffer size is too large"); @@ 
-893,11 +898,6 @@ static void discardComment(void) /* Function to read a line continuation */ -static bool isWhitespace(int c) -{ - return c == ' ' || c == '\t'; -} - static void readLineContinuation(void) { dbgPrint("Beginning line continuation\n"); From 7381d7b92fa21cce3c086cb01199036053687078 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 23 Aug 2020 02:22:23 +0200 Subject: [PATCH 48/59] Remove unnecessarily nested symbol data union --- include/asm/symbol.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/include/asm/symbol.h b/include/asm/symbol.h index e2beda470..5efdc150e 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -40,18 +40,16 @@ struct Symbol { bool hasCallback; union { - union { /* Otherwise */ - /* If sym_IsNumeric */ - int32_t value; - int32_t (*numCallback)(void); - /* For SYM_MACRO */ - struct { - size_t macroSize; - char *macro; - }; - /* For SYM_EQUS, TODO: separate "base" fields from SYM_MACRO */ - char const *(*strCallback)(void); /* For SYM_EQUS */ + /* If sym_IsNumeric */ + int32_t value; + int32_t (*numCallback)(void); + /* For SYM_MACRO */ + struct { + size_t macroSize; + char *macro; }; + /* For SYM_EQUS, TODO: separate "base" fields from SYM_MACRO */ + char const *(*strCallback)(void); /* For SYM_EQUS */ }; uint32_t ID; /* ID of the symbol in the object file (-1 if none) */ From b224cab3e052e79ee8628ed896d4350765e1b75d Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 23 Aug 2020 02:32:33 +0200 Subject: [PATCH 49/59] Harmonize printing `distance` --- src/asm/lexer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 0361e8836..2a0c62e1a 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -509,7 +509,7 @@ void lexer_ToggleStringExpansion(bool enable) static void reallocCaptureBuf(void) { if (lexerState->captureCapacity == SIZE_MAX) - fatalerror("Cannot grow capture buffer past %zu bytes", SIZE_MAX); + fatalerror("Cannot 
grow capture buffer past %zu bytes\n", SIZE_MAX); else if (lexerState->captureCapacity > SIZE_MAX / 2) lexerState->captureCapacity = SIZE_MAX; else @@ -669,8 +669,8 @@ static char const *expandMacroArg(char name, size_t distance) static int peekInternal(uint8_t distance) { if (distance >= LEXER_BUF_SIZE) - fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n", - distance, LEXER_BUF_SIZE); + fatalerror("Internal lexer error: buffer has insufficient size for peeking (%" + PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE); size_t ofs = lexerState->expansionOfs + distance; struct Expansion const *expansion = getExpansionAtDistance(&ofs); From 96cb5e10ed87e7f072f9f98440a4139a13ba013d Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 23 Aug 2020 03:02:17 +0200 Subject: [PATCH 50/59] Fix range-dependent dead code in recursion depth check --- include/asm/fstack.h | 4 ++-- src/asm/fstack.c | 18 +++++------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/include/asm/fstack.h b/include/asm/fstack.h index 52466a118..d381a7556 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -39,7 +39,7 @@ struct sContext { int32_t nREPTBodyLastLine; }; -extern unsigned int nMaxRecursionDepth; +extern size_t nMaxRecursionDepth; void fstk_AddIncludePath(char const *s); /** @@ -61,6 +61,6 @@ char *fstk_DumpToStr(void); char const *fstk_GetFileName(void); uint32_t fstk_GetLine(void); -void fstk_Init(char *mainPath, uint32_t maxRecursionDepth); +void fstk_Init(char *mainPath, size_t maxRecursionDepth); #endif /* RGBDS_ASM_FSTACK_H */ diff --git a/src/asm/fstack.c b/src/asm/fstack.c index e89e3c92b..83df7696a 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -44,8 +44,8 @@ struct Context { static struct Context *contextStack; static struct Context *topLevelContext; -static unsigned int contextDepth = 0; -unsigned int nMaxRecursionDepth; +static size_t contextDepth = 0; +size_t nMaxRecursionDepth; static unsigned int nbIncPaths = 
0; static char const *includePaths[MAXINCPATHS]; @@ -180,7 +180,7 @@ bool yywrap(void) static void newContext(uint32_t reptDepth) { if (++contextDepth >= nMaxRecursionDepth) - fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); + fatalerror("Recursion limit (%zu) exceeded\n", nMaxRecursionDepth); contextStack->child = malloc(sizeof(*contextStack->child) + reptDepth * sizeof(contextStack->reptIters[0])); if (!contextStack->child) @@ -360,7 +360,7 @@ uint32_t fstk_GetLine(void) return lexer_GetLineNo(); } -void fstk_Init(char *mainPath, uint32_t maxRecursionDepth) +void fstk_Init(char *mainPath, size_t maxRecursionDepth) { topLevelContext = malloc(sizeof(*topLevelContext)); if (!topLevelContext) @@ -381,18 +381,10 @@ void fstk_Init(char *mainPath, uint32_t maxRecursionDepth) contextStack = topLevelContext; -#if 0 if (maxRecursionDepth > (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])) { -#else - /* If this holds, then GCC raises a warning about the `if` above being dead code */ - static_assert(UINT32_MAX - <= (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0]), - "Please enable recursion depth capping"); - if (0) { -#endif error("Recursion depth may not be higher than %zu, defaulting to 64\n", - (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])); + (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])); nMaxRecursionDepth = 64; } else { nMaxRecursionDepth = maxRecursionDepth; From 82469ac0fde1b2e73bfbfc58853a7ecb95e4a794 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 31 Aug 2020 14:27:24 +0200 Subject: [PATCH 51/59] Shim around `mmap` on Windows --- src/asm/lexer.c | 84 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 2a0c62e1a..ced17eb9e 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -6,7 +6,7 @@ * SPDX-License-Identifier: MIT */ -#include +#include #include 
#include #include @@ -22,6 +22,7 @@ #include #include "extern/utf8decoder.h" +#include "platform.h" /* For `mmap` */ #include "asm/asm.h" #include "asm/lexer.h" @@ -312,12 +313,55 @@ static void initState(struct LexerState *state) state->expansionOfs = 0; } +/* Neither MSVC nor MinGW provide `mmap` */ +#if defined(_MSC_VER) || defined(__MINGW32__) +# include +# include +# include +# define MAP_FAILED NULL +# define mapFile(ptr, fd, path, size) do { \ + HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, \ + FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_RANDOM_ACCESS, NULL); \ + HANDLE mappingObj; \ + \ + if (file == INVALID_HANDLE_VALUE) \ + break; \ + mappingObj = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL); \ + (ptr) = mappingObj == INVALID_HANDLE_VALUE \ + ? NULL \ + : MapViewOfFile(mappingObj, FILE_MAP_READ, 0, 0, 0); \ + CloseHandle(mappingObj); \ + CloseHandle(file); \ +} while (0) +# define munmap(ptr, size) UnmapViewOfFile((ptr)) + +#else /* defined(_MSC_VER) || defined(__MINGW32__) */ + +# include +# define mapFile(ptr, fd, path, size) do { \ + (ptr) = mmap(NULL, (size), PROT_READ, MAP_PRIVATE, (fd), 0); \ + \ + if ((ptr) == MAP_FAILED && errno == ENOTSUP) { \ + /* + * The implementation may not support MAP_PRIVATE; try again with MAP_SHARED + * instead, offering, I believe, weaker guarantees about external + * modifications to the file while reading it. That's still better than not + * opening it at all, though. 
+ */ \ + if (verbose) \ + printf("mmap(%s, MAP_PRIVATE) failed, retrying with MAP_SHARED\n", path); \ + (ptr) = mmap(NULL, (size), PROT_READ, MAP_SHARED, (fd), 0); \ + } \ +} while (0) +#endif /* !( defined(_MSC_VER) || defined(__MINGW32__) ) */ + struct LexerState *lexer_OpenFile(char const *path) { dbgPrint("Opening file \"%s\"\n", path); bool isStdin = !strcmp(path, "-"); struct LexerState *state = malloc(sizeof(*state)); + struct stat fileInfo; /* Give stdin a nicer file name */ if (isStdin) @@ -326,38 +370,27 @@ struct LexerState *lexer_OpenFile(char const *path) error("Failed to allocate memory for lexer state: %s\n", strerror(errno)); return NULL; } - state->path = path; - - state->isFile = true; - state->fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY); - if (state->fd == -1) { - error("Failed to open file \"%s\": %s\n", path, strerror(errno)); + if (!isStdin && stat(path, &fileInfo) != 0) { + error("Failed to stat file \"%s\": %s\n", path, strerror(errno)); free(state); return NULL; } + state->path = path; + state->isFile = true; + state->fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY); state->isMmapped = false; /* By default, assume it won't be mmap()ed */ - off_t size = lseek(state->fd, 0, SEEK_END); - - if (size != 1) { - /* The file is a regular file, so use `mmap` for better performance */ + if (!isStdin && fileInfo.st_size > 0) { + /* Try using `mmap` for better performance */ /* * Important: do NOT assign to `state->ptr` directly, to avoid a cast that may * alter an eventual `MAP_FAILED` value. It would also invalidate `state->fd`, * being on the other side of the union. */ - void *pa = mmap(NULL, size, PROT_READ, MAP_PRIVATE, state->fd, 0); - - if (pa == MAP_FAILED && errno == ENOTSUP) - /* - * The implementation may not support MAP_PRIVATE; try again with MAP_SHARED - * instead, offering, I believe, weaker guarantees about external - * modifications to the file while reading it. That's still better than not - * opening it at all, though. 
- */ - pa = mmap(NULL, size, PROT_READ, MAP_SHARED, state->fd, 0); - - if (pa == MAP_FAILED) { + void *mappingAddr; + + mapFile(mappingAddr, state->fd, state->path, fileInfo.st_size); + if (mappingAddr == MAP_FAILED) { /* If mmap()ing failed, try again using another method (below) */ state->isMmapped = false; } else { @@ -365,8 +398,8 @@ struct LexerState *lexer_OpenFile(char const *path) close(state->fd); state->isMmapped = true; - state->ptr = pa; - state->size = size; + state->ptr = mappingAddr; + state->size = fileInfo.st_size; state->offset = 0; if (verbose) @@ -378,7 +411,6 @@ struct LexerState *lexer_OpenFile(char const *path) if (verbose) printf("File %s opened as regular, errno reports \"%s\"\n", path, strerror(errno)); - lseek(state->fd, 0, SEEK_SET); state->index = 0; state->nbChars = 0; } From 138523570e1b8a6a4e45f5c64a4335807c37c657 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 31 Aug 2020 14:47:55 +0200 Subject: [PATCH 52/59] Fix possible uninitialized read on Windows --- src/asm/lexer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index ced17eb9e..b8f888180 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -320,6 +320,7 @@ static void initState(struct LexerState *state) # include # define MAP_FAILED NULL # define mapFile(ptr, fd, path, size) do { \ + (ptr) = MAP_FAILED; \ HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, \ FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_RANDOM_ACCESS, NULL); \ HANDLE mappingObj; \ @@ -327,9 +328,8 @@ static void initState(struct LexerState *state) if (file == INVALID_HANDLE_VALUE) \ break; \ mappingObj = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL); \ - (ptr) = mappingObj == INVALID_HANDLE_VALUE \ - ? 
NULL \ - : MapViewOfFile(mappingObj, FILE_MAP_READ, 0, 0, 0); \ + if (mappingObj != INVALID_HANDLE_VALUE) \ + (ptr) = MapViewOfFile(mappingObj, FILE_MAP_READ, 0, 0, 0); \ CloseHandle(mappingObj); \ CloseHandle(file); \ } while (0) From 8e7afb0ab38aa80464317774a5166e09bd5aa084 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Mon, 31 Aug 2020 16:29:51 +0200 Subject: [PATCH 53/59] Move some MSVC-specific defines to `platform.h` --- include/platform.h | 7 ++++ src/asm/lexer.c | 85 +++++++++++++++++++++++----------------------- 2 files changed, 49 insertions(+), 43 deletions(-) diff --git a/include/platform.h b/include/platform.h index 6c6170d6e..4c060e967 100644 --- a/include/platform.h +++ b/include/platform.h @@ -32,4 +32,11 @@ # define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) #endif +/* MSVC doesn't use POSIX types or defines for `read` */ +#ifdef _MSC_VER +# define STDIN_FILENO 0 +# define ssize_t int +# define SSIZE_MAX INT_MAX +#endif + #endif /* RGBDS_PLATFORM_H */ diff --git a/src/asm/lexer.c b/src/asm/lexer.c index b8f888180..9b9cc1bd4 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -22,7 +22,7 @@ #include #include "extern/utf8decoder.h" -#include "platform.h" /* For `mmap` */ +#include "platform.h" /* For `ssize_t` */ #include "asm/asm.h" #include "asm/lexer.h" @@ -42,6 +42,47 @@ #define dbgPrint(...) 
#endif +/* Neither MSVC nor MinGW provide `mmap` */ +#if defined(_MSC_VER) || defined(__MINGW32__) +# include +# include +# include +# define MAP_FAILED NULL +# define mapFile(ptr, fd, path, size) do { \ + (ptr) = MAP_FAILED; \ + HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, \ + FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_RANDOM_ACCESS, NULL); \ + HANDLE mappingObj; \ + \ + if (file == INVALID_HANDLE_VALUE) \ + break; \ + mappingObj = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL); \ + if (mappingObj != INVALID_HANDLE_VALUE) \ + (ptr) = MapViewOfFile(mappingObj, FILE_MAP_READ, 0, 0, 0); \ + CloseHandle(mappingObj); \ + CloseHandle(file); \ +} while (0) +# define munmap(ptr, size) UnmapViewOfFile((ptr)) + +#else /* defined(_MSC_VER) || defined(__MINGW32__) */ + +# include +# define mapFile(ptr, fd, path, size) do { \ + (ptr) = mmap(NULL, (size), PROT_READ, MAP_PRIVATE, (fd), 0); \ + \ + if ((ptr) == MAP_FAILED && errno == ENOTSUP) { \ + /* + * The implementation may not support MAP_PRIVATE; try again with MAP_SHARED + * instead, offering, I believe, weaker guarantees about external modifications to + * the file while reading it. That's still better than not opening it at all, though + */ \ + if (verbose) \ + printf("mmap(%s, MAP_PRIVATE) failed, retrying with MAP_SHARED\n", path); \ + (ptr) = mmap(NULL, (size), PROT_READ, MAP_SHARED, (fd), 0); \ + } \ +} while (0) +#endif /* !( defined(_MSC_VER) || defined(__MINGW32__) ) */ + /* * Identifiers that are also keywords are listed here. This ONLY applies to ones * that would normally be matched as identifiers! 
Check out `yylex_NORMAL` to @@ -313,48 +354,6 @@ static void initState(struct LexerState *state) state->expansionOfs = 0; } -/* Neither MSVC nor MinGW provide `mmap` */ -#if defined(_MSC_VER) || defined(__MINGW32__) -# include -# include -# include -# define MAP_FAILED NULL -# define mapFile(ptr, fd, path, size) do { \ - (ptr) = MAP_FAILED; \ - HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, \ - FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_RANDOM_ACCESS, NULL); \ - HANDLE mappingObj; \ - \ - if (file == INVALID_HANDLE_VALUE) \ - break; \ - mappingObj = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL); \ - if (mappingObj != INVALID_HANDLE_VALUE) \ - (ptr) = MapViewOfFile(mappingObj, FILE_MAP_READ, 0, 0, 0); \ - CloseHandle(mappingObj); \ - CloseHandle(file); \ -} while (0) -# define munmap(ptr, size) UnmapViewOfFile((ptr)) - -#else /* defined(_MSC_VER) || defined(__MINGW32__) */ - -# include -# define mapFile(ptr, fd, path, size) do { \ - (ptr) = mmap(NULL, (size), PROT_READ, MAP_PRIVATE, (fd), 0); \ - \ - if ((ptr) == MAP_FAILED && errno == ENOTSUP) { \ - /* - * The implementation may not support MAP_PRIVATE; try again with MAP_SHARED - * instead, offering, I believe, weaker guarantees about external - * modifications to the file while reading it. That's still better than not - * opening it at all, though. - */ \ - if (verbose) \ - printf("mmap(%s, MAP_PRIVATE) failed, retrying with MAP_SHARED\n", path); \ - (ptr) = mmap(NULL, (size), PROT_READ, MAP_SHARED, (fd), 0); \ - } \ -} while (0) -#endif /* !( defined(_MSC_VER) || defined(__MINGW32__) ) */ - struct LexerState *lexer_OpenFile(char const *path) { dbgPrint("Opening file \"%s\"\n", path); From 930080f556fc5ac7c41a1e5e05e8ab01e1789142 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 22 Sep 2020 17:18:57 +0200 Subject: [PATCH 54/59] Mark not unmapping macro-containing files as okay There isn't really a better alternative. 
Making several mappings instead requires too much bookkeeping. --- src/asm/lexer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 9b9cc1bd4..c54f854dd 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -304,7 +304,7 @@ struct LexerState { char *ptr; /* Technically `const` during the lexer's execution */ off_t size; off_t offset; - bool isReferenced; /* If a macro in this file requires not unmapping it*/ + bool isReferenced; /* If a macro in this file requires not unmapping it */ }; struct { /* Otherwise */ int fd; @@ -2007,7 +2007,6 @@ void lexer_CaptureMacroBody(char **capture, size_t *size) /* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */ if (lexerState->isMmapped) - /* FIXME: this is godawful, but RGBASM doesn't even clean up anything anyways. */ lexerState->isReferenced = true; /* From 5a65188ca94f2ebaa6ffabac4e7d04cb166de4d6 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 29 Sep 2020 03:40:15 +0200 Subject: [PATCH 55/59] Implement compact file stacks in object files Gets rid of `open_memstream`, enabling Windows compatibility again Also fixes #491 as a nice bonus! 
--- CMakeLists.txt | 2 +- Makefile | 2 +- include/asm/fstack.h | 57 +++-- include/asm/lexer.h | 3 + include/asm/output.h | 2 + include/asm/symbol.h | 4 +- include/link/main.h | 34 ++- include/link/object.h | 9 +- include/link/section.h | 4 +- include/link/symbol.h | 4 +- include/linkdefs.h | 2 +- src/asm/fstack.c | 396 +++++++++++++++++++------------- src/asm/output.c | 131 +++++++++-- src/asm/rpn.c | 4 +- src/asm/symbol.c | 71 ++++-- src/asm/warning.c | 2 +- src/link/assign.c | 6 +- src/link/main.c | 68 +++++- src/link/object.c | 150 +++++++++--- src/link/patch.c | 114 ++++----- src/link/symbol.c | 17 +- src/rgbds.5 | 83 +++++-- test/asm/label-redefinition.err | 2 +- 23 files changed, 799 insertions(+), 368 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f4f234e96..4106b98a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ if(MSVC) add_definitions(/D_CRT_SECURE_NO_WARNINGS) else() if(DEVELOP) - add_compile_options(-Werror -Wall -Wextra -pedantic + add_compile_options(-Werror -Wall -Wextra -pedantic -Wno-type-limits -Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2 -Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused -Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5 diff --git a/Makefile b/Makefile index 84d162569..fe1cf548f 100644 --- a/Makefile +++ b/Makefile @@ -186,7 +186,7 @@ checkpatch: # compilation and make the continous integration infrastructure return failure. 
develop: - $Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic \ + $Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-type-limits \ -Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2 \ -Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused \ -Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5 \ diff --git a/include/asm/fstack.h b/include/asm/fstack.h index d381a7556..80e2096d8 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -21,26 +21,44 @@ #include "types.h" -struct MacroArgs; +struct FileStackNode { + struct FileStackNode *parent; /* Pointer to parent node, for error reporting */ + /* Line at which the parent context was exited; meaningless for the root level */ + uint32_t lineNo; + + struct FileStackNode *next; /* Next node in the output linked list */ + bool referenced; /* If referenced, don't free! */ + uint32_t ID; /* Set only if referenced: ID within the object file, -1 if not output yet */ + + enum { + NODE_REPT, + NODE_FILE, + NODE_MACRO, + } type; +}; + +struct FileStackReptNode { /* NODE_REPT */ + struct FileStackNode node; + uint32_t reptDepth; + /* WARNING: if changing this type, change overflow check in `fstk_Init` */ + uint32_t iters[]; /* REPT iteration counts since last named node, in reverse depth order */ +}; -struct sContext { - struct LexerState *lexerState; - struct Symbol const *pMacro; - struct sContext *next; - char tzFileName[_MAX_PATH + 1]; - struct MacroArgs *macroArgs; - uint32_t uniqueID; - int32_t nLine; - uint32_t nStatus; - char const *pREPTBlock; - uint32_t nREPTBlockCount; - uint32_t nREPTBlockSize; - int32_t nREPTBodyFirstLine; - int32_t nREPTBodyLastLine; +struct FileStackNamedNode { /* NODE_FILE, NODE_MACRO */ + struct FileStackNode node; + char name[]; /* File name for files, file::macro name for macros */ }; extern size_t nMaxRecursionDepth; +struct MacroArgs; + +void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo); +void fstk_DumpCurrent(void); +struct 
FileStackNode *fstk_GetFileStack(void); +/* The lifetime of the returned chars is until reaching the end of that file */ +char const *fstk_GetFileName(void); + void fstk_AddIncludePath(char const *s); /** * @param path The user-provided file name @@ -53,14 +71,9 @@ bool fstk_FindFile(char const *path, char **fullPath, size_t *size); bool yywrap(void); void fstk_RunInclude(char const *path); -void fstk_RunMacro(char *macroName, struct MacroArgs *args); +void fstk_RunMacro(char const *macroName, struct MacroArgs *args); void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size); -void fstk_Dump(void); -char *fstk_DumpToStr(void); -char const *fstk_GetFileName(void); -uint32_t fstk_GetLine(void); - -void fstk_Init(char *mainPath, size_t maxRecursionDepth); +void fstk_Init(char const *mainPath, size_t maxRecursionDepth); #endif /* RGBDS_ASM_FSTACK_H */ diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 6f9d1fa17..9494ab6fc 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -43,6 +43,9 @@ static inline void lexer_SetGfxDigits(char const *digits) gfxDigits = digits; } +/* + * `path` is referenced, but not held onto..! 
+ */ struct LexerState *lexer_OpenFile(char const *path); struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo); void lexer_RestartRept(uint32_t lineNo); diff --git a/include/asm/output.h b/include/asm/output.h index e6a45df77..6f8895b65 100644 --- a/include/asm/output.h +++ b/include/asm/output.h @@ -18,6 +18,8 @@ struct Expression; extern char *tzObjectname; extern struct Section *pSectionList, *pCurrentSection; +void out_RegisterNode(struct FileStackNode *node); +void out_ReplaceNode(struct FileStackNode *node); void out_SetFileName(char *s); void out_CreatePatch(uint32_t type, struct Expression const *expr, uint32_t ofs); diff --git a/include/asm/symbol.h b/include/asm/symbol.h index 5efdc150e..fa02a821c 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -35,8 +35,8 @@ struct Symbol { bool isExported; /* Whether the symbol is to be exported */ bool isBuiltin; /* Whether the symbol is a built-in */ struct Section *section; - char fileName[_MAX_PATH + 1]; /* File where the symbol was defined. */ - uint32_t fileLine; /* Line where the symbol was defined. */ + struct FileStackNode *src; /* Where the symbol was defined */ + uint32_t fileLine; /* Line where the symbol was defined */ bool hasCallback; union { diff --git a/include/link/main.h b/include/link/main.h index 25a69a9a9..d55e0a8e2 100644 --- a/include/link/main.h +++ b/include/link/main.h @@ -29,15 +29,45 @@ extern bool beVerbose; extern bool isWRA0Mode; extern bool disablePadding; +struct FileStackNode { + struct FileStackNode *parent; + /* Line at which the parent context was exited; meaningless for the root level */ + uint32_t lineNo; + + enum { + NODE_REPT, + NODE_FILE, + NODE_MACRO, + } type; + union { + char *name; /* NODE_FILE, NODE_MACRO */ + struct { /* NODE_REPT */ + uint32_t reptDepth; + uint32_t *iters; + }; + }; +}; + /* Helper macro for printing verbose-mode messages */ #define verbosePrint(...) 
do { \ if (beVerbose) \ fprintf(stderr, __VA_ARGS__); \ } while (0) -void error(char const *fmt, ...); +/** + * Dump a file stack to stderr + * @param node The leaf node to dump the context of + */ +char const *dumpFileStack(struct FileStackNode const *node); + +void warning(struct FileStackNode const *where, uint32_t lineNo, + char const *fmt, ...) format_(printf, 3, 4); + +void error(struct FileStackNode const *where, uint32_t lineNo, + char const *fmt, ...) format_(printf, 3, 4); -noreturn_ void fatal(char const *fmt, ...); +noreturn_ void fatal(struct FileStackNode const *where, uint32_t lineNo, + char const *fmt, ...) format_(printf, 3, 4); /** * Opens a file if specified, and aborts on error. diff --git a/include/link/object.h b/include/link/object.h index 2ecb0046c..b43d728be 100644 --- a/include/link/object.h +++ b/include/link/object.h @@ -14,8 +14,9 @@ /** * Read an object (.o) file, and add its info to the data structures. * @param fileName A path to the object file to be read + * @param i The ID of the file */ -void obj_ReadFile(char const *fileName); +void obj_ReadFile(char const *fileName, unsigned int i); /** * Perform validation on the object files' contents @@ -27,6 +28,12 @@ void obj_DoSanityChecks(void); */ void obj_CheckAssertions(void); +/** + * Sets up object file reading + * @param nbFiles The number of object files that will be read + */ +void obj_Setup(unsigned int nbFiles); + /** * `free`s all object memory that was allocated. 
*/ diff --git a/include/link/section.h b/include/link/section.h index 24280412e..d9da1a5b0 100644 --- a/include/link/section.h +++ b/include/link/section.h @@ -19,6 +19,7 @@ #include "linkdefs.h" +struct FileStackNode; struct Section; struct AttachedSymbol { @@ -27,7 +28,8 @@ struct AttachedSymbol { }; struct Patch { - char *fileName; + struct FileStackNode const *src; + uint32_t lineNo; int32_t offset; uint32_t pcSectionID; uint32_t pcOffset; diff --git a/include/link/symbol.h b/include/link/symbol.h index 14210f7ea..d00857953 100644 --- a/include/link/symbol.h +++ b/include/link/symbol.h @@ -16,12 +16,14 @@ #include "linkdefs.h" +struct FileStackNode; + struct Symbol { /* Info contained in the object files */ char *name; enum ExportLevel type; char const *objFileName; - char *fileName; + struct FileStackNode const *src; int32_t lineNo; int32_t sectionID; union { diff --git a/include/linkdefs.h b/include/linkdefs.h index 029501ca5..e088a5116 100644 --- a/include/linkdefs.h +++ b/include/linkdefs.h @@ -14,7 +14,7 @@ #define RGBDS_OBJECT_VERSION_STRING "RGB%1u" #define RGBDS_OBJECT_VERSION_NUMBER 9U -#define RGBDS_OBJECT_REV 5U +#define RGBDS_OBJECT_REV 6U enum AssertionType { ASSERT_WARN, diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 83df7696a..11e6b504f 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -29,27 +29,83 @@ struct Context { struct Context *parent; - struct Context *child; + struct FileStackNode *fileInfo; struct LexerState *lexerState; uint32_t uniqueID; - char const *fileName; - char *fileNameBuf; - uint32_t lineNo; /* Line number at which the context was EXITED */ - struct Symbol const *macro; struct MacroArgs *macroArgs; /* Macro args are *saved* here */ - uint32_t nbReptIters; /* If zero, this isn't a REPT block */ - size_t reptDepth; - uint32_t reptIters[]; + uint32_t nbReptIters; }; static struct Context *contextStack; -static struct Context *topLevelContext; static size_t contextDepth = 0; +#define DEFAULT_MAX_DEPTH 64 size_t 
nMaxRecursionDepth; static unsigned int nbIncPaths = 0; static char const *includePaths[MAXINCPATHS]; +char const *dumpNodeAndParents(struct FileStackNode const *node) +{ + char const *name; + + if (node->type == NODE_REPT) { + assert(node->parent); /* REPT nodes should always have a parent */ + struct FileStackReptNode const *reptInfo = (struct FileStackReptNode const *)node; + + name = dumpNodeAndParents(node->parent); + fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, name); + for (uint32_t i = reptInfo->reptDepth; i--; ) + fprintf(stderr, "::REPT~%" PRIu32, reptInfo->iters[i]); + } else { + name = ((struct FileStackNamedNode const *)node)->name; + if (node->parent) { + dumpNodeAndParents(node->parent); + fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, name); + } else { + fputs(name, stderr); + } + } + return name; +} + +void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo) +{ + dumpNodeAndParents(node); + fprintf(stderr, "(%" PRIu32 ")", lineNo); +} + +void fstk_DumpCurrent(void) +{ + if (!contextStack) { + fputs("at top level", stderr); + return; + } + fstk_Dump(contextStack->fileInfo, lexer_GetLineNo()); +} + +struct FileStackNode *fstk_GetFileStack(void) +{ + struct FileStackNode *node = contextStack->fileInfo; + + /* Mark node and all of its parents as referenced if not already so they don't get freed */ + while (node && !node->referenced) { + node->ID = -1; + node->referenced = true; + node = node->parent; + } + return contextStack->fileInfo; +} + +char const *fstk_GetFileName(void) +{ + /* Iterating via the nodes themselves skips nested REPTs */ + struct FileStackNode const *node = contextStack->fileInfo; + + while (node->type != NODE_FILE) + node = node->parent; + return ((struct FileStackNamedNode const *)node)->name; +} + void fstk_AddIncludePath(char const *path) { if (path[0] == '\0') @@ -141,12 +197,28 @@ bool fstk_FindFile(char const *path, char **fullPath, size_t *size) bool yywrap(void) { - if (contextStack->nbReptIters) { /* 
The context is a REPT block, which may loop */ - contextStack->reptIters[contextStack->reptDepth - 1]++; + if (contextStack->fileInfo->type == NODE_REPT) { /* The context is a REPT block, which may loop */ + struct FileStackReptNode *fileInfo = (struct FileStackReptNode *)contextStack->fileInfo; + + /* If the node is referenced, we can't edit it; duplicate it */ + if (contextStack->fileInfo->referenced) { + struct FileStackReptNode *copy = malloc(sizeof(*copy) + sizeof(copy->iters[0]) * fileInfo->reptDepth); + + if (!copy) + fatalerror("Failed to duplicate REPT file node: %s\n", strerror(errno)); + /* Copy all info but the referencing */ + *copy = *fileInfo; + copy->node.next = NULL; + copy->node.referenced = false; + + fileInfo = copy; + contextStack->fileInfo = (struct FileStackNode *)fileInfo; + } + + fileInfo->iters[0]++; /* If this wasn't the last iteration, wrap instead of popping */ - if (contextStack->reptIters[contextStack->reptDepth - 1] - <= contextStack->nbReptIters) { - lexer_RestartRept(contextStack->parent->lineNo); + if (fileInfo->iters[0] <= contextStack->nbReptIters) { + lexer_RestartRept(contextStack->fileInfo->lineNo); contextStack->uniqueID = macro_UseNewUniqueID(); return false; } @@ -155,44 +227,52 @@ bool yywrap(void) } dbgPrint("Popping context\n"); - /* Free an `INCLUDE`'s path */ - if (contextStack->fileNameBuf) - free(contextStack->fileNameBuf); + struct Context *context = contextStack; contextStack = contextStack->parent; contextDepth--; - lexer_DeleteState(contextStack->child->lexerState); + lexer_DeleteState(context->lexerState); /* Restore args if a macro (not REPT) saved them */ - if (contextStack->child->nbReptIters == 0 && contextStack->child->macro) { + if (context->fileInfo->type == NODE_MACRO) { dbgPrint("Restoring macro args %p\n", contextStack->macroArgs); macro_UseNewArgs(contextStack->macroArgs); } + /* Free the file stack node */ + if (!context->fileInfo->referenced) + free(context->fileInfo); /* Free the entry and make 
its parent the current entry */ - free(contextStack->child); + free(context); - contextStack->child = NULL; lexer_SetState(contextStack->lexerState); macro_SetUniqueID(contextStack->uniqueID); return false; } -static void newContext(uint32_t reptDepth) +/* + * Make sure not to switch the lexer state before calling this, so the saved line no is correct + * BE CAREFUL!! This modifies the file stack directly, you should have set up the file info first + */ +static void newContext(struct FileStackNode *fileInfo) { if (++contextDepth >= nMaxRecursionDepth) fatalerror("Recursion limit (%zu) exceeded\n", nMaxRecursionDepth); - contextStack->child = malloc(sizeof(*contextStack->child) - + reptDepth * sizeof(contextStack->reptIters[0])); - if (!contextStack->child) - fatalerror("Failed to allocate memory for new context: %s\n", strerror(errno)); + struct Context *context = malloc(sizeof(*context)); - contextStack->lineNo = lexer_GetLineNo(); - /* Link new entry to its parent so it's reachable later */ - contextStack->child->parent = contextStack; - contextStack = contextStack->child; + if (!context) + fatalerror("Failed to allocate memory for new context: %s\n", strerror(errno)); + fileInfo->parent = contextStack->fileInfo; + fileInfo->lineNo = 0; /* Init to a default value, see struct definition for info */ + fileInfo->referenced = false; + fileInfo->lineNo = lexer_GetLineNo(); + context->fileInfo = fileInfo; + /* + * Link new entry to its parent so it's reachable later + * ERRORS SHOULD NOT OCCUR AFTER THIS!! 
+ */ + context->parent = contextStack; + contextStack = context; - contextStack->child = NULL; - contextStack->reptDepth = reptDepth; } void fstk_RunInclude(char const *path) @@ -212,21 +292,27 @@ void fstk_RunInclude(char const *path) } dbgPrint("Full path: \"%s\"\n", fullPath); - newContext(0); - contextStack->lexerState = lexer_OpenFile(fullPath); + struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + size); + + if (!fileInfo) { + error("Failed to alloc file info for INCLUDE: %s\n", strerror(errno)); + return; + } + fileInfo->node.type = NODE_FILE; + strcpy(fileInfo->name, fullPath); + free(fullPath); + + newContext((struct FileStackNode *)fileInfo); + contextStack->lexerState = lexer_OpenFile(fileInfo->name); if (!contextStack->lexerState) fatalerror("Failed to set up lexer for file include\n"); lexer_SetStateAtEOL(contextStack->lexerState); /* We're back at top-level, so most things are reset */ contextStack->uniqueID = 0; macro_SetUniqueID(0); - contextStack->fileName = fullPath; - contextStack->fileNameBuf = fullPath; - contextStack->macro = NULL; - contextStack->nbReptIters = 0; } -void fstk_RunMacro(char *macroName, struct MacroArgs *args) +void fstk_RunMacro(char const *macroName, struct MacroArgs *args) { dbgPrint("Running macro \"%s\"\n", macroName); @@ -242,7 +328,53 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) } contextStack->macroArgs = macro_GetCurrentArgs(); - newContext(0); + /* Compute total length of this node's name: :: */ + size_t reptNameLen = 0; + struct FileStackNode const *node = macro->src; + + if (node->type == NODE_REPT) { + struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node; + + /* 4294967295 = 2^32 - 1, aka UINT32_MAX */ + reptNameLen += reptNode->reptDepth * strlen("::REPT~4294967295"); + /* Look for next named node */ + do { + node = node->parent; + } while (node->type == NODE_REPT); + } + struct FileStackNamedNode const *baseNode = (struct FileStackNamedNode const *)node; 
+ size_t baseLen = strlen(baseNode->name); + size_t macroNameLen = strlen(macro->name); + struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + baseLen + + reptNameLen + 2 + macroNameLen + 1); + + if (!fileInfo) { + error("Failed to alloc file info for \"%s\": %s\n", macro->name, strerror(errno)); + return; + } + fileInfo->node.type = NODE_MACRO; + /* Print the name... */ + char *dest = fileInfo->name; + + memcpy(dest, baseNode->name, baseLen); + dest += baseLen; + if (node->type == NODE_REPT) { + struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node; + + for (uint32_t i = reptNode->reptDepth; i--; ) { + int nbChars = sprintf(dest, "::REPT~%" PRIu32, reptNode->iters[i]); + + if (nbChars < 0) + fatalerror("Failed to write macro invocation info: %s\n", + strerror(errno)); + dest += nbChars; + } + } + *dest++ = ':'; + *dest++ = ':'; + memcpy(dest, macro->name, macroNameLen + 1); + + newContext((struct FileStackNode *)fileInfo); /* Line minus 1 because buffer begins with a newline */ contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize, macro->fileLine - 1); @@ -250,143 +382,93 @@ void fstk_RunMacro(char *macroName, struct MacroArgs *args) fatalerror("Failed to set up lexer for macro invocation\n"); lexer_SetStateAtEOL(contextStack->lexerState); contextStack->uniqueID = macro_UseNewUniqueID(); - contextStack->fileName = macro->fileName; - contextStack->fileNameBuf = NULL; - contextStack->macro = macro; - contextStack->nbReptIters = 0; macro_UseNewArgs(args); } -void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size) +void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size) { dbgPrint("Running REPT(%" PRIu32 ")\n", count); - if (count == 0) return; - uint32_t reptDepth = contextStack->reptDepth; - - newContext(reptDepth + 1); - contextStack->lexerState = lexer_OpenFileView(body, size, nReptLineNo); - if (!contextStack->lexerState) - fatalerror("Failed to set up 
lexer for macro invocation\n"); - lexer_SetStateAtEOL(contextStack->lexerState); - contextStack->uniqueID = macro_UseNewUniqueID(); - contextStack->fileName = contextStack->parent->fileName; - contextStack->fileNameBuf = NULL; - contextStack->macro = contextStack->parent->macro; /* Inherit */ - contextStack->nbReptIters = count; - /* Copy all of parent's iters, and add ours */ - if (reptDepth) - memcpy(contextStack->reptIters, contextStack->parent->reptIters, - sizeof(contextStack->reptIters[0]) * reptDepth); - contextStack->reptIters[reptDepth] = 1; - - /* Correct our parent's line number, which currently points to the `ENDR` line */ - contextStack->parent->lineNo = nReptLineNo; -} - -static void printContext(FILE *stream, struct Context const *context) -{ - fprintf(stream, "%s", context->fileName); - if (context->macro) - fprintf(stream, "::%s", context->macro->name); - for (size_t i = 0; i < context->reptDepth; i++) - fprintf(stream, "::REPT~%" PRIu32, context->reptIters[i]); - fprintf(stream, "(%" PRId32 ")", context->lineNo); -} -static void dumpToStream(FILE *stream) -{ - struct Context *context = topLevelContext; + uint32_t reptDepth = contextStack->fileInfo->type == NODE_REPT + ? 
((struct FileStackReptNode *)contextStack->fileInfo)->reptDepth + : 0; + struct FileStackReptNode *fileInfo = malloc(sizeof(*fileInfo) + + (reptDepth + 1) * sizeof(fileInfo->iters[0])); - while (context != contextStack) { - printContext(stream, context); - fprintf(stream, " -> "); - context = context->child; + if (!fileInfo) { + error("Failed to alloc file info for REPT: %s\n", strerror(errno)); + return; } - contextStack->lineNo = lexer_GetLineNo(); - printContext(stream, contextStack); -} + fileInfo->node.type = NODE_REPT; + fileInfo->reptDepth = reptDepth + 1; + fileInfo->iters[0] = 1; + if (reptDepth) + /* Copy all parent iter counts */ + memcpy(&fileInfo->iters[1], + ((struct FileStackReptNode *)contextStack->fileInfo)->iters, + reptDepth * sizeof(fileInfo->iters[0])); -void fstk_Dump(void) -{ - dumpToStream(stderr); -} + newContext((struct FileStackNode *)fileInfo); + /* Correct our line number, which currently points to the `ENDR` line */ + contextStack->fileInfo->lineNo = reptLineNo; -char *fstk_DumpToStr(void) -{ - char *str; - size_t size; - /* `open_memstream` is specified to always include a '\0' at the end of the buffer! */ - FILE *stream = open_memstream(&str, &size); - - if (!stream) - fatalerror("Failed to dump file stack to string: %s\n", strerror(errno)); - dumpToStream(stream); - fclose(stream); - return str; -} + contextStack->lexerState = lexer_OpenFileView(body, size, reptLineNo); + if (!contextStack->lexerState) + fatalerror("Failed to set up lexer for rept block\n"); + lexer_SetStateAtEOL(contextStack->lexerState); + contextStack->uniqueID = macro_UseNewUniqueID(); + contextStack->nbReptIters = count; -char const *fstk_GetFileName(void) -{ - /* FIXME: this is awful, but all callees copy the buffer anyways */ - static char fileName[_MAX_PATH + 1]; - size_t remainingChars = _MAX_PATH + 1; - char *dest = fileName; - char const *src = contextStack->fileName; - -#define append(...) 
do { \ - int nbChars = snprintf(dest, remainingChars, __VA_ARGS__); \ - \ - if (nbChars >= remainingChars) \ - fatalerror("File stack entry too large\n"); \ - remainingChars -= nbChars; \ - dest += nbChars; \ -} while (0) - - while (*src && --remainingChars) /* Leave room for terminator */ - *dest++ = *src++; - if (remainingChars && contextStack->macro) - append("::%s", contextStack->macro->name); - for (size_t i = 0; i < contextStack->reptDepth; i++) - append("::REPT~%" PRIu32, contextStack->reptIters[i]); - - *dest = '\0'; - return fileName; } -uint32_t fstk_GetLine(void) +void fstk_Init(char const *mainPath, size_t maxRecursionDepth) { - return lexer_GetLineNo(); -} + struct LexerState *state = lexer_OpenFile(mainPath); -void fstk_Init(char *mainPath, size_t maxRecursionDepth) -{ - topLevelContext = malloc(sizeof(*topLevelContext)); - if (!topLevelContext) - fatalerror("Failed to allocate memory for initial context: %s\n", strerror(errno)); - topLevelContext->parent = NULL; - topLevelContext->child = NULL; - topLevelContext->lexerState = lexer_OpenFile(mainPath); - if (!topLevelContext->lexerState) + if (!state) fatalerror("Failed to open main file!\n"); - lexer_SetState(topLevelContext->lexerState); - topLevelContext->uniqueID = 0; + lexer_SetState(state); + char const *fileName = lexer_GetFileName(); + size_t len = strlen(fileName); + struct Context *context = malloc(sizeof(*contextStack)); + struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + len + 1); + + if (!context) + fatalerror("Failed to allocate memory for main context: %s\n", strerror(errno)); + if (!fileInfo) + fatalerror("Failed to allocate memory for main file info: %s\n", strerror(errno)); + + context->fileInfo = (struct FileStackNode *)fileInfo; + /* lineNo and reptIter are unused on the top-level context */ + context->fileInfo->parent = NULL; + context->fileInfo->referenced = false; + context->fileInfo->type = NODE_FILE; + memcpy(fileInfo->name, fileName, len + 1); + + 
context->parent = NULL; + context->lexerState = state; + context->uniqueID = 0; macro_SetUniqueID(0); - topLevelContext->fileName = lexer_GetFileName(); - topLevelContext->fileNameBuf = NULL; - topLevelContext->macro = NULL; - topLevelContext->nbReptIters = 0; - topLevelContext->reptDepth = 0; - - contextStack = topLevelContext; - - if (maxRecursionDepth - > (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])) { - error("Recursion depth may not be higher than %zu, defaulting to 64\n", - (SIZE_MAX - sizeof(*contextStack)) / sizeof(contextStack->reptIters[0])); - nMaxRecursionDepth = 64; + context->nbReptIters = 0; + + /* Now that it's set up properly, register the context */ + contextStack = context; + + /* + * Check that max recursion depth won't allow overflowing node `malloc`s + * This assumes that the rept node is larger + */ +#define DEPTH_LIMIT ((SIZE_MAX - sizeof(struct FileStackReptNode)) / sizeof(uint32_t)) + if (maxRecursionDepth > DEPTH_LIMIT) { + error("Recursion depth may not be higher than %zu, defaulting to " + EXPAND_AND_STR(DEFAULT_MAX_DEPTH) "\n", DEPTH_LIMIT); + nMaxRecursionDepth = DEFAULT_MAX_DEPTH; } else { nMaxRecursionDepth = maxRecursionDepth; } + /* Make sure that the default of 64 is OK, though */ + assert(DEPTH_LIMIT >= DEFAULT_MAX_DEPTH); +#undef DEPTH_LIMIT } diff --git a/src/asm/output.c b/src/asm/output.c index b8919edd1..a51683385 100644 --- a/src/asm/output.c +++ b/src/asm/output.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -33,7 +34,8 @@ #include "platform.h" // strdup struct Patch { - char *tzFilename; + struct FileStackNode const *src; + uint32_t lineNo; uint32_t nOffset; struct Section *pcSection; uint32_t pcOffset; @@ -62,19 +64,17 @@ static uint32_t nbSymbols = 0; /* Length of the above list */ static struct Assertion *assertions = NULL; +static struct FileStackNode *fileStackNodes = NULL; + /* * Count the number of sections used in this object */ static uint32_t 
countsections(void) { - struct Section *sect; uint32_t count = 0; - sect = pSectionList; - while (sect) { + for (struct Section const *sect = pSectionList; sect; sect = sect->next) count++; - sect = sect->next; - } return count; } @@ -129,16 +129,60 @@ static void fputstring(char const *s, FILE *f) fputc(0, f); } +static uint32_t getNbFileStackNodes(void) +{ + return fileStackNodes ? fileStackNodes->ID + 1 : 0; +} + +void out_RegisterNode(struct FileStackNode *node) +{ + /* If node is not already registered, register it (and parents), and give it a unique ID */ + while (node->ID == -1) { + node->ID = getNbFileStackNodes(); + if (node->ID == -1) + fatalerror("Reached too many file stack nodes; try splitting the file up\n"); + node->next = fileStackNodes; + fileStackNodes = node; + + /* Also register the node's parents */ + node = node->parent; + if (!node) + break; + } +} + +void out_ReplaceNode(struct FileStackNode *node) +{ + (void)node; +#if 0 +This is code intended to replace a node, which is pretty useless until ref counting is added... + + struct FileStackNode **ptr = &fileStackNodes; + + /* + * The linked list is supposed to have decrementing IDs, so iterate with less memory reads, + * to hopefully hit the cache less. A debug check is added after, in case a change is made + * that breaks this assumption. 
+ */ + for (uint32_t i = fileStackNodes->ID; i != node->ID; i--) + ptr = &(*ptr)->next; + assert((*ptr)->ID == node->ID); + + node->next = (*ptr)->next; + assert(!node->next || node->next->ID == node->ID - 1); /* Catch inconsistencies early */ + /* TODO: unreference the node */ + *ptr = node; +#endif +} + /* * Return a section's ID */ static uint32_t getsectid(struct Section const *sect) { - struct Section const *sec; + struct Section const *sec = pSectionList; uint32_t ID = 0; - sec = pSectionList; - while (sec) { if (sec == sect) return ID; @@ -159,7 +203,10 @@ static uint32_t getSectIDIfAny(struct Section const *sect) */ static void writepatch(struct Patch const *patch, FILE *f) { - fputstring(patch->tzFilename, f); + assert(patch->src->ID != -1); + + fputlong(patch->src->ID, f); + fputlong(patch->lineNo, f); fputlong(patch->nOffset, f); fputlong(getSectIDIfAny(patch->pcSection), f); fputlong(patch->pcOffset, f); @@ -206,26 +253,35 @@ static void writesymbol(struct Symbol const *sym, FILE *f) if (!sym_IsDefined(sym)) { fputc(SYMTYPE_IMPORT, f); } else { + assert(sym->src->ID != -1); + fputc(sym->isExported ? 
SYMTYPE_EXPORT : SYMTYPE_LOCAL, f); - fputstring(sym->fileName, f); + fputlong(sym->src->ID, f); fputlong(sym->fileLine, f); fputlong(getSectIDIfAny(sym_GetSection(sym)), f); fputlong(sym->value, f); } } +static void registerSymbol(struct Symbol *sym) +{ + *objectSymbolsTail = sym; + objectSymbolsTail = &sym->next; + out_RegisterNode(sym->src); + if (nbSymbols == -1) + fatalerror("Registered too many symbols (%" PRIu32 + "); try splitting up your files\n", (uint32_t)-1); + sym->ID = nbSymbols++; +} + /* * Returns a symbol's ID within the object file * If the symbol does not have one, one is assigned by registering the symbol */ static uint32_t getSymbolID(struct Symbol *sym) { - if (sym->ID == -1) { - sym->ID = nbSymbols++; - - *objectSymbolsTail = sym; - objectSymbolsTail = &sym->next; - } + if (sym->ID == -1 && !sym_IsPC(sym)) + registerSymbol(sym); return sym->ID; } @@ -303,22 +359,25 @@ static void writerpn(uint8_t *rpnexpr, uint32_t *rpnptr, uint8_t *rpn, /* * Allocate a new patch structure and link it into the list + * WARNING: all patches are assumed to eventually be written, so the file stack node is registered */ -static struct Patch *allocpatch(uint32_t type, struct Expression const *expr, - uint32_t ofs) +static struct Patch *allocpatch(uint32_t type, struct Expression const *expr, uint32_t ofs) { struct Patch *patch = malloc(sizeof(struct Patch)); uint32_t rpnSize = expr->isKnown ? 
5 : expr->nRPNPatchSize; + struct FileStackNode *node = fstk_GetFileStack(); if (!patch) fatalerror("No memory for patch: %s\n", strerror(errno)); - patch->pRPN = malloc(sizeof(*patch->pRPN) * rpnSize); + patch->pRPN = malloc(sizeof(*patch->pRPN) * rpnSize); if (!patch->pRPN) fatalerror("No memory for patch's RPN expression: %s\n", strerror(errno)); patch->type = type; - patch->tzFilename = fstk_DumpToStr(); + patch->src = node; + out_RegisterNode(node); + patch->lineNo = lexer_GetLineNo(); patch->nOffset = ofs; patch->pcSection = sect_GetSymbolSection(); patch->pcOffset = sect_GetSymbolOffset(); @@ -382,13 +441,28 @@ static void writeassert(struct Assertion *assert, FILE *f) fputstring(assert->message, f); } +static void writeFileStackNode(struct FileStackNode const *node, FILE *f) +{ + fputlong(node->parent ? node->parent->ID : -1, f); + fputlong(node->lineNo, f); + fputc(node->type, f); + if (node->type != NODE_REPT) { + fputstring(((struct FileStackNamedNode const *)node)->name, f); + } else { + struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node; + + fputlong(reptNode->reptDepth, f); + /* Iters are stored by decreasing depth, so reverse the order for output */ + for (uint32_t i = reptNode->reptDepth; i--; ) + fputlong(reptNode->iters[i], f); + } +} + static void registerExportedSymbol(struct Symbol *symbol, void *arg) { (void)arg; if (sym_IsExported(symbol) && symbol->ID == -1) { - *objectSymbolsTail = symbol; - objectSymbolsTail = &symbol->next; - nbSymbols++; + registerSymbol(symbol); } } @@ -411,6 +485,15 @@ void out_WriteObject(void) fputlong(nbSymbols, f); fputlong(countsections(), f); + fputlong(getNbFileStackNodes(), f); + for (struct FileStackNode const *node = fileStackNodes; node; node = node->next) { + writeFileStackNode(node, f); + if (node->next && node->next->ID != node->ID - 1) + fatalerror("Internal error: fstack node #%" PRIu32 " follows #%" PRIu32 + ". 
Please report this to the developers!\n", + node->next->ID, node->ID); + } + for (struct Symbol const *sym = objectSymbols; sym; sym = sym->next) writesymbol(sym, f); diff --git a/src/asm/rpn.c b/src/asm/rpn.c index 991dbb3f4..bb7aa8717 100644 --- a/src/asm/rpn.c +++ b/src/asm/rpn.c @@ -258,8 +258,8 @@ static int32_t shift(int32_t shiftee, int32_t amount) if (amount >= 0) { // Left shift if (amount >= 32) { - warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %" PRId32 "\n", - amount); + warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %" + PRId32 "\n", amount); return 0; } else { diff --git a/src/asm/symbol.c b/src/asm/symbol.c index 8bf37b97a..f6d9736f9 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -23,6 +23,7 @@ #include "asm/macro.h" #include "asm/main.h" #include "asm/mymath.h" +#include "asm/output.h" #include "asm/section.h" #include "asm/symbol.h" #include "asm/util.h" @@ -121,7 +122,7 @@ static char const *Callback__FILE__(void) buf[j - 1] = '\\'; buf[j] = fileName[i]; } - /* Write everything after the loop, to ensure everything has been allocated */ + /* Write everything after the loop, to ensure the buffer has been allocated */ buf[0] = '"'; buf[j++] = '"'; buf[j] = '\0'; @@ -150,15 +151,35 @@ int32_t sym_GetValue(struct Symbol const *sym) return sym->value; } +static void dumpFilename(struct Symbol const *sym) +{ + if (!sym->src) + fputs("<builtin>", stderr); + else + fstk_Dump(sym->src, sym->fileLine); +} + +/* + * Set a symbol's definition filename and line + */ +static void setSymbolFilename(struct Symbol *sym) +{ + sym->src = fstk_GetFileStack(); + sym->fileLine = lexer_GetLineNo(); +} + /* * Update a symbol's definition filename and line */ static void updateSymbolFilename(struct Symbol *sym) { - if (snprintf(sym->fileName, _MAX_PATH + 1, "%s", - fstk_GetFileName()) > _MAX_PATH) - fatalerror("%s: File name is too long: '%s'\n", __func__, fstk_GetFileName()); - sym->fileLine = fstk_GetLine(); + struct FileStackNode *oldSrc =
sym->src; + + setSymbolFilename(sym); + /* If the old node was referenced, ensure the new one is */ + if (oldSrc->referenced && oldSrc->ID != -1) + out_RegisterNode(sym->src); + /* TODO: unref the old node, and use `out_ReplaceNode` instead if deleting it */ } /* @@ -178,7 +199,7 @@ static struct Symbol *createsymbol(char const *s) symbol->isBuiltin = false; symbol->hasCallback = false; symbol->section = NULL; - updateSymbolFilename(symbol); + setSymbolFilename(symbol); symbol->ID = -1; symbol->next = NULL; @@ -253,6 +274,7 @@ void sym_Purge(char const *symName) labelScope = NULL; hash_RemoveElement(symbols, symbol->name); + /* TODO: ideally, also unref the file stack nodes */ free(symbol); } } @@ -338,9 +360,11 @@ static struct Symbol *createNonrelocSymbol(char const *symbolName) if (!symbol) symbol = createsymbol(symbolName); - else if (sym_IsDefined(symbol)) - error("'%s' already defined at %s(%" PRIu32 ")\n", symbolName, - symbol->fileName, symbol->fileLine); + else if (sym_IsDefined(symbol)) { + error("'%s' already defined at ", symbolName); + dumpFilename(symbol); + putc('\n', stderr); + } return symbol; } @@ -395,15 +419,17 @@ struct Symbol *sym_AddSet(char const *symName, int32_t value) { struct Symbol *sym = findsymbol(symName, NULL); - if (sym == NULL) + if (sym == NULL) { sym = createsymbol(symName); - else if (sym_IsDefined(sym) && sym->type != SYM_SET) - error("'%s' already defined as %s at %s(%" PRIu32 ")\n", - symName, sym->type == SYM_LABEL ? "label" : "constant", - sym->fileName, sym->fileLine); - else - /* TODO: can the scope be incorrect when talking over refs? */ + } else if (sym_IsDefined(sym) && sym->type != SYM_SET) { + error("'%s' already defined as %s at ", + symName, sym->type == SYM_LABEL ? "label" : "constant"); + dumpFilename(sym); + putc('\n', stderr); + } else { + /* TODO: can the scope be incorrect when taking over refs? 
*/ updateSymbolFilename(sym); + } sym->type = SYM_SET; sym->value = value; @@ -424,9 +450,12 @@ static struct Symbol *addLabel(char const *name) if (!sym) { sym = createsymbol(name); } else if (sym_IsDefined(sym)) { - error("'%s' already defined in %s(%" PRIu32 ")\n", - name, sym->fileName, sym->fileLine); + error("'%s' already defined at ", name); + dumpFilename(sym); + putc('\n', stderr); return NULL; + } else { + updateSymbolFilename(sym); } /* If the symbol already exists as a ref, just "take over" it */ sym->type = SYM_LABEL; @@ -434,7 +463,6 @@ static struct Symbol *addLabel(char const *name) if (exportall) sym->isExported = true; sym->section = sect_GetSymbolSection(); - updateSymbolFilename(sym); if (sym && !sym->section) error("Label \"%s\" created outside of a SECTION\n", name); @@ -517,7 +545,7 @@ struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, sym->type = SYM_MACRO; sym->macroSize = size; sym->macro = body; - updateSymbolFilename(sym); + setSymbolFilename(sym); /* TODO: is this really necessary? 
*/ /* * The symbol is created at the line after the `endm`, * override this with the actual definition line @@ -577,10 +605,11 @@ static inline struct Symbol *createBuiltinSymbol(char const *name) sym->isBuiltin = true; sym->hasCallback = true; - strcpy(sym->fileName, "<builtin>"); + sym->src = NULL; sym->fileLine = 0; return sym; } + /* * Initialize the symboltable */ diff --git a/src/asm/warning.c b/src/asm/warning.c index cb58ff8de..f8ea7cb9c 100644 --- a/src/asm/warning.c +++ b/src/asm/warning.c @@ -202,7 +202,7 @@ void printDiag(const char *fmt, va_list args, char const *type, char const *flagfmt, char const *flag) { fputs(type, stderr); - fstk_Dump(); + fstk_DumpCurrent(); fprintf(stderr, flagfmt, flag); vfprintf(stderr, fmt, args); lexer_DumpStringExpansions(); diff --git a/src/link/assign.c b/src/link/assign.c index c209ed9c5..64244f014 100644 --- a/src/link/assign.c +++ b/src/link/assign.c @@ -81,14 +81,14 @@ static void processLinkerScript(void) /* Check if this doesn't conflict with what the code says */ if (section->isBankFixed && placement->bank != section->bank) - error("Linker script contradicts \"%s\"'s bank placement", + error(NULL, 0, "Linker script contradicts \"%s\"'s bank placement", section->name); if (section->isAddressFixed && placement->org != section->org) - error("Linker script contradicts \"%s\"'s address placement", + error(NULL, 0, "Linker script contradicts \"%s\"'s address placement", section->name); if (section->isAlignFixed && (placement->org & section->alignMask) != 0) - error("Linker script contradicts \"%s\"'s alignment", + error(NULL, 0, "Linker script contradicts \"%s\"'s alignment", section->name); section->isAddressFixed = true; diff --git a/src/link/main.c b/src/link/main.c index d88bc0549..8d2b5a586 100644 --- a/src/link/main.c +++ b/src/link/main.c @@ -6,8 +6,10 @@ * SPDX-License-Identifier: MIT */ +#include #include #include +#include #include #include #include @@ -39,11 +41,55 @@ bool disablePadding; /* -x */ static uint32_t
nbErrors = 0; -void error(char const *fmt, ...) +/***** Helper function to dump a file stack to stderr *****/ + +char const *dumpFileStack(struct FileStackNode const *node) +{ + char const *lastName; + + if (node->parent) { + lastName = dumpFileStack(node->parent); + /* REPT nodes use their parent's name */ + if (node->type != NODE_REPT) + lastName = node->name; + fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, lastName); + if (node->type == NODE_REPT) { + for (uint32_t i = 0; i < node->reptDepth; i++) + fprintf(stderr, "::REPT~%" PRIu32, node->iters[i]); + } + } else { + assert(node->type != NODE_REPT); + lastName = node->name; + fputs(lastName, stderr); + } + + return lastName; +} + +void warning(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...) +{ + va_list ap; + + fputs("warning: ", stderr); + if (where) { + dumpFileStack(where); + fprintf(stderr, "(%" PRIu32 "): ", lineNo); + } + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + putc('\n', stderr); +} + +void error(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...) { va_list ap; - fprintf(stderr, "error: "); + fputs("error: ", stderr); + if (where) { + dumpFileStack(where); + fprintf(stderr, "(%" PRIu32 "): ", lineNo); + } va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); @@ -53,11 +99,15 @@ void error(char const *fmt, ...) nbErrors++; } -noreturn_ void fatal(char const *fmt, ...) +noreturn_ void fatal(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...) 
{ va_list ap; - fprintf(stderr, "fatal: "); + fputs("fatal: ", stderr); + if (where) { + dumpFileStack(where); + fprintf(stderr, "(%" PRIu32 "): ", lineNo); + } va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); @@ -177,11 +227,11 @@ int main(int argc, char *argv[]) case 'p': value = strtoul(optarg, &endptr, 0); if (optarg[0] == '\0' || *endptr != '\0') { - error("Invalid argument for option 'p'"); + error(NULL, 0, "Invalid argument for option 'p'"); value = 0xFF; } if (value > 0xFF) { - error("Argument for 'p' must be a byte (between 0 and 0xFF)"); + error(NULL, 0, "Argument for 'p' must be a byte (between 0 and 0xFF)"); value = 0xFF; } padValue = value; @@ -189,7 +239,7 @@ int main(int argc, char *argv[]) case 's': /* FIXME: nobody knows what this does, figure it out */ (void)optarg; - warnx("Nobody has any idea what `-s` does"); + warning(NULL, 0, "Nobody has any idea what `-s` does"); break; case 't': is32kMode = true; @@ -234,8 +284,8 @@ int main(int argc, char *argv[]) bankranges[SECTTYPE_VRAM][1] = BANK_MIN_VRAM; /* Read all object files first, */ - while (curArgIndex < argc) - obj_ReadFile(argv[curArgIndex++]); + for (obj_Setup(argc - curArgIndex); curArgIndex < argc; curArgIndex++) + obj_ReadFile(argv[curArgIndex], argc - curArgIndex - 1); /* then process them, */ obj_DoSanityChecks(); diff --git a/src/link/object.c b/src/link/object.c index bd3c05ab0..8e63bd2a1 100644 --- a/src/link/object.c +++ b/src/link/object.c @@ -31,6 +31,11 @@ static struct SymbolList { struct SymbolList *next; } *symbolLists; +unsigned int nbObjFiles; +static struct { + struct FileStackNode *nodes; + uint32_t nbNodes; +} *nodes; static struct Assertion *assertions; /***** Helper functions for reading object files *****/ @@ -170,12 +175,56 @@ static char *readstr(FILE *file) /***** Functions to parse object files *****/ /** - * Reads a RGB6 symbol from a file. + * Reads a file stack node form a file. 
+ * @param file The file to read from + * @param nodes The file's array of nodes + * @param i The ID of the node in the array + * @param fileName The filename to report in errors + */ +static void readFileStackNode(FILE *file, struct FileStackNode fileNodes[], uint32_t i, + char const *fileName) +{ + uint32_t parentID; + + tryReadlong(parentID, file, + "%s: Cannot read node #%" PRIu32 "'s parent ID: %s", fileName, i); + fileNodes[i].parent = parentID == -1 ? NULL : &fileNodes[parentID]; + tryReadlong(fileNodes[i].lineNo, file, + "%s: Cannot read node #%" PRIu32 "'s line number: %s", fileName, i); + tryGetc(fileNodes[i].type, file, "%s: Cannot read node #%" PRIu32 "'s type: %s", + fileName, i); + switch (fileNodes[i].type) { + case NODE_FILE: + case NODE_MACRO: + tryReadstr(fileNodes[i].name, file, + "%s: Cannot read node #%" PRIu32 "'s file name: %s", fileName, i); + break; + + case NODE_REPT: + tryReadlong(fileNodes[i].reptDepth, file, + "%s: Cannot read node #%" PRIu32 "'s rept depth: %s", fileName, i); + fileNodes[i].iters = malloc(sizeof(*fileNodes[i].iters) * fileNodes[i].reptDepth); + if (!fileNodes[i].iters) + fatal(NULL, 0, "%s: Failed to alloc node #%" PRIu32 "'s iters: %s", + fileName, i, strerror(errno)); + for (uint32_t k = 0; k < fileNodes[i].reptDepth; k++) + tryReadlong(fileNodes[i].iters[k], file, + "%s: Cannot read node #%" PRIu32 "'s iter #%" PRIu32 ": %s", + fileName, i, k); + if (!fileNodes[i].parent) + fatal(NULL, 0, "%s is not a valid object file: root node (#%" + PRIu32 ") may not be REPT", fileName, i); + } +} + +/** + * Reads a symbol from a file. 
* @param file The file to read from * @param symbol The struct to fill * @param fileName The filename to report in errors */ -static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) +static void readSymbol(FILE *file, struct Symbol *symbol, + char const *fileName, struct FileStackNode fileNodes[]) { tryReadstr(symbol->name, file, "%s: Cannot read symbol name: %s", fileName); @@ -184,9 +233,12 @@ static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) /* If the symbol is defined in this file, read its definition */ if (symbol->type != SYMTYPE_IMPORT) { symbol->objFileName = fileName; - tryReadstr(symbol->fileName, file, - "%s: Cannot read \"%s\"'s file name: %s", + uint32_t nodeID; + + tryReadlong(nodeID, file, + "%s: Cannot read \"%s\"'s node ID: %s", fileName, symbol->name); + symbol->src = &fileNodes[nodeID]; tryReadlong(symbol->lineNo, file, "%s: Cannot read \"%s\"'s line number: %s", fileName, symbol->name); @@ -202,7 +254,7 @@ static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) } /** - * Reads a RGB6 patch from a file. + * Reads a patch from a file. 
* @param file The file to read from * @param patch The struct to fill * @param fileName The filename to report in errors @@ -210,20 +262,25 @@ static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName) */ static void readPatch(FILE *file, struct Patch *patch, char const *fileName, char const *sectName, uint32_t i, - struct Section *fileSections[]) + struct Section *fileSections[], struct FileStackNode fileNodes[]) { - tryReadstr(patch->fileName, file, - "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s name: %s", + uint32_t nodeID; + + tryReadlong(nodeID, file, + "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s node ID: %s", fileName, sectName, i); + patch->src = &fileNodes[nodeID]; + tryReadlong(patch->lineNo, file, + "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s line number: %s", + fileName, sectName, i); tryReadlong(patch->offset, file, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s offset: %s", fileName, sectName, i); tryReadlong(patch->pcSectionID, file, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s PC offset: %s", fileName, sectName, i); - patch->pcSection = patch->pcSectionID == -1 - ? NULL - : fileSections[patch->pcSectionID]; + patch->pcSection = patch->pcSectionID == -1 ? 
NULL + : fileSections[patch->pcSectionID]; tryReadlong(patch->pcOffset, file, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s PC offset: %s", fileName, sectName, i); @@ -234,16 +291,17 @@ static void readPatch(FILE *file, struct Patch *patch, char const *fileName, "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s RPN size: %s", fileName, sectName, i); - uint8_t *rpnExpression = - malloc(sizeof(*rpnExpression) * patch->rpnSize); - size_t nbElementsRead = fread(rpnExpression, sizeof(*rpnExpression), + patch->rpnExpression = malloc(sizeof(*patch->rpnExpression) * patch->rpnSize); + if (!patch->rpnExpression) + err(1, "%s: Failed to alloc \"%s\"'s patch #%" PRIu32 "'s RPN expression", + fileName, sectName, i); + size_t nbElementsRead = fread(patch->rpnExpression, sizeof(*patch->rpnExpression), patch->rpnSize, file); if (nbElementsRead != patch->rpnSize) errx(1, "%s: Cannot read \"%s\"'s patch #%" PRIu32 "'s RPN expression: %s", fileName, sectName, i, feof(file) ? "Unexpected end of file" : strerror(errno)); - patch->rpnExpression = rpnExpression; } /** @@ -252,8 +310,8 @@ static void readPatch(FILE *file, struct Patch *patch, char const *fileName, * @param section The struct to fill * @param fileName The filename to report in errors */ -static void readSection(FILE *file, struct Section *section, - char const *fileName, struct Section *fileSections[]) +static void readSection(FILE *file, struct Section *section, char const *fileName, + struct Section *fileSections[], struct FileStackNode fileNodes[]) { int32_t tmp; uint8_t byte; @@ -280,7 +338,7 @@ static void readSection(FILE *file, struct Section *section, fileName, section->name); section->isAddressFixed = tmp >= 0; if (tmp > UINT16_MAX) { - error("\"%s\"'s org is too large (%" PRId32 ")", + error(NULL, 0, "\"%s\"'s org is too large (%" PRId32 ")", section->name, tmp); tmp = UINT16_MAX; } @@ -296,7 +354,7 @@ static void readSection(FILE *file, struct Section *section, tryReadlong(tmp, file, "%s: Cannot read 
\"%s\"'s alignment offset: %s", fileName, section->name); if (tmp > UINT16_MAX) { - error("\"%s\"'s alignment offset is too large (%" PRId32 ")", + error(NULL, 0, "\"%s\"'s alignment offset is too large (%" PRId32 ")", section->name, tmp); tmp = UINT16_MAX; } @@ -332,7 +390,7 @@ static void readSection(FILE *file, struct Section *section, section->name); for (uint32_t i = 0; i < section->nbPatches; i++) { readPatch(file, &patches[i], fileName, section->name, - i, fileSections); + i, fileSections, fileNodes); } section->patches = patches; } @@ -375,13 +433,13 @@ static void linkSymToSect(struct Symbol const *symbol, struct Section *section) */ static void readAssertion(FILE *file, struct Assertion *assert, char const *fileName, uint32_t i, - struct Section *fileSections[]) + struct Section *fileSections[], struct FileStackNode fileNodes[]) { char assertName[sizeof("Assertion #" EXPAND_AND_STR(UINT32_MAX))]; snprintf(assertName, sizeof(assertName), "Assertion #%" PRIu32, i); - readPatch(file, &assert->patch, fileName, assertName, 0, fileSections); + readPatch(file, &assert->patch, fileName, assertName, 0, fileSections, fileNodes); tryReadstr(assert->message, file, "%s: Cannot read assertion's message: %s", fileName); } @@ -394,11 +452,7 @@ static inline struct Section *getMainSection(struct Section *section) return section; } -/** - * Reads an object file of any supported format - * @param fileName The filename to report for errors - */ -void obj_ReadFile(char const *fileName) +void obj_ReadFile(char const *fileName, unsigned int fileID) { FILE *file = strcmp("-", fileName) ? 
fopen(fileName, "rb") : stdin; @@ -438,6 +492,14 @@ void obj_ReadFile(char const *fileName) nbSectionsToAssign += nbSections; + tryReadlong(nodes[fileID].nbNodes, file, "%s: Cannot read number of nodes: %s", fileName); + nodes[fileID].nodes = calloc(nodes[fileID].nbNodes, sizeof(nodes[fileID].nodes[0])); + if (!nodes[fileID].nodes) + err(1, "Failed to get memory for %s's nodes", fileName); + verbosePrint("Reading %u nodes...\n", nodes[fileID].nbNodes); + for (uint32_t i = 0; i < nodes[fileID].nbNodes; i++) + readFileStackNode(file, nodes[fileID].nodes, i, fileName); + /* This file's symbols, kept to link sections to them */ struct Symbol **fileSymbols = malloc(sizeof(*fileSymbols) * nbSymbols + 1); @@ -464,7 +526,7 @@ void obj_ReadFile(char const *fileName) if (!symbol) err(1, "%s: Couldn't create new symbol", fileName); - readSymbol(file, symbol, fileName); + readSymbol(file, symbol, fileName, nodes[fileID].nodes); fileSymbols[i] = symbol; if (symbol->type == SYMTYPE_EXPORT) @@ -485,7 +547,7 @@ void obj_ReadFile(char const *fileName) err(1, "%s: Couldn't create new section", fileName); fileSections[i]->nextu = NULL; - readSection(file, fileSections[i], fileName, fileSections); + readSection(file, fileSections[i], fileName, fileSections, nodes[fileID].nodes); fileSections[i]->fileSymbols = fileSymbols; if (nbSymPerSect[i]) { fileSections[i]->symbols = malloc(nbSymPerSect[i] @@ -535,7 +597,7 @@ void obj_ReadFile(char const *fileName) if (!assertion) err(1, "%s: Couldn't create new assertion", fileName); - readAssertion(file, assertion, fileName, i, fileSections); + readAssertion(file, assertion, fileName, i, fileSections, nodes[fileID].nodes); assertion->fileSymbols = fileSymbols; assertion->next = assertions; assertions = assertion; @@ -555,6 +617,15 @@ void obj_CheckAssertions(void) patch_CheckAssertions(assertions); } +void obj_Setup(unsigned int nbFiles) +{ + nbObjFiles = nbFiles; + + if (nbFiles > SIZE_MAX / sizeof(*nodes)) + fatal(NULL, 0, "Impossible to link 
more than %zu files!", SIZE_MAX / sizeof(*nodes)); + nodes = malloc(sizeof(*nodes) * nbFiles); +} + static void freeSection(struct Section *section, void *arg) { (void)arg; @@ -562,12 +633,8 @@ static void freeSection(struct Section *section, void *arg) free(section->name); if (sect_HasData(section->type)) { free(section->data); - for (int32_t i = 0; i < section->nbPatches; i++) { - struct Patch *patch = &section->patches[i]; - - free(patch->fileName); - free(patch->rpnExpression); - } + for (int32_t i = 0; i < section->nbPatches; i++) + free(section->patches[i].rpnExpression); free(section->patches); } free(section->symbols); @@ -577,13 +644,20 @@ static void freeSymbol(struct Symbol *symbol) { free(symbol->name); - if (symbol->type != SYMTYPE_IMPORT) - free(symbol->fileName); free(symbol); } void obj_Cleanup(void) { + for (unsigned int i = 0; i < nbObjFiles; i++) { + for (uint32_t j = 0; j < nodes[i].nbNodes; j++) { + if (nodes[i].nodes[j].type == NODE_REPT) + free(nodes[i].nodes[j].iters); + } + free(nodes[i].nodes); + } + free(nodes); + sym_CleanupSymbols(); sect_ForEach(freeSection, NULL); diff --git a/src/link/patch.c b/src/link/patch.c index 20b31c6e5..97dbb762d 100644 --- a/src/link/patch.c +++ b/src/link/patch.c @@ -6,11 +6,13 @@ * SPDX-License-Identifier: MIT */ +#include #include #include #include #include +#include "link/object.h" #include "link/patch.h" #include "link/section.h" #include "link/symbol.h" @@ -104,10 +106,10 @@ static void pushRPN(int32_t value) stack.size++; } -static int32_t popRPN(char const *fileName) +static int32_t popRPN(struct FileStackNode const *node, uint32_t lineNo) { if (stack.size == 0) - errx(1, "%s: Internal error, RPN stack empty", fileName); + fatal(node, lineNo, "Internal error, RPN stack empty"); stack.size--; return stack.buf[stack.size]; @@ -121,16 +123,18 @@ static inline void freeRPNStack(void) /* RPN operators */ static uint32_t getRPNByte(uint8_t const
**expression, int32_t *size, - char const *fileName) + struct FileStackNode const *node, uint32_t lineNo) { if (!(*size)--) - errx(1, "%s: RPN expression overread", fileName); + fatal(node, lineNo, "Internal error, RPN expression overread"); + return *(*expression)++; } static struct Symbol const *getSymbol(struct Symbol const * const *symbolList, uint32_t index) { + assert(index != -1); /* PC needs to be handled specially, not here */ struct Symbol const *symbol = symbolList[index]; /* If the symbol is defined elsewhere... */ @@ -150,7 +154,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, struct Symbol const * const *fileSymbols) { /* Small shortcut to avoid a lot of repetition */ -#define popRPN() popRPN(patch->fileName) +#define popRPN() popRPN(patch->src, patch->lineNo) uint8_t const *expression = patch->rpnExpression; int32_t size = patch->rpnSize; @@ -159,7 +163,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, while (size > 0) { enum RPNCommand command = getRPNByte(&expression, &size, - patch->fileName); + patch->src, patch->lineNo); int32_t value; /* @@ -187,7 +191,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_DIV: value = popRPN(); if (value == 0) { - error("%s: Division by 0", patch->fileName); + error(patch->src, patch->lineNo, "Division by 0"); popRPN(); value = INT32_MAX; } else { @@ -197,7 +201,7 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_MOD: value = popRPN(); if (value == 0) { - error("%s: Modulo by 0", patch->fileName); + error(patch->src, patch->lineNo, "Modulo by 0"); popRPN(); value = 0; } else { @@ -269,17 +273,17 @@ static int32_t computeRPNExpr(struct Patch const *patch, value = 0; for (uint8_t shift = 0; shift < 32; shift += 8) value |= getRPNByte(&expression, &size, - patch->fileName) << shift; + patch->src, patch->lineNo) << shift; symbol = getSymbol(fileSymbols, value); if (!symbol) { - error("%s: Requested BANK() of symbol \"%s\", which was not found", - 
patch->fileName, + error(patch->src, patch->lineNo, + "Requested BANK() of symbol \"%s\", which was not found", fileSymbols[value]->name); value = 1; } else if (!symbol->section) { - error("%s: Requested BANK() of non-label symbol \"%s\"", - patch->fileName, + error(patch->src, patch->lineNo, + "Requested BANK() of non-label symbol \"%s\"", fileSymbols[value]->name); value = 1; } else { @@ -289,14 +293,15 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_BANK_SECT: name = (char const *)expression; - while (getRPNByte(&expression, &size, patch->fileName)) + while (getRPNByte(&expression, &size, patch->src, patch->lineNo)) ; sect = sect_GetSection(name); if (!sect) { - error("%s: Requested BANK() of section \"%s\", which was not found", - patch->fileName, name); + error(patch->src, patch->lineNo, + "Requested BANK() of section \"%s\", which was not found", + name); value = 1; } else { value = sect->bank; @@ -305,7 +310,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, case RPN_BANK_SELF: if (!patch->pcSection) { - error("%s: PC has no bank outside a section"); + error(patch->src, patch->lineNo, + "PC has no bank outside a section"); value = 1; } else { value = patch->pcSection->bank; @@ -317,8 +323,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, if (value < 0 || (value > 0xFF && value < 0xFF00) || value > 0xFFFF) - error("%s: Value %" PRId32 " is not in HRAM range", - patch->fileName, value); + error(patch->src, patch->lineNo, + "Value %" PRId32 " is not in HRAM range", value); value &= 0xFF; break; @@ -328,8 +334,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, * They can be easily checked with a bitmask */ if (value & ~0x38) - error("%s: Value %" PRId32 " is not a RST vector", - patch->fileName, value); + error(patch->src, patch->lineNo, + "Value %" PRId32 " is not a RST vector", value); value |= 0xC7; break; @@ -337,32 +343,35 @@ static int32_t computeRPNExpr(struct Patch const *patch, value = 0; for 
(uint8_t shift = 0; shift < 32; shift += 8) value |= getRPNByte(&expression, &size, - patch->fileName) << shift; + patch->src, patch->lineNo) << shift; break; case RPN_SYM: value = 0; for (uint8_t shift = 0; shift < 32; shift += 8) value |= getRPNByte(&expression, &size, - patch->fileName) << shift; - - symbol = getSymbol(fileSymbols, value); - - if (!symbol) { - error("%s: Unknown symbol \"%s\"", - patch->fileName, - fileSymbols[value]->name); - } else if (strcmp(symbol->name, "@")) { - value = symbol->value; - /* Symbols attached to sections have offsets */ - if (symbol->section) - value += symbol->section->org; - } else if (!patch->pcSection) { - error("%s: PC has no value outside a section", - patch->fileName); - value = 0; + patch->src, patch->lineNo) << shift; + + if (value == -1) { /* PC */ + if (!patch->pcSection) { + error(patch->src, patch->lineNo, + "PC has no value outside a section"); + value = 0; + } else { + value = patch->pcOffset + patch->pcSection->org; + } } else { - value = patch->pcOffset + patch->pcSection->org; + symbol = getSymbol(fileSymbols, value); + + if (!symbol) { + error(patch->src, patch->lineNo, + "Unknown symbol \"%s\"", fileSymbols[value]->name); + } else { + value = symbol->value; + /* Symbols attached to sections have offsets */ + if (symbol->section) + value += symbol->section->org; + } } break; } @@ -371,8 +380,8 @@ static int32_t computeRPNExpr(struct Patch const *patch, } if (stack.size > 1) - error("%s: RPN stack has %zu entries on exit, not 1", - patch->fileName, stack.size); + error(patch->src, patch->lineNo, + "RPN stack has %zu entries on exit, not 1", stack.size); return popRPN(); @@ -390,20 +399,20 @@ void patch_CheckAssertions(struct Assertion *assert) assert->fileSymbols)) { switch ((enum AssertionType)assert->patch.type) { case ASSERT_FATAL: - fatal("%s: %s", assert->patch.fileName, + fatal(assert->patch.src, assert->patch.lineNo, "%s", assert->message[0] ? 
assert->message : "assert failure"); /* Not reached */ break; /* Here so checkpatch doesn't complain */ case ASSERT_ERROR: - error("%s: %s", assert->patch.fileName, + error(assert->patch.src, assert->patch.lineNo, "%s", assert->message[0] ? assert->message : "assert failure"); break; case ASSERT_WARN: - warnx("%s: %s", assert->patch.fileName, - assert->message[0] ? assert->message - : "assert failure"); + warning(assert->patch.src, assert->patch.lineNo, "%s", + assert->message[0] ? assert->message + : "assert failure"); break; } } @@ -442,8 +451,9 @@ static void applyFilePatches(struct Section *section, struct Section *dataSectio int16_t jumpOffset = value - address; if (jumpOffset < -128 || jumpOffset > 127) - error("%s: jr target out of reach (expected -129 < %" PRId16 " < 128)", - patch->fileName, jumpOffset); + error(patch->src, patch->lineNo, + "jr target out of reach (expected -129 < %" PRId16 " < 128)", + jumpOffset); dataSection->data[offset] = jumpOffset & 0xFF; } else { /* Patch a certain number of bytes */ @@ -459,9 +469,9 @@ static void applyFilePatches(struct Section *section, struct Section *dataSectio if (value < types[patch->type].min || value > types[patch->type].max) - error("%s: Value %#" PRIx32 "%s is not %u-bit", - patch->fileName, value, - value < 0 ? " (maybe negative?)" : "", + error(patch->src, patch->lineNo, + "Value %#" PRIx32 "%s is not %u-bit", + value, value < 0 ? 
" (maybe negative?)" : "", types[patch->type].size * 8U); for (uint8_t i = 0; i < types[patch->type].size; i++) { dataSection->data[offset + i] = value & 0xFF; diff --git a/src/link/symbol.c b/src/link/symbol.c index 051a7db94..483c2c048 100644 --- a/src/link/symbol.c +++ b/src/link/symbol.c @@ -8,9 +8,12 @@ #include #include +#include +#include "link/object.h" #include "link/symbol.h" #include "link/main.h" + #include "extern/err.h" #include "hashmap.h" @@ -40,11 +43,15 @@ void sym_AddSymbol(struct Symbol *symbol) /* Check if the symbol already exists */ struct Symbol *other = hash_GetElement(symbols, symbol->name); - if (other) - errx(1, "\"%s\" both in %s from %s(%" PRId32 ") and in %s from %s(%" PRId32 ")", - symbol->name, - symbol->objFileName, symbol->fileName, symbol->lineNo, - other->objFileName, other->fileName, other->lineNo); + if (other) { + fprintf(stderr, "error: \"%s\" both in %s from ", symbol->name, symbol->objFileName); + dumpFileStack(symbol->src); + fprintf(stderr, "(%" PRIu32 ") and in %s from ", + symbol->lineNo, other->objFileName); + dumpFileStack(other->src); + fprintf(stderr, "(%" PRIu32 ")\n", other->lineNo); + exit(1); + } /* If not, add it */ bool collided = hash_AddElement(symbols, symbol->name, symbol); diff --git a/src/rgbds.5 b/src/rgbds.5 index 70de8e945..9c4b55303 100644 --- a/src/rgbds.5 +++ b/src/rgbds.5 @@ -16,7 +16,7 @@ This is the description of the object files used by .Xr rgbasm 1 and .Xr rgblink 1 . -.Em Please note that the specifications may change. +.Em Please note that the specifications may change . This toolchain is in development and new features may require adding more information to the current format, or modifying some fields, which would break compatibility with older versions. 
.Pp .Sh FILE STRUCTURE @@ -34,34 +34,67 @@ is a 0‐terminated string of ; Header BYTE ID[4] ; "RGB9" -LONG RevisionNumber ; The format's revision number this file uses -LONG NumberOfSymbols ; The number of symbols used in this file -LONG NumberOfSections ; The number of sections used in this file +LONG RevisionNumber ; The format's revision number this file uses. +LONG NumberOfSymbols ; The number of symbols used in this file. +LONG NumberOfSections ; The number of sections used in this file. + +; File info + +LONG NumberOfNodes ; The number of nodes contained in this file. + +REPT NumberOfNodes ; IMPORTANT NOTE: the nodes are actually written in + ; **reverse** order, meaning the node with ID 0 is + ; the last one in the file! + + LONG ParentID ; ID of the parent node, -1 means this is the root. + + LONG ParentLineNo ; Line at which the parent context was exited. + ; Meaningless on the root node. + + BYTE Type ; 0 = REPT node + ; 1 = File node + ; 2 = Macro node + + IF Type != 0 ; If the node is not a REPT... + + STRING Name ; The node's name: either a file name, or macro name + ; prefixed by its definition file name. + + ELSE ; If the node is a REPT, it also contains the iter + ; counts of all the parent REPTs. + + LONG Depth ; Size of the array below. + + LONG Iter[Depth] ; The number of REPT iterations by increasing depth. + + ENDC + +ENDR ; Symbols -REPT NumberOfSymbols ; Number of symbols defined in this object file. +REPT NumberOfSymbols ; Number of symbols defined in this object file. - STRING Name ; The name of this symbol. Local symbols are stored - ; as "Scope.Symbol". + STRING Name ; The name of this symbol. Local symbols are stored + ; as "Scope.Symbol". - BYTE Type ; 0 = LOCAL symbol only used in this file. - ; 1 = IMPORT this symbol from elsewhere - ; 2 = EXPORT this symbol to other objects. + BYTE Type ; 0 = LOCAL symbol only used in this file. + ; 1 = IMPORT this symbol from elsewhere + ; 2 = EXPORT this symbol to other objects.
- IF (Type & 0x7F) != 1 ; If symbol is defined in this object file. + IF (Type & 0x7F) != 1 ; If symbol is defined in this object file. - STRING FileName ; File where the symbol is defined. + LONG SourceFile ; File where the symbol is defined. - LONG LineNum ; Line number in the file where the symbol is defined. + LONG LineNum ; Line number in the file where the symbol is defined. - LONG SectionID ; The section number (of this object file) in which - ; this symbol is defined. If it doesn't belong to any - ; specific section (like a constant), this field has - ; the value -1. + LONG SectionID ; The section number (of this object file) in which + ; this symbol is defined. If it doesn't belong to any + ; specific section (like a constant), this field has + ; the value -1. - LONG Value ; The symbols value. It's the offset into that - ; symbol's section. + LONG Value ; The symbols value. It's the offset into that + ; symbol's section. ENDC @@ -107,8 +140,10 @@ REPT NumberOfSections REPT NumberOfPatches - STRING SourceFile ; Name of the source file (for printing error - ; messages). + LONG SourceFile ; ID of the source file node (for printing + ; error messages). + + LONG LineNo ; Line at which the patch was created. LONG Offset ; Offset into the section where patch should ; be applied (in bytes). @@ -145,7 +180,9 @@ LONG NumberOfAssertions REPT NumberOfAssertions - STRING SourceFile ; Name of the source file (for printing the failure). + LONG SourceFile ; ID of the source file node (for printing the failure). + + LONG LineNo ; Line at which the assertion was created. LONG Offset ; Offset into the section where the assertion is located. @@ -209,7 +246,7 @@ with some bytes being special prefixes for integers and symbols. .It Li $50 Ta Li BANK(symbol) , a .Ar LONG -Symbol ID follows. +Symbol ID follows, where -1 means PC .It Li $51 Ta Li BANK(section_name) , a null-terminated string follows. 
.It Li $52 Ta Li Current BANK() diff --git a/test/asm/label-redefinition.err b/test/asm/label-redefinition.err index 1880de77d..59664cf55 100644 --- a/test/asm/label-redefinition.err +++ b/test/asm/label-redefinition.err @@ -1,3 +1,3 @@ ERROR: label-redefinition.asm(7): - 'Sym' already defined in label-redefinition.asm::m(4) + 'Sym' already defined at label-redefinition.asm(6) -> label-redefinition.asm::m(4) error: Assembly aborted (1 errors)! From ee9e45b3d45f0b6f4342159ec75179df503fdc29 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Tue, 29 Sep 2020 09:06:58 +0200 Subject: [PATCH 56/59] Change assertion condition in __FILE__ buf dumping Removes a false positive from Clang static analysis --- src/asm/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/asm/symbol.c b/src/asm/symbol.c index f6d9736f9..59c77541c 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -103,7 +103,7 @@ static char const *Callback__FILE__(void) size_t j = 1; /* TODO: is there a way for a file name to be empty? 
*/ - assert(strlen(fileName) != 0); + assert(fileName[0]); /* The assertion above ensures the loop runs at least once */ for (size_t i = 0; fileName[i]; i++, j++) { /* Account for the extra backslash inserted below */ From 423a7c48994fc9430960f83955d6697d484e342a Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Wed, 30 Sep 2020 01:13:07 +0200 Subject: [PATCH 57/59] Handle \\r better Translate it to \\n regardless of the lexer mode --- src/asm/lexer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index c54f854dd..62904e29d 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -1611,9 +1611,7 @@ static int yylex_NORMAL(void) /* Handle newlines and EOF */ case '\r': - if (peek(0) == '\n') - shiftChars(1); /* Shift that EOL */ - /* fallthrough */ + return '\r'; case '\n': return '\n'; @@ -1905,6 +1903,10 @@ int yylex(void) return 0; } } + } else if (token == '\r') { /* Handle CR and CRLF line endings */ + token = '\n'; /* We universally use '\n' as the value for line ending tokens */ + if (peek(0) == '\n') + shiftChars(1); /* Shift the CRLF's LF */ } lexerState->lastToken = token; From c24694233f5f1103a33d3476f44cf47099936a84 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Wed, 30 Sep 2020 13:16:23 +0200 Subject: [PATCH 58/59] Fix incomplete duplication of REPT nodes "Initialization, sizeof, and the assignment operator ignore the flexible array member." Oops! 
--- src/asm/fstack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 11e6b504f..33387ad73 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -202,12 +202,13 @@ bool yywrap(void) /* If the node is referenced, we can't edit it; duplicate it */ if (contextStack->fileInfo->referenced) { - struct FileStackReptNode *copy = malloc(sizeof(*copy) + sizeof(copy->iters[0]) * fileInfo->reptDepth); + size_t size = sizeof(*fileInfo) + sizeof(fileInfo->iters[0]) * fileInfo->reptDepth; + struct FileStackReptNode *copy = malloc(size); if (!copy) fatalerror("Failed to duplicate REPT file node: %s\n", strerror(errno)); /* Copy all info but the referencing */ - *copy = *fileInfo; + memcpy(copy, fileInfo, size); copy->node.next = NULL; copy->node.referenced = false; From 2eca43cd2da5a4be21cada359bd5e152f1896453 Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Sun, 4 Oct 2020 16:10:32 +0200 Subject: [PATCH 59/59] Fix critical oversight in lexer buffer refilling Since the lexer buffer wraps, the refilling gets handled in two steps: First, iff the buffer would wrap, the buffer is refilled until its end. Then, if more characters are requested, that amount is refilled too. An important detail is that `read()` may not return as many characters as requested; for this reason, the first step checks if its `read()` was "full", and skips the second step otherwise. This is also where a bug lay. After a *lot* of trying, I eventually managed to reproduce the bug on an OpenBSD VM, and after adding a couple of `assert`s in `peekInternal`, this is what happened, starting at line 724: 0. `lexerState->nbChars` is 0, `lexerState->index` is 19; 1. We end up with `target` = 42, and `writeIndex` = 19; 2. 42 + 19 is greater than `LEXER_BUF_SIZE` (= 42), so the `if` is entered; 3. Within the first `readChars`, **`read` only returns 16 bytes**, advancing `writeIndex` to 35 and `target` to 26; 4.
Within the second `readChars`, a `read(26)` is issued, overflowing the buffer. The bug should be clear now: **the check at line 750 failed to work!** Why? Because `readChars` modifies `writeIndex`. The fix is simply to cache the number of characters expected, and use that. --- src/asm/lexer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 62904e29d..146dd2c5d 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -729,6 +729,8 @@ static int peekInternal(uint8_t distance) ssize_t nbCharsRead = 0, totalCharsRead = 0; #define readChars(size) do { \ + /* This buffer overflow made me lose WEEKS of my life. Never again. */ \ + assert(writeIndex + (size) <= LEXER_BUF_SIZE); \ nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \ if (nbCharsRead == -1) \ fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \ @@ -741,9 +743,11 @@ static int peekInternal(uint8_t distance) /* If the range to fill passes over the buffer wrapping point, we need two reads */ if (writeIndex + target > LEXER_BUF_SIZE) { - readChars(LEXER_BUF_SIZE - writeIndex); + size_t nbExpectedChars = LEXER_BUF_SIZE - writeIndex; + + readChars(nbExpectedChars); /* If the read was incomplete, don't perform a second read */ - if (nbCharsRead < LEXER_BUF_SIZE - writeIndex) + if (nbCharsRead < nbExpectedChars) target = 0; } if (target != 0)