From 02e7c2775b4074822aad2842e2bcc08b54803506 Mon Sep 17 00:00:00 2001 From: servostar Date: Mon, 13 May 2024 16:13:49 +0200 Subject: [PATCH 1/4] added better parser error messages --- src/lex/lexer.l | 15 +++++++- src/lex/util.c | 79 ++++++++++++++++++++++++++++++++++++++++++ src/lex/util.h | 37 ++++++++++++++++++++ src/main.c | 8 +++-- src/sys/col.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++ src/sys/col.h | 43 +++++++++++++++++++++++ src/yacc/parser.y | 73 +++++++++++++++++++++++++++++++++++++-- 7 files changed, 337 insertions(+), 5 deletions(-) create mode 100644 src/lex/util.c create mode 100644 src/lex/util.h create mode 100644 src/sys/col.c create mode 100644 src/sys/col.h diff --git a/src/lex/lexer.l b/src/lex/lexer.l index c9bf739..d0e6d10 100644 --- a/src/lex/lexer.l +++ b/src/lex/lexer.l @@ -2,9 +2,21 @@ %{ #include #include + #include int yyLineNumber = 1; + int yylex(); + + extern int yyerror(char* s); + + #define YY_USER_ACTION beginToken(yytext); + + #define YY_INPUT(buf,result,max_size) {\ + result = nextChar(buf); \ + if ( result <= 0 ) \ + result = YY_NULL; \ + } %} /* disable the following functions */ @@ -81,5 +93,6 @@ [a-zA-Z_0-9]+ {DEBUG("\"%s\" tokenized with \'Ident\'", yytext); yylval.string = strdup(yytext); return(Ident); }; \"([^\"\n])*\" {DEBUG("\"%s\" tokenized with \'ValStr\'", yytext); yylval.string = strdup(yytext); return(ValStr);}; \"\"\"([^\"\n]|\\\n)*\"\"\" {DEBUG("\"%s\" tokenized with \'ValMultistr\'", yytext); yylval.string = strdup(yytext); return(ValMultistr);}; -.; +[ \r\t] { /* ignore whitespace */ }; +. { }; %% diff --git a/src/lex/util.c b/src/lex/util.c new file mode 100644 index 0000000..b5b92ac --- /dev/null +++ b/src/lex/util.c @@ -0,0 +1,79 @@ + +#include +#include +#include + +// implementation based on: +// https://github.com/sunxfancy/flex-bison-examples/blob/master/error-handling/ccalc.c + +char* buffer = NULL; + +static int eof = 0; +static int nRow = 0; +static int nBuffer = 0; +static int lBuffer = 0; +static int nTokenStart = 0; +static int nTokenLength = 0; +static int nTokenNextStart = 0; + +static void lex_deinit(void) { + free(buffer); +} + +void lex_init(void) { + buffer = malloc(MAX_READ_BUFFER_SIZE); + atexit(lex_deinit); +} + +void beginToken(char *t) { + nTokenStart = nTokenNextStart; + nTokenLength = (int) strlen(t); + nTokenNextStart = nBuffer + 1; + + yylloc.first_line = nRow; + yylloc.first_column = nTokenStart; + yylloc.last_line = nRow; + yylloc.last_column = nTokenStart + nTokenLength - 1; +} + +int nextChar(char *dst) { + int frc; + + if (eof) + return 0; + + while (nBuffer >= lBuffer) { + frc = getNextLine(); + if (frc != 0) { + return 0; + } + } + + dst[0] = buffer[nBuffer]; + nBuffer += 1; + + return dst[0] != 0; +} + +int getNextLine(void) { + char *p; + + nBuffer = 0; + nTokenStart = -1; + nTokenNextStart = 1; + eof = 0; + + p = fgets(buffer, MAX_READ_BUFFER_SIZE, yyin); + if (p == NULL) { + if (ferror(yyin)) { + return -1; + } + eof = 1; + return 1; + } + + nRow += 1; + lBuffer = (int) strlen(buffer); + + return 0; +} diff --git a/src/lex/util.h b/src/lex/util.h new file mode 100644 index 0000000..12e0837 --- /dev/null +++ b/src/lex/util.h @@ -0,0 +1,37 @@ + +#ifndef LEX_UTIL_H_ +#define LEX_UTIL_H_ + +#include +#include + +#define MAX_READ_BUFFER_SIZE 1000 + +extern FILE* yyin; +extern YYLTYPE yylloc; +extern char* buffer; + +/** + * @brief Initialize global state needed for the lexer + */ +void lex_init(void); + +/** + * @brief Begin counting a new token. This will fill the global struct yylloc. + * @param t the text of the token. Must be null terminated + */ +[[gnu::nonnull(1)]] +void beginToken(char *t); + +/** + * @brief Stores the next character into the supplied buffer + * @param dst the buffer to store character in + */ +int nextChar(char *dst); + +/** + * @brief Reads the next line from yyin into a global buffer + */ +int getNextLine(void); + +#endif // LEX_UTIL_H_ diff --git a/src/main.c b/src/main.c index 03ab6b9..5f1c8ab 100644 --- a/src/main.c +++ b/src/main.c @@ -2,11 +2,11 @@ #include #include #include +#include +#include #define LOG_LEVEL LOG_LEVEL_DEBUG -extern FILE *yyin; - /** * @brief Log a debug message to inform about beginning exit procedures * @@ -41,6 +41,10 @@ void setup(void) { // actual setup AST_init(); + col_init(); + + lex_init(); + DEBUG("finished starting up gemstone..."); } diff --git a/src/sys/col.c b/src/sys/col.c new file mode 100644 index 0000000..76260ec --- /dev/null +++ b/src/sys/col.c @@ -0,0 +1,87 @@ + +#include +#include +#include +#include + +#ifdef __unix__ +#include +#elif defined(_WIN32) || defined(WIN32) +#include +#endif + +char *RED; +char *YELLOW; +char *MAGENTA; +char *CYAN; +char *GREEN; +char *RESET; +char *BOLD; +char *FAINT; + +void col_init(void) { + if (stdout_supports_ansi_esc()) { + enable_ansi_colors(); + } else { + disable_ansi_colors(); + } +} + +void disable_ansi_colors() { + DEBUG("disabling ANSI escape codes"); + + RED = ""; + YELLOW = ""; + MAGENTA = ""; + CYAN = ""; + GREEN = ""; + RESET = ""; + BOLD = ""; + FAINT = ""; +} + +void enable_ansi_colors() { + DEBUG("enabling ANSI escape codes"); + + RED = "\x1b[31m"; + YELLOW = "\x1b[33m"; + MAGENTA = "\x1b[35m"; + CYAN = "\x1b[36m"; + GREEN = "\x1b[32m"; + RESET = "\x1b[0m"; + BOLD = "\x1b[1m"; + FAINT = "\x1b[2m"; +} + +int stdout_supports_ansi_esc() { + +#ifdef __unix__ + // check if TTY + if (isatty(STDOUT_FILENO)) { + const char *colors = getenv("COLORTERM"); + // check if colors are set and allowed + if (colors != NULL && (strcmp(colors, "truecolor") == 0 || strcmp(colors, "24bit") == 0)) { + return ANSI_ENABLED; + } + } +#elif defined(_WIN32) || defined(WIN32) + // see: + // https://stackoverflow.com/questions/63913005/how-to-test-if-console-supports-ansi-color-codes + DWORD mode; + HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE); + + if (!GetConsoleMode(hConsole, &mode)) { + ERROR("failed to get console mode"); + return ANSI_ENABLED; + } + + if ((mode & ENABLE_VIRTUAL_TERMINAL_INPUT) | + (mode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) { + return ANSI_ENABLED; + } +#else +#warning "unsupported platform, ASNI escape codes disabled by default" +#endif + + return ASNI_DISABLED; +} diff --git a/src/sys/col.h b/src/sys/col.h new file mode 100644 index 0000000..125d0d6 --- /dev/null +++ b/src/sys/col.h @@ -0,0 +1,43 @@ + +#ifndef COLORS_H_ +#define COLORS_H_ + +#define ANSI_ENABLED 1 +#define ASNI_DISABLED 0 + +// Common escape codes +// can be used to print colored text +extern char *RED; +extern char *YELLOW; +extern char *MAGENTA; +extern char *CYAN; +extern char *GREEN; +extern char *RESET; +extern char *BOLD; +extern char *FAINT; + +/** + * @brief Initialize global state + */ +void col_init(void); + +/** + * @brief Enable ANSI escape codes. This will set the correct escape codes to + * the global strings above. + */ +void enable_ansi_colors(); + +/** + * @brief Disable ANSI escape codes. This will set all the above global strings to be empty. + */ +void disable_ansi_colors(); + +/** + * @brief Check if stdout may support ANSI escape codes. + * @attention This function may report escape codes to be unavailable even if they actually are. + * @return ANSI_ENABLED if escape sequences are supported ASNI_DISABLED otherwise + */ +[[nodiscard]] +int stdout_supports_ansi_esc(); + +#endif // COLORS_H_ diff --git a/src/yacc/parser.y b/src/yacc/parser.y index 36c5931..6221d8b 100644 --- a/src/yacc/parser.y +++ b/src/yacc/parser.y @@ -1,9 +1,15 @@ +%locations +%define parse.error verbose + %{ #include - extern int yylineno; + #include int yyerror(char*); + extern char* buffer; + extern int yylineno; + extern int yylex(); %} @@ -55,6 +61,7 @@ %token FunFunname %token FunLineno %token FunExtsupport +%token Invalid /* Operator associativity */ %right '=' @@ -222,7 +229,69 @@ opbit: expr OpBitand expr | OpBitnot expr %prec OpBitand; %% + +const char* ERROR = "error"; +const char* WARNING = "warning"; +const char* NOTE = "note"; + +int print_message(const char* kind, char* message) { + // number of characters written + int char_count = 0; + // highlight to use + char* HIGHLIGHT = CYAN; + + // convert message kind into color + if (kind == ERROR) { + HIGHLIGHT = RED; + } else if (kind == WARNING) { + HIGHLIGHT = YELLOW; + } + + // print message + char_count += printf("%sfilename:%d:%d%s:%s%s %s: %s%s\n", BOLD, yylloc.first_line, yylloc.first_column, RESET, HIGHLIGHT, BOLD, kind, RESET, message); + + // print line in which error occurred + + char_count += printf(" %4d | ", yylloc.first_line); + + for (int i = 0; i < yylloc.first_column - 1; i++) { + if (buffer[i] == '\n') { + break; + } + printf("%c", buffer[i]); + } + + char_count += printf("%s%s", BOLD, HIGHLIGHT); + + for (int i = yylloc.first_column - 1; i < yylloc.last_column; i++) { + if (buffer[i] == '\n') { + break; + } + char_count += printf("%c", buffer[i]); + } + + char_count += printf("%s", RESET); + + for (int i = yylloc.last_column; buffer[i] != '\0' && buffer[i] != '\n'; i++) { + printf("%c", buffer[i]); + } + + char_count += printf("\n | "); + + for (int i = 0; i < yylloc.first_column - 1; i++) { + char_count += printf(" "); + } + + char_count += printf("%s^", HIGHLIGHT); + + for (int i = 0; i < yylloc.last_column - yylloc.first_column; i++) { + printf("~"); + } + + char_count += printf("%s\n\n", RESET); +} + int yyerror(char *s) { - ERROR("%s", s); + print_message(ERROR, s); return 0; } \ No newline at end of file From fa8c97d84f847c503532b36e44b5e904888077a1 Mon Sep 17 00:00:00 2001 From: servostar Date: Tue, 14 May 2024 12:27:21 +0200 Subject: [PATCH 2/4] lexer unknown token passthrough --- .gitignore | 3 +++ src/lex/lexer.l | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bb676ce..10613c6 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ parser.tab.c parser.tab.h build /Testing/ +CTestTestfile.cmake +DartConfiguration.tcl +*.cmake diff --git a/src/lex/lexer.l b/src/lex/lexer.l index d0e6d10..2c96d23 100644 --- a/src/lex/lexer.l +++ b/src/lex/lexer.l @@ -94,5 +94,5 @@ \"([^\"\n])*\" {DEBUG("\"%s\" tokenized with \'ValStr\'", yytext); yylval.string = strdup(yytext); return(ValStr);}; \"\"\"([^\"\n]|\\\n)*\"\"\" {DEBUG("\"%s\" tokenized with \'ValMultistr\'", yytext); yylval.string = strdup(yytext); return(ValMultistr);}; [ \r\t] { /* ignore whitespace */ }; -. { }; +. { return yytext[0]; /* passthrough unknown token, let parser handle the error */ }; %% From 5aa192871badc14a75a5a4b5c855e05b30252391 Mon Sep 17 00:00:00 2001 From: servostar Date: Tue, 14 May 2024 12:37:52 +0200 Subject: [PATCH 3/4] fixed bison build --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12e8c66..be613ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,7 +62,7 @@ set(YACC_GENERATED_SOURCE_FILE ${PROJECT_SOURCE_DIR}/src/yacc/parser.tab.c) add_custom_command(OUTPUT ${YACC_GENERATED_SOURCE_FILE} COMMAND yacc - ARGS -Wcounterexamples -d -o ${YACC_GENERATED_SOURCE_FILE} ${YACC_SOURCE_FILE} + ARGS -Wno-yacc -Wcounterexamples -d -o ${YACC_GENERATED_SOURCE_FILE} ${YACC_SOURCE_FILE} COMMENT "generate C source file for parser" VERBATIM) From 4c61851a487523ceb5f0a43f0a1d934effe381fd Mon Sep 17 00:00:00 2001 From: servostar Date: Tue, 14 May 2024 13:28:05 +0200 Subject: [PATCH 4/4] fixed build errors --- src/lex/lexer.l | 2 +- src/yacc/parser.y | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/lex/lexer.l b/src/lex/lexer.l index 2c96d23..b93499a 100644 --- a/src/lex/lexer.l +++ b/src/lex/lexer.l @@ -8,7 +8,7 @@ int yylex(); - extern int yyerror(char* s); + extern int yyerror(const char* s); #define YY_USER_ACTION beginToken(yytext); diff --git a/src/yacc/parser.y b/src/yacc/parser.y index 6221d8b..19d8aa1 100644 --- a/src/yacc/parser.y +++ b/src/yacc/parser.y @@ -5,7 +5,7 @@ #include #include - int yyerror(char*); + int yyerror(const char*); extern char* buffer; extern int yylineno; @@ -234,7 +234,7 @@ const char* ERROR = "error"; const char* WARNING = "warning"; const char* NOTE = "note"; -int print_message(const char* kind, char* message) { +int print_message(const char* kind, const char* message) { // number of characters written int char_count = 0; // highlight to use @@ -289,9 +289,10 @@ int print_message(const char* kind, char* message) { } char_count += printf("%s\n\n", RESET); + + return char_count; } -int yyerror(char *s) { - print_message(ERROR, s); - return 0; +int yyerror(const char *s) { + return print_message(ERROR, s); } \ No newline at end of file