From 1e2f3c263a924458a684cf124d8d074f6c6525b5 Mon Sep 17 00:00:00 2001 From: servostar Date: Wed, 5 Jun 2024 15:03:23 +0200 Subject: [PATCH] fixed: lexer memory leak due to strdup --- src/compiler.c | 2 ++ src/lex/lexer.l | 10 +++++----- src/lex/util.c | 26 ++++++++++++++++++++++++++ src/lex/util.h | 4 ++++ 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/compiler.c b/src/compiler.c index 709592e..e9b5c8d 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -148,6 +148,8 @@ static void build_target(ModuleFileStack *unit, const TargetConfig *target) { AST_delete_node(ast); + lex_purge_str_cache(); + print_file_statistics(file); } diff --git a/src/lex/lexer.l b/src/lex/lexer.l index cb507f6..8f65ff7 100644 --- a/src/lex/lexer.l +++ b/src/lex/lexer.l @@ -89,20 +89,20 @@ "lineno" {DEBUG("\"%s\" tokenized with \'FunLineno\'", yytext); return(FunLineno);}; "extsupport" {DEBUG("\"%s\" tokenized with \'FunExtsupport\'", yytext); return(FunExtsupport);}; -[0-9]+ {DEBUG("\"%s\" tokenized with \'ValInt\'", yytext); yylval.string = strdup(yytext); return(ValInt); }; -[0-9]*\.[0-9]+ {DEBUG("\"%s\" tokenized with \'ValFloat\'", yytext); yylval.string = strdup(yytext); return(ValFloat);}; -[a-zA-Z_0-9]+ {DEBUG("\"%s\" tokenized with \'Ident\'", yytext); yylval.string = strdup(yytext); return(Ident); }; +[0-9]+ {DEBUG("\"%s\" tokenized with \'ValInt\'", yytext); yylval.string = lex_cached_strdup(yytext); return(ValInt); }; +[0-9]*\.[0-9]+ {DEBUG("\"%s\" tokenized with \'ValFloat\'", yytext); yylval.string = lex_cached_strdup(yytext); return(ValFloat);}; +[a-zA-Z_0-9]+ {DEBUG("\"%s\" tokenized with \'Ident\'", yytext); yylval.string = lex_cached_strdup(yytext); return(Ident); }; \"([^\"\n])*\" { yytext = yytext +1; yytext[yyleng - 2] = 0; - DEBUG("\"%s\" tokenized with \'ValStr\'", yytext); yylval.string = strdup(yytext); return(ValStr);}; + DEBUG("\"%s\" tokenized with \'ValStr\'", yytext); yylval.string = lex_cached_strdup(yytext); return(ValStr);}; \"\"\"[^\"]*\"\"\" { yytext = yytext +3; yytext[yyleng - 4] = 0; - DEBUG("\"%s\" tokenized with \'ValMultistr\'", yytext); yylval.string = strdup(yytext); return(ValMultistr);}; + DEBUG("\"%s\" tokenized with \'ValMultistr\'", yytext); yylval.string = lex_cached_strdup(yytext); return(ValMultistr);}; [ \r\t] { /* ignore whitespace */ }; . { return yytext[0]; /* passthrough unknown token, let parser handle the error */ }; %% diff --git a/src/lex/util.c b/src/lex/util.c index 81f2855..0e01118 100644 --- a/src/lex/util.c +++ b/src/lex/util.c @@ -2,6 +2,7 @@ #include #include #include +#include // implementation based on: // https://github.com/sunxfancy/flex-bison-examples/blob/master/error-handling/ccalc.c @@ -16,12 +17,37 @@ static int nTokenStart = 0; static int nTokenLength = 0; static int nTokenNextStart = 0; +static GArray* stringCache = NULL; + +char* lex_cached_strdup(char* string) { + char* dup = strdup(string); + + g_array_append_val(stringCache, dup); + + return dup; +} + +void lex_purge_str_cache() { + DEBUG("purging string cache..."); + + const guint count = stringCache->len; + + for (guint i = 0; i < count; i++) { + free(((char**) stringCache->data)[i]); + } + + g_array_remove_range(stringCache, 0, count); +} + static void lex_deinit(void) { + lex_purge_str_cache(); + g_array_free(stringCache, TRUE); free(buffer); } void lex_init(void) { buffer = malloc(MAX_READ_BUFFER_SIZE); + stringCache = g_array_new(FALSE, FALSE, sizeof(char*)); atexit(lex_deinit); } diff --git a/src/lex/util.h b/src/lex/util.h index 6939526..6f96ef1 100644 --- a/src/lex/util.h +++ b/src/lex/util.h @@ -18,6 +18,10 @@ void lex_init(void); void lex_reset(void); +char* lex_cached_strdup(char* string); + +void lex_purge_str_cache(); + /** * @brief Begin counting a new token. This will fill the global struct yylloc. * @param t the text of the token. Must be null terminated