From 26fa8025eff69e65e8d4328a72014b7be3f4cde3 Mon Sep 17 00:00:00 2001 From: hugogogo Date: Wed, 29 Apr 2026 12:41:19 +0200 Subject: [PATCH] lexer complete with luke memory solution --- headers/lexer.h | 11 ++--- src/computorv1.c | 66 ++++++++++++++++++++++++------ src/lexer.c | 102 ++++++++++++++++++++++++++++++++++------------- 3 files changed, 134 insertions(+), 45 deletions(-) diff --git a/headers/lexer.h b/headers/lexer.h index 1290286..84b5253 100644 --- a/headers/lexer.h +++ b/headers/lexer.h @@ -6,7 +6,8 @@ typedef enum { TOKEN_VARIABLE, // x, y, etc. - TOKEN_NUMBER, // int or double + TOKEN_NUMBER_INT, // int + TOKEN_NUMBER_DOUBLE, // double TOKEN_POWER, // ^ or ** TOKEN_PLUS, // + TOKEN_MINUS, // - @@ -20,12 +21,12 @@ typedef struct token_type type; union { - double num_value; // For NUMBER - char var_value; // For VARIABLE (single char, e.g., 'x') + char value_char; + int value_int; + double value_double; }; } token; -#define MAX_TOKENS 100 -int lexerize(const char *input, token tokens[MAX_TOKENS]); +void lexerize(const char *input, token *tokens); #endif \ No newline at end of file diff --git a/src/computorv1.c b/src/computorv1.c index 534d6e2..1aa490f 100644 --- a/src/computorv1.c +++ b/src/computorv1.c @@ -3,18 +3,46 @@ #include "errors.h" #include // tmp for float debug +void remove_spaces(char *s) +{ + char *read = s; + char *write = s; + + // copy all non-space chars + while (*read) + { + if (!ft_isspace(*read)) + { + *write++ = *read; + } + read++; + } + *write = '\0'; + + // zero the rest of the buffer + while (write != read) + { + *write++ = '\0'; + } +} + int main(int ac, char **av) { int i; - int ret; + int arg_len; + char *input; if (ac < 2) { return 0; } + + // tmp debug output + ft_putstr("-> received args :\n"); // debug i = 0; while (i < ac) { + ft_putstr(" "); ft_putnbr(i); ft_putstr(" : "); ft_putstr(av[i]); @@ -22,14 +50,22 @@ int main(int ac, char **av) i++; } - token tokens[MAX_TOKENS]; - ret = lexerize(av[1], tokens); - if (ret <= 0) - { - stop_errors(ret); - } + input = av[1]; + remove_spaces(input); + arg_len = ft_strlen(input); + + ft_putstr("-> input without space : "); // debug + ft_putstr(input); // debug + ft_putchar('\n'); // debug + ft_putstr("-> arg_len : "); // debug + ft_putnbr(arg_len); // debug + ft_putchar('\n'); // debug + + token tokens[arg_len]; + lexerize(input, tokens); // tmp debug output + ft_putchar('\n'); // debug i = 0; while (tokens[i].type != TOKEN_END) { @@ -37,8 +73,10 @@ int main(int ac, char **av) if (tokens[i].type == TOKEN_VARIABLE) ft_printf("%20s", "TOKEN_VARIABLE"); - if (tokens[i].type == TOKEN_NUMBER) - ft_printf("%20s", "TOKEN_NUMBER"); + if (tokens[i].type == TOKEN_NUMBER_INT) + ft_printf("%20s", "TOKEN_NUMBER_INT"); + if (tokens[i].type == TOKEN_NUMBER_DOUBLE) + ft_printf("%20s", "TOKEN_NUMBER_DOUBLE"); if (tokens[i].type == TOKEN_POWER) ft_printf("%20s", "TOKEN_POWER"); if (tokens[i].type == TOKEN_PLUS) @@ -54,13 +92,17 @@ int main(int ac, char **av) ft_putstr(" - value : "); - if (tokens[i].type == TOKEN_NUMBER) + if (tokens[i].type == TOKEN_NUMBER_INT) { - printf("%f\n", tokens[i].num_value); + printf("%i\n", tokens[i].value_int); + } + else if (tokens[i].type == TOKEN_NUMBER_DOUBLE) + { + printf("%f\n", tokens[i].value_double); } else { - ft_printf("%c\n", tokens[i].var_value); + ft_printf("%c\n", tokens[i].value_char); } i++; } diff --git a/src/lexer.c b/src/lexer.c index 1517763..02b50f0 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -2,15 +2,6 @@ #include "errors.h" #include -static int skip_whitespace(const char *input, int input_pos) -{ - while (ft_isspace(input[input_pos])) - { - input_pos++; - } - return input_pos; -} - // any single letter is a valid variable, like "x" or "y" static bool token_is_variable(const char *input, int input_pos, int *token_size) { @@ -22,8 +13,8 @@ static bool token_is_variable(const char *input, int input_pos, int *token_size) return false; } -// number can be double "123.456" -static bool token_is_number(const char *input, int input_pos, int *token_size, int *float_precision) +// number can be int "123" +static bool token_is_number_int(const char *input, int input_pos, int *token_size) { int number_size; int max_number_size; @@ -33,6 +24,48 @@ static bool token_is_number(const char *input, int input_pos, int *token_size, i return false; } + number_size = 1; + max_number_size = 16; // max size for int + while (number_size <= max_number_size) + { + if (ft_isdigit(input[input_pos + number_size])) + { + number_size++; + } + else if (input[input_pos + number_size] == '.') + { + if (ft_isdigit(input[input_pos + number_size + 1])) + { + // number is double + return false; + } + else + break; + } + else + break; + } + if (number_size > max_number_size) + { + stop_errors(ERROR_NUMBER_TOO_BIG); + } + *token_size = number_size; + return true; +} + +// number can be double "123.456" +static bool token_is_number_double(const char *input, int input_pos, int *token_size) +{ + int number_size; + int max_number_size; + bool has_dot; + + if (!ft_isdigit(input[input_pos])) + { + return false; + } + + has_dot = false; number_size = 1; max_number_size = 129; // max size for double double is 128 bits, + the coma while (number_size <= max_number_size) @@ -43,6 +76,17 @@ static bool token_is_number(const char *input, int input_pos, int *token_size, i } else if (input[input_pos + number_size] == '.') { + if (has_dot) + { + // number is not a valid double, it has 2 dots + return false; + } + if (!ft_isdigit(input[input_pos + number_size + 1])) + { + // number is not a double, it has no number after the dot + return false; + } + has_dot = true; number_size++; } else @@ -119,20 +163,17 @@ static bool token_is_division(const char *input, int input_pos, int *token_size) /** * LEXER */ -int lexerize(const char *input, token tokens[MAX_TOKENS]) +void lexerize(const char *input, token *tokens) { int token_count; int input_pos; int token_size; - int float_precision; token_count = 0; input_pos = 0; - float_precision = 0; while (input[input_pos]) { token_size = 0; - input_pos = skip_whitespace(input, input_pos); if (input[input_pos] == '\0') { @@ -142,41 +183,48 @@ int lexerize(const char *input, token tokens[MAX_TOKENS]) if (token_is_variable(input, input_pos, &token_size)) { tokens[token_count].type = TOKEN_VARIABLE; - tokens[token_count].var_value = 'x'; + tokens[token_count].value_char = 'x'; } - else if (token_is_number(input, input_pos, &token_size, &float_precision)) + else if (token_is_number_int(input, input_pos, &token_size)) { - tokens[token_count].type = TOKEN_NUMBER; - tokens[token_count].num_value = ft_atof(&input[input_pos]); + tokens[token_count].type = TOKEN_NUMBER_INT; + tokens[token_count].value_int = ft_atoi(&input[input_pos]); + } + else if (token_is_number_double(input, input_pos, &token_size)) + { + tokens[token_count].type = TOKEN_NUMBER_DOUBLE; + tokens[token_count].value_double = ft_atof(&input[input_pos]); } else if (token_is_power(input, input_pos, &token_size)) { tokens[token_count].type = TOKEN_POWER; - tokens[token_count].var_value = '^'; + tokens[token_count].value_char = '^'; } else if (token_is_plus(input, input_pos, &token_size)) { tokens[token_count].type = TOKEN_PLUS; - tokens[token_count].var_value = '+'; + tokens[token_count].value_char = '+'; } else if (token_is_minus(input, input_pos, &token_size)) { tokens[token_count].type = TOKEN_MINUS; - tokens[token_count].var_value = '-'; + tokens[token_count].value_char = '-'; } else if (token_is_multiplication(input, input_pos, &token_size)) { tokens[token_count].type = TOKEN_MULTIPLICATION; - tokens[token_count].var_value = '*'; + tokens[token_count].value_char = '*'; } else if (token_is_division(input, input_pos, &token_size)) { tokens[token_count].type = TOKEN_DIVISION; - tokens[token_count].var_value = '/'; + tokens[token_count].value_char = '/'; } else { - stop_errors(ERROR_UNKNOWN_TOKEN); + // tmp + token_size = 1; + // stop_errors(ERROR_UNKNOWN_TOKEN); } token_count++; @@ -188,7 +236,5 @@ int lexerize(const char *input, token tokens[MAX_TOKENS]) } tokens[token_count].type = TOKEN_END; - tokens[token_count].var_value = '\0'; - - return 1; + tokens[token_count].value_char = '\0'; }