From 42cfdf9734e4437373b74033cc832942590a2a46 Mon Sep 17 00:00:00 2001 From: hugogogo Date: Sun, 3 May 2026 00:20:39 +0200 Subject: [PATCH] wip parsing and error --- headers/errors.h | 2 +- headers/lexer.h | 174 ----------------------------------------------- headers/parser.h | 7 +- src/computorv1.c | 61 +++-------------- src/errors.c | 55 +++++++++++++-- src/parser.c | 120 +++++++++++++++++++------------- 6 files changed, 134 insertions(+), 285 deletions(-) diff --git a/headers/errors.h b/headers/errors.h index 0034387..f06dbc6 100644 --- a/headers/errors.h +++ b/headers/errors.h @@ -12,6 +12,6 @@ typedef enum ERROR_SENTINEL, } program_error; -int stop_errors(program_error err, const char *format, ...); +int stop_errors(program_error err, token *tokens, char *input, const char *format, ...); #endif \ No newline at end of file diff --git a/headers/lexer.h b/headers/lexer.h index 6474fd5..381489a 100644 --- a/headers/lexer.h +++ b/headers/lexer.h @@ -5,180 +5,6 @@ #include "errors.h" #include -/** - * PROPOSITION 1 - */ - -// typedef enum -// { -// TOKEN_VARIABLE, // x, y, etc. -// TOKEN_NUMBER, // int or double -// TOKEN_POWER, // ^ or ** -// TOKEN_SIGN, // + or - -// TOKEN_FACTOR, // * or / or : -// TOKEN_EQUAL, // = -// TOKEN_END // null (end of input) -// } token_type; -// -// typedef enum -// { -// TOKEN_NUMBER_INT, // int -// TOKEN_NUMBER_DOUBLE, // double -// TOKEN_SIGN_ADD, // + -// TOKEN_SIGN_MINUS, // - -// TOKEN_FACTOR_MULTIPLICATION, // * -// TOKEN_FACTOR_DIVISION, // / or : -// } token_subtype; -// -// typedef struct -// { -// token_type type; -// token_subtype subtype; -// union -// { -// char value_char; -// double value_double; -// }; -// } token; - -/** - * PROPOSITION 2 - */ - -// // TYPES and SUBTYPES - -// typedef enum -// { -// TOKEN_VARIABLE, // x, y, etc. -// TOKEN_NUMBER, // int or double -// TOKEN_POWER, // ^ or ** -// TOKEN_SIGN, // + or - -// TOKEN_FACTOR, // * or / or : -// TOKEN_EQUAL, // = -// TOKEN_END // null (end of input) -// } token_type; -// -// typedef enum -// { -// NUMBER_INT, -// NUMBER_DOUBLE -// } number_subtype; -// -// typedef enum -// { -// SIGN_ADD, // + -// SIGN_MINUS // - -// } sign_subtype; -// -// typedef enum -// { -// FACTOR_MULTIPLICATION, // * -// FACTOR_DIVISION, // / or : -// } factor_subtype; -// -// // DATA -// -// typedef struct -// { -// char value; // e.g., 'x', 'y' -// } token_variable; -// -// typedef struct -// { -// number_subtype subtype; -// double value; -// } token_number; -// -// typedef struct -// { -// sign_subtype subtype; -// char value; -// } token_sign; -// -// typedef struct -// { -// factor_subtype subtype; -// char value; -// } token_factor; -// -// typedef struct -// { -// char value; -// } token_power; -// -// typedef struct -// { -// char value; -// } token_equal; -// -// typedef struct -// { -// char value; -// } token_end; -// -// // TOKEN -// -// typedef union -// { -// token_variable variable; // value -// token_number number; // subtype [INT, DOUBLE], value -// token_sign sign; // subtype [PLUS, MINUS], value -// token_factor factor; // subtype [MULT, DIV], value -// token_power power; // value -// token_equal equal; // value -// token_end end; // value -// } token_data; -// -// typedef struct -// { -// token_type type; -// token_data data; -// } token; - -/** - * PROPOSITION 3 - */ - -// typedef enum -// { -// TOKEN_VARIABLE, // x, y, etc. -// TOKEN_NUMBER, // int or double -// TOKEN_POWER, // ^ or ** -// TOKEN_SIGN, // + or - -// TOKEN_FACTOR, // * or / or : -// TOKEN_EQUAL, // = -// TOKEN_END // null (end of input) -// } token_type; -// -// typedef enum -// { -// TOKEN_NO_SUBTYPE, -// // NUMBER -// TOKEN_NUMBER_INT, -// TOKEN_NUMBER_DOUBLE, -// // SIGN -// TOKEN_SIGN_PLUS, -// TOKEN_SIGN_MINUS, -// // FACTOR -// TOKEN_FACTOR_MULTIPLICATION, -// TOKEN_FACTOR_DIVISION, -// } token_subtype; -// -// typedef struct -// { -// token_type type; -// token_subtype subtype; -// union -// { -// char value_char; -// double value_double; -// }; -// } token; - -/** - * PROPOSITION 4 - */ - typedef enum { TOKEN_VARIABLE, // x, y, etc. diff --git a/headers/parser.h b/headers/parser.h index 854639f..e96fcac 100644 --- a/headers/parser.h +++ b/headers/parser.h @@ -1,11 +1,6 @@ #ifndef PARSER_H #define PARSER_H -#include "libft.h" -#include "lexer.h" -#include "errors.h" -#include - typedef enum { TERM_LEFT, // a in "a = b" @@ -25,7 +20,7 @@ typedef struct term_position position; term_sign sign; double coefficient; - int exponent; + double exponent; } term; int parse(token *tokens, term *terms, int terms_count_max); diff --git a/src/computorv1.c b/src/computorv1.c index bd06c30..78e2c70 100644 --- a/src/computorv1.c +++ b/src/computorv1.c @@ -63,64 +63,19 @@ int main(int ac, char **av) input = av[1]; remove_spaces(input); - arg_len = ft_strlen(input); - - ft_putstr("-> input without space : "); // debug - ft_putstr(input); // debug - ft_putchar('\n'); // debug - ft_putstr("-> arg_len : "); // debug - ft_putnbr(arg_len); // debug - ft_putchar('\n'); // debug + arg_len = ft_strlen(input) + 1; // +1 for last END token token tokens[arg_len]; + // by security, make the last token as END (even though the real END token will likely be lower) + tokens[arg_len - 1].type = TOKEN_END; + tokens[arg_len - 1].tag = TOKEN_NO_TAG; + tokens[arg_len - 1].value_char = '\0'; + // lexerize int tokens_count = lexerize(input, tokens); - - ft_putstr("-> tokens_count : "); // debug - ft_putnbr(tokens_count); // debug - ft_putchar('\n'); // debug - - // tmp debug output - ft_putchar('\n'); // debug - i = 0; - while (tokens[i].type != TOKEN_END) + if (tokens_count == 0) { - ft_printf("token %2i - type : ", i); - - if (tokens[i].type == TOKEN_VARIABLE) - ft_printf("%20s", "TOKEN_VARIABLE"); - else if (tokens[i].type == TOKEN_NUMBER_INT) - ft_printf("%20s", "TOKEN_NUMBER_INT"); - else if (tokens[i].type == TOKEN_NUMBER_DOUBLE) - ft_printf("%20s", "TOKEN_NUMBER_DOUBLE"); - else if (tokens[i].type == TOKEN_POWER) - ft_printf("%20s", "TOKEN_POWER"); - else if (tokens[i].type == TOKEN_SIGN_PLUS) - ft_printf("%20s", "TOKEN_SIGN_PLUS"); - else if (tokens[i].type == TOKEN_SIGN_MINUS) - ft_printf("%20s", "TOKEN_SIGN_MINUS"); - else if (tokens[i].type == TOKEN_FACTOR_MULT) - ft_printf("%20s", "TOKEN_FACTOR_MULT"); - else if (tokens[i].type == TOKEN_FACTOR_DIV) - ft_printf("%20s", "TOKEN_FACTOR_DIV"); - else if (tokens[i].type == TOKEN_EQUAL) - ft_printf("%20s", "TOKEN_EQUAL"); - else if (tokens[i].type == TOKEN_END) - ft_printf("%20s", "TOKEN_END"); - - ft_putstr(" - value : "); - - if (tokens[i].tag == TOKEN_NUMBER) - { - printf("%g\n", tokens[i].value_double); - } - else - { - ft_printf("%c\n", tokens[i].value_char); - } - i++; + stop_errors(ERROR_BASE, tokens, input, "test error"); } - ft_putchar('\n'); // debug - // END tmp debug output terms_count_prediction = count_any_of(input, "-+=") + 2; // +1 for first term that can have no leading '+', +1 for last term == NULL diff --git a/src/errors.c b/src/errors.c index f298822..13f610c 100644 --- a/src/errors.c +++ b/src/errors.c @@ -1,13 +1,16 @@ #include "errors.h" #include "libft.h" +#include "lexer.h" #include -int stop_errors(program_error err, const char *details, ...) +int stop_errors(program_error err, token *tokens, char *input, const char *details, ...) { + int i; + // the base error message const char *msg = "error: error type is out of range"; - // Map error codes to messages + // map error codes to messages const char *error_messages[] = { [ERROR_BASE] = "error: undefined error, details :", [ERROR_UNKNOWN_TOKEN] = "error: unknown token, details :", @@ -18,16 +21,58 @@ int stop_errors(program_error err, const char *details, ...) // ⚠️ Add new error messages here when adding new error codes! }; - // Override msg if err is in the error_messages array + // override msg if err is in the error_messages array if (err >= ERROR_BASE && err < ERROR_SENTINEL) { msg = error_messages[err]; } - // Print the base message + // print context + ft_dprintf(STDERR_FILENO, "input : %s\n", input); + i = 0; + while (tokens[i].type != TOKEN_END) + { + ft_printf("token %2i - type : ", i); + + if (tokens[i].type == TOKEN_VARIABLE) + ft_printf("%20s", "TOKEN_VARIABLE"); + else if (tokens[i].type == TOKEN_NUMBER_INT) + ft_printf("%20s", "TOKEN_NUMBER_INT"); + else if (tokens[i].type == TOKEN_NUMBER_DOUBLE) + ft_printf("%20s", "TOKEN_NUMBER_DOUBLE"); + else if (tokens[i].type == TOKEN_POWER) + ft_printf("%20s", "TOKEN_POWER"); + else if (tokens[i].type == TOKEN_SIGN_PLUS) + ft_printf("%20s", "TOKEN_SIGN_PLUS"); + else if (tokens[i].type == TOKEN_SIGN_MINUS) + ft_printf("%20s", "TOKEN_SIGN_MINUS"); + else if (tokens[i].type == TOKEN_FACTOR_MULT) + ft_printf("%20s", "TOKEN_FACTOR_MULT"); + else if (tokens[i].type == TOKEN_FACTOR_DIV) + ft_printf("%20s", "TOKEN_FACTOR_DIV"); + else if (tokens[i].type == TOKEN_EQUAL) + ft_printf("%20s", "TOKEN_EQUAL"); + else if (tokens[i].type == TOKEN_END) + ft_printf("%20s", "TOKEN_END"); + + ft_putstr(" - value : "); + + if (tokens[i].tag == TOKEN_NUMBER) + { + printf("%g\n", tokens[i].value_double); + } + else + { + ft_printf("%c\n", tokens[i].value_char); + } + i++; + } + ft_putchar('\n'); + + // print the base message ft_dprintf(STDERR_FILENO, "%s (%i) - ", msg, err); - // Print the formatted details directly + // print the formatted details va_list args; va_start(args, details); ft_vdprintf(STDERR_FILENO, details, args); diff --git a/src/parser.c b/src/parser.c index 4c0f3b8..0ff44dc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,4 +1,8 @@ #include "parser.h" +#include "libft.h" +#include "lexer.h" +#include "errors.h" +#include /** TOKEN_VARIABLE, // x, y, etc. @@ -33,15 +37,15 @@ static term_sign get_sign(token *tokens, int i, int *token_count) // forbidden tokens if (tokens[i].type == TOKEN_POWER) { - stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i); } if (tokens[i].tag == TOKEN_FACTOR) { - stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i); } if (tokens[i].type == TOKEN_EQUAL) { - stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i); } // sign @@ -61,7 +65,7 @@ static term_sign get_sign(token *tokens, int i, int *token_count) return '+'; } - return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign' : %c", tokens[i].value_char); + return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign', not : '%c' (token number %i)", tokens[i].value_char, i); } static double get_coefficient(token *tokens, int i, int *token_count) @@ -73,19 +77,19 @@ static double get_coefficient(token *tokens, int i, int *token_count) // forbidden tokens if (tokens[i].type == TOKEN_POWER) { - stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i); } if (tokens[i].tag == TOKEN_FACTOR) { - stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i); } if (tokens[i].type == TOKEN_EQUAL) { - stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i); } if (tokens[i].tag == TOKEN_SIGN) { - stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : '%c' (token number %i)", tokens[i].value_char, i); } // if not coefficient token @@ -127,59 +131,65 @@ static double get_coefficient(token *tokens, int i, int *token_count) return coefficient; } -static int get_exponent(token *tokens, int i, int *token_count) +static double get_exponent(token *tokens, int i, int *token_count) { - /** - * power - * number - * sign - * equal - * factor_div - */ - // forbidden tokens + // first reach VARIABLE + if (tokens[i].type == TOKEN_VARIABLE) + { + i++; + (*token_count)++; + } + else if (tokens[i].type == TOKEN_FACTOR_MULT) + { + i++; + if (tokens[i].type == TOKEN_VARIABLE) + { + i++; + (*token_count) += 2; + } + else + { + stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after a '*' we should have a 'var', but instead got : '%c' (token number %i)", tokens[i].value_char, i); + } + } + else + { + stop_errors(ERROR_TOKEN_POSITION, "at exponent place, the first tokens should be 'x' or '*x', but instead got : '%c' (token number %i)", tokens[i].value_char, i); + } + + // then get power sign '^' if (tokens[i].type == TOKEN_POWER) { - stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'power' : %c", tokens[i].value_char); + i++; + (*token_count)++; } - if (tokens[i].tag == TOKEN_NUMBER) + else { - stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'number' : %c", tokens[i].value_char); - } - if (tokens[i].tag == TOKEN_SIGN) - { - stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'sign' : %c", tokens[i].value_char); - } - if (tokens[i].type == TOKEN_EQUAL) - { - stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'equal' : %c", tokens[i].value_char); - } - if (tokens[i].type == TOKEN_FACTOR_DIV) - { - stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'division' : %c", tokens[i].value_char); + stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after variable we should have '^', but instead got : '%c' (token number %i)", tokens[i].value_char, i); } - // if 'var' -> token_count + 1 - // else if '*' + 'var' -> token_count + 2 + // then get exponent + if (tokens[i].type == TOKEN_NUMBER_INT) + { + i++; + (*token_count)++; + } + else + { + stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should have an int, but instead got : '%c' (token number %i)", tokens[i].value_char, i); + } - token_count += 0; // placeholder - - return 1; // placeholder + return tokens[i].value_double; } -int parse(token *tokens, term *terms, int terms_count_max) +static void check_variables(token *tokens) { int i; - int terms_count; - int token_count; - term_position term_position; char var; - terms_count = 0; - token_count = 0; i = 0; - term_position = TERM_LEFT; var = 0; - while (tokens[i].type != TOKEN_END && terms_count < terms_count_max) + while (tokens[i].type != TOKEN_END) { // variable -> all variables must be the same if (tokens[i].type == TOKEN_VARIABLE) @@ -190,10 +200,28 @@ int parse(token *tokens, term *terms, int terms_count_max) } else if (var != tokens[i].value_char) { - stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c'", var, tokens[i].value_char); + stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c' (token number %i)", var, tokens[i].value_char, i); } } + i++; + } +} +int parse(token *tokens, term *terms, int terms_count_max) +{ + int i; + int terms_count; + int token_count; + term_position term_position; + + check_variables(tokens); + + terms_count = 0; + token_count = 0; + i = 0; + term_position = TERM_LEFT; + while (tokens[i].type != TOKEN_END && terms_count < terms_count_max) + { // equal if (tokens[i].type == TOKEN_EQUAL) {