wip parsing and error

2026-05-03 00:20:39 +02:00
parent c24461cb33
commit 42cfdf9734
6 changed files with 134 additions and 285 deletions
--- a/headers/errors.h
+++ b/headers/errors.h
@@ -12,6 +12,6 @@ typedef enum
    ERROR_SENTINEL,
 } program_error;

-int stop_errors(program_error err, const char *format, ...);
+int stop_errors(program_error err, token *tokens, char *input, const char *format, ...);

 #endif
--- a/headers/lexer.h
+++ b/headers/lexer.h
@@ -5,180 +5,6 @@
 #include "errors.h"
 #include <stdbool.h>

-/**
- * PROPOSITION 1
- */
-
-// typedef enum
-// {
-//     TOKEN_VARIABLE, // x, y, etc.
-//     TOKEN_NUMBER,   // int or double
-//     TOKEN_POWER,    // ^ or **
-//     TOKEN_SIGN,     // + or -
-//     TOKEN_FACTOR,   // * or / or :
-//     TOKEN_EQUAL,    // =
-//     TOKEN_END       // null (end of input)
-// } token_type;
-//
-// typedef enum
-// {
-//     TOKEN_NUMBER_INT,            // int
-//     TOKEN_NUMBER_DOUBLE,         // double
-//     TOKEN_SIGN_ADD,              // +
-//     TOKEN_SIGN_MINUS,            // -
-//     TOKEN_FACTOR_MULTIPLICATION, // *
-//     TOKEN_FACTOR_DIVISION,       // / or :
-// } token_subtype;
-//
-// typedef struct
-// {
-//     token_type type;
-//     token_subtype subtype;
-//     union
-//     {
-//         char value_char;
-//         double value_double;
-//     };
-// } token;
-
-/**
- * PROPOSITION 2
- */
-
-// // TYPES and SUBTYPES
-
-// typedef enum
-// {
-//     TOKEN_VARIABLE, // x, y, etc.
-//     TOKEN_NUMBER,   // int or double
-//     TOKEN_POWER,    // ^ or **
-//     TOKEN_SIGN,     // + or -
-//     TOKEN_FACTOR,   // * or / or :
-//     TOKEN_EQUAL,    // =
-//     TOKEN_END       // null (end of input)
-// } token_type;
-//
-// typedef enum
-// {
-//     NUMBER_INT,
-//     NUMBER_DOUBLE
-// } number_subtype;
-//
-// typedef enum
-// {
-//     SIGN_ADD,  // +
-//     SIGN_MINUS // -
-// } sign_subtype;
-//
-// typedef enum
-// {
-//     FACTOR_MULTIPLICATION, // *
-//     FACTOR_DIVISION,       // / or :
-// } factor_subtype;
-//
-// // DATA
-//
-// typedef struct
-// {
-//     char value; // e.g., 'x', 'y'
-// } token_variable;
-//
-// typedef struct
-// {
-//     number_subtype subtype;
-//     double value;
-// } token_number;
-//
-// typedef struct
-// {
-//     sign_subtype subtype;
-//     char value;
-// } token_sign;
-//
-// typedef struct
-// {
-//     factor_subtype subtype;
-//     char value;
-// } token_factor;
-//
-// typedef struct
-// {
-//     char value;
-// } token_power;
-//
-// typedef struct
-// {
-//     char value;
-// } token_equal;
-//
-// typedef struct
-// {
-//     char value;
-// } token_end;
-//
-// // TOKEN
-//
-// typedef union
-// {
-//     token_variable variable; // value
-//     token_number number;     // subtype [INT, DOUBLE], value
-//     token_sign sign;         // subtype [PLUS, MINUS], value
-//     token_factor factor;     // subtype [MULT, DIV], value
-//     token_power power;       // value
-//     token_equal equal;       // value
-//     token_end end;           // value
-// } token_data;
-//
-// typedef struct
-// {
-//     token_type type;
-//     token_data data;
-// } token;
-
-/**
- * PROPOSITION 3
- */
-
-// typedef enum
-// {
-//     TOKEN_VARIABLE, // x, y, etc.
-//     TOKEN_NUMBER,   // int or double
-//     TOKEN_POWER,    // ^ or **
-//     TOKEN_SIGN,     // + or -
-//     TOKEN_FACTOR,   // * or / or :
-//     TOKEN_EQUAL,    // =
-//     TOKEN_END       // null (end of input)
-// } token_type;
-//
-// typedef enum
-// {
-//     TOKEN_NO_SUBTYPE,
-//     // NUMBER
-//     TOKEN_NUMBER_INT,
-//     TOKEN_NUMBER_DOUBLE,
-//     // SIGN
-//     TOKEN_SIGN_PLUS,
-//     TOKEN_SIGN_MINUS,
-//     // FACTOR
-//     TOKEN_FACTOR_MULTIPLICATION,
-//     TOKEN_FACTOR_DIVISION,
-// } token_subtype;
-//
-// typedef struct
-// {
-//     token_type type;
-//     token_subtype subtype;
-//     union
-//     {
-//         char value_char;
-//         double value_double;
-//     };
-// } token;
-
-/**
- * PROPOSITION 4
- */
-
 typedef enum
 {
    TOKEN_VARIABLE,      // x, y, etc.
--- a/headers/parser.h
+++ b/headers/parser.h
@@ -1,11 +1,6 @@
 #ifndef PARSER_H
 #define PARSER_H

-#include "libft.h"
-#include "lexer.h"
-#include "errors.h"
-#include <stdbool.h>
-
 typedef enum
 {
    TERM_LEFT,  // a in "a = b"
@@ -25,7 +20,7 @@ typedef struct
    term_position position;
    term_sign sign;
    double coefficient;
-    int exponent;
+    double exponent;
 } term;

 int parse(token *tokens, term *terms, int terms_count_max);
--- a/src/computorv1.c
+++ b/src/computorv1.c
@@ -63,64 +63,19 @@ int main(int ac, char **av)

    input = av[1];
    remove_spaces(input);
-    arg_len = ft_strlen(input);
-
-    ft_putstr("-> input without space : "); // debug
-    ft_putstr(input);                       // debug
-    ft_putchar('\n');                       // debug
-    ft_putstr("-> arg_len : ");             // debug
-    ft_putnbr(arg_len);                     // debug
-    ft_putchar('\n');                       // debug
+    arg_len = ft_strlen(input) + 1; // +1 for last END token

    token tokens[arg_len];
+    // for safety, pre-mark the last slot as END (the real END token will usually sit at a lower index)
+    tokens[arg_len - 1].type = TOKEN_END;
+    tokens[arg_len - 1].tag = TOKEN_NO_TAG;
+    tokens[arg_len - 1].value_char = '\0';
+    // lexerize
    int tokens_count = lexerize(input, tokens);
-
-    ft_putstr("-> tokens_count : "); // debug
-    ft_putnbr(tokens_count);         // debug
-    ft_putchar('\n');                // debug
-
-    // tmp debug output
-    ft_putchar('\n'); // debug
-    i = 0;
-    while (tokens[i].type != TOKEN_END)
+    if (tokens_count == 0)
    {
-        ft_printf("token %2i - type : ", i);
-
-        if (tokens[i].type == TOKEN_VARIABLE)
-            ft_printf("%20s", "TOKEN_VARIABLE");
-        else if (tokens[i].type == TOKEN_NUMBER_INT)
-            ft_printf("%20s", "TOKEN_NUMBER_INT");
-        else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
-            ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
-        else if (tokens[i].type == TOKEN_POWER)
-            ft_printf("%20s", "TOKEN_POWER");
-        else if (tokens[i].type == TOKEN_SIGN_PLUS)
-            ft_printf("%20s", "TOKEN_SIGN_PLUS");
-        else if (tokens[i].type == TOKEN_SIGN_MINUS)
-            ft_printf("%20s", "TOKEN_SIGN_MINUS");
-        else if (tokens[i].type == TOKEN_FACTOR_MULT)
-            ft_printf("%20s", "TOKEN_FACTOR_MULT");
-        else if (tokens[i].type == TOKEN_FACTOR_DIV)
-            ft_printf("%20s", "TOKEN_FACTOR_DIV");
-        else if (tokens[i].type == TOKEN_EQUAL)
-            ft_printf("%20s", "TOKEN_EQUAL");
-        else if (tokens[i].type == TOKEN_END)
-            ft_printf("%20s", "TOKEN_END");
-
-        ft_putstr(" - value : ");
-
-        if (tokens[i].tag == TOKEN_NUMBER)
-        {
-            printf("%g\n", tokens[i].value_double);
+        stop_errors(ERROR_BASE, tokens, input, "test error");
    }
-        else
-        {
-            ft_printf("%c\n", tokens[i].value_char);
-        }
-        i++;
-    }
-    ft_putchar('\n'); // debug
-    // END tmp debug output

    terms_count_prediction = count_any_of(input, "-+=") + 2; // +1 for first term that can have no leading '+', +1 for last term == NULL

--- a/src/errors.c
+++ b/src/errors.c
@@ -1,13 +1,16 @@
 #include "errors.h"
 #include "libft.h"
+#include "lexer.h"
 #include <stdarg.h>

-int stop_errors(program_error err, const char *details, ...)
+int stop_errors(program_error err, token *tokens, char *input, const char *details, ...)
 {
+    int i;
+
    // the base error message
    const char *msg = "error: error type is out of range";

-    // Map error codes to messages
+    // map error codes to messages
    const char *error_messages[] = {
        [ERROR_BASE] = "error: undefined error, details :",
        [ERROR_UNKNOWN_TOKEN] = "error: unknown token, details :",
@@ -18,16 +21,58 @@ int stop_errors(program_error err, const char *details, ...)
        // ⚠️ Add new error messages here when adding new error codes!
    };

-    // Override msg if err is in the error_messages array
+    // override msg if err is in the error_messages array
    if (err >= ERROR_BASE && err < ERROR_SENTINEL)
    {
        msg = error_messages[err];
    }

-    // Print the base message
+    // print context
+    ft_dprintf(STDERR_FILENO, "input : %s\n", input);
+    i = 0;
+    while (tokens[i].type != TOKEN_END)
+    {
+        ft_printf("token %2i - type : ", i);
+
+        if (tokens[i].type == TOKEN_VARIABLE)
+            ft_printf("%20s", "TOKEN_VARIABLE");
+        else if (tokens[i].type == TOKEN_NUMBER_INT)
+            ft_printf("%20s", "TOKEN_NUMBER_INT");
+        else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+            ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
+        else if (tokens[i].type == TOKEN_POWER)
+            ft_printf("%20s", "TOKEN_POWER");
+        else if (tokens[i].type == TOKEN_SIGN_PLUS)
+            ft_printf("%20s", "TOKEN_SIGN_PLUS");
+        else if (tokens[i].type == TOKEN_SIGN_MINUS)
+            ft_printf("%20s", "TOKEN_SIGN_MINUS");
+        else if (tokens[i].type == TOKEN_FACTOR_MULT)
+            ft_printf("%20s", "TOKEN_FACTOR_MULT");
+        else if (tokens[i].type == TOKEN_FACTOR_DIV)
+            ft_printf("%20s", "TOKEN_FACTOR_DIV");
+        else if (tokens[i].type == TOKEN_EQUAL)
+            ft_printf("%20s", "TOKEN_EQUAL");
+        else if (tokens[i].type == TOKEN_END)
+            ft_printf("%20s", "TOKEN_END");
+
+        ft_putstr(" - value : ");
+
+        if (tokens[i].tag == TOKEN_NUMBER)
+        {
+            printf("%g\n", tokens[i].value_double);
+        }
+        else
+        {
+            ft_printf("%c\n", tokens[i].value_char);
+        }
+        i++;
+    }
+    ft_putchar('\n');
+
+    // print the base message
    ft_dprintf(STDERR_FILENO, "%s (%i) - ", msg, err);

-    // Print the formatted details directly
+    // print the formatted details
    va_list args;
    va_start(args, details);
    ft_vdprintf(STDERR_FILENO, details, args);
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,4 +1,8 @@
 #include "parser.h"
+#include "libft.h"
+#include "lexer.h"
+#include "errors.h"
+#include <stdbool.h>

 /**
    TOKEN_VARIABLE,      // x, y, etc.
@@ -33,15 +37,15 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
    // forbidden tokens
    if (tokens[i].type == TOKEN_POWER)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i);
    }
    if (tokens[i].tag == TOKEN_FACTOR)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i);
    }
    if (tokens[i].type == TOKEN_EQUAL)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i);
    }

    // sign
@@ -61,7 +65,7 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
        return '+';
    }

-    return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign' : %c", tokens[i].value_char);
+    return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign', not : '%c' (token number %i)", tokens[i].value_char, i);
 }

 static double get_coefficient(token *tokens, int i, int *token_count)
@@ -73,19 +77,19 @@ static double get_coefficient(token *tokens, int i, int *token_count)
    // forbidden tokens
    if (tokens[i].type == TOKEN_POWER)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i);
    }
    if (tokens[i].tag == TOKEN_FACTOR)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i);
    }
    if (tokens[i].type == TOKEN_EQUAL)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i);
    }
    if (tokens[i].tag == TOKEN_SIGN)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : '%c' (token number %i)", tokens[i].value_char, i);
    }

    // if not coefficient token
@@ -127,59 +131,65 @@ static double get_coefficient(token *tokens, int i, int *token_count)
    return coefficient;
 }

-static int get_exponent(token *tokens, int i, int *token_count)
+static double get_exponent(token *tokens, int i, int *token_count)
 {
-    /**
-     * power
-     * number
-     * sign
-     * equal
-     * factor_div
-     */
-    // forbidden tokens
+    // parse "x^N" or "*x^N" starting at tokens[i]: returns N and adds the number of consumed tokens to *token_count
+    if (tokens[i].type == TOKEN_VARIABLE) // bare variable: one token consumed
+    {
+        i++;
+        (*token_count)++;
+    }
+    else if (tokens[i].type == TOKEN_FACTOR_MULT) // '*' followed by the variable: two tokens consumed
+    {
+        i++;
+        if (tokens[i].type == TOKEN_VARIABLE)
+        {
+            i++;
+            (*token_count) += 2;
+        }
+        else
+        {
+            stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after a '*' we should have a 'var', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
+        }
+    }
+    else
+    {
+        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, the first tokens should be 'x' or '*x', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
+    }
+    // NOTE(review): these stop_errors() calls pass (err, format, ...) but the prototype now takes (err, tokens, input, format, ...) - confirm and update
+    // then get power sign '^'
    if (tokens[i].type == TOKEN_POWER)
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'power' : %c", tokens[i].value_char);
+        i++;
+        (*token_count)++;
    }
-    if (tokens[i].tag == TOKEN_NUMBER)
+    else
    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'number' : %c", tokens[i].value_char);
-    }
-    if (tokens[i].tag == TOKEN_SIGN)
-    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'sign' : %c", tokens[i].value_char);
-    }
-    if (tokens[i].type == TOKEN_EQUAL)
-    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'equal' : %c", tokens[i].value_char);
-    }
-    if (tokens[i].type == TOKEN_FACTOR_DIV)
-    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'division' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after variable we should have '^', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
    }

-    // if 'var' -> token_count + 1
-    // else if '*' + 'var' -> token_count + 2
-
-    token_count += 0; // placeholder
-
-    return 1; // placeholder
+    // then get exponent: read it from the INT token itself, before stepping past it
+    if (tokens[i].type == TOKEN_NUMBER_INT)
+    {
+        (*token_count)++; // the INT token is consumed
+        return tokens[i].value_double; // value of the exponent token, not of the token that follows it
+    }
+    else
+    {
+        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should have an int, but instead got : '%c' (token number %i)", tokens[i].value_char, i);
    }

-int parse(token *tokens, term *terms, int terms_count_max)
+    return tokens[i].value_double; // only reached if stop_errors() returns; i still points at the offending token
+}
+
+static void check_variables(token *tokens)
 {
    int i;
-    int terms_count;
-    int token_count;
-    term_position term_position;
    char var;

-    terms_count = 0;
-    token_count = 0;
    i = 0;
-    term_position = TERM_LEFT;
    var = 0;
-    while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
+    while (tokens[i].type != TOKEN_END)
    {
        // variable -> all variables must be the same
        if (tokens[i].type == TOKEN_VARIABLE)
@@ -190,10 +200,28 @@ int parse(token *tokens, term *terms, int terms_count_max)
            }
            else if (var != tokens[i].value_char)
            {
-                stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c'", var, tokens[i].value_char);
+                stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c' (token number %i)", var, tokens[i].value_char, i);
+            }
+        }
+        i++;
    }
 }

+int parse(token *tokens, term *terms, int terms_count_max)
+{
+    int i;
+    int terms_count;
+    int token_count;
+    term_position term_position;
+
+    check_variables(tokens);
+
+    terms_count = 0;
+    token_count = 0;
+    i = 0;
+    term_position = TERM_LEFT;
+    while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
+    {
        // equal
        if (tokens[i].type == TOKEN_EQUAL)
        {