From 42cfdf9734e4437373b74033cc832942590a2a46 Mon Sep 17 00:00:00 2001
From: hugogogo <hugogogo@protonmail.com>
Date: Sun, 3 May 2026 00:20:39 +0200
Subject: [PATCH] wip parsing and error

---
 headers/errors.h |   2 +-
 headers/lexer.h  | 174 -----------------------------------------------
 headers/parser.h |   7 +-
 src/computorv1.c |  61 +++--------------
 src/errors.c     |  55 +++++++++++++--
 src/parser.c     | 120 +++++++++++++++++++-------------
 6 files changed, 134 insertions(+), 285 deletions(-)

diff --git a/headers/errors.h b/headers/errors.h
index 0034387..f06dbc6 100644
--- a/headers/errors.h
+++ b/headers/errors.h
@@ -12,6 +12,6 @@ typedef enum
     ERROR_SENTINEL,
 } program_error;
 
-int stop_errors(program_error err, const char *format, ...);
+int stop_errors(program_error err, token *tokens, char *input, const char *format, ...);
 
 #endif
\ No newline at end of file
diff --git a/headers/lexer.h b/headers/lexer.h
index 6474fd5..381489a 100644
--- a/headers/lexer.h
+++ b/headers/lexer.h
@@ -5,180 +5,6 @@
 #include "errors.h"
 #include <stdbool.h>
 
-/**
- * PROPOSITION 1
- */
-
-// typedef enum
-// {
-//     TOKEN_VARIABLE, // x, y, etc.
-//     TOKEN_NUMBER,   // int or double
-//     TOKEN_POWER,    // ^ or **
-//     TOKEN_SIGN,     // + or -
-//     TOKEN_FACTOR,   // * or / or :
-//     TOKEN_EQUAL,    // =
-//     TOKEN_END       // null (end of input)
-// } token_type;
-//
-// typedef enum
-// {
-//     TOKEN_NUMBER_INT,            // int
-//     TOKEN_NUMBER_DOUBLE,         // double
-//     TOKEN_SIGN_ADD,              // +
-//     TOKEN_SIGN_MINUS,            // -
-//     TOKEN_FACTOR_MULTIPLICATION, // *
-//     TOKEN_FACTOR_DIVISION,       // / or :
-// } token_subtype;
-//
-// typedef struct
-// {
-//     token_type type;
-//     token_subtype subtype;
-//     union
-//     {
-//         char value_char;
-//         double value_double;
-//     };
-// } token;
-
-/**
- * PROPOSITION 2
- */
-
-// // TYPES and SUBTYPES
-
-// typedef enum
-// {
-//     TOKEN_VARIABLE, // x, y, etc.
-//     TOKEN_NUMBER,   // int or double
-//     TOKEN_POWER,    // ^ or **
-//     TOKEN_SIGN,     // + or -
-//     TOKEN_FACTOR,   // * or / or :
-//     TOKEN_EQUAL,    // =
-//     TOKEN_END       // null (end of input)
-// } token_type;
-//
-// typedef enum
-// {
-//     NUMBER_INT,
-//     NUMBER_DOUBLE
-// } number_subtype;
-//
-// typedef enum
-// {
-//     SIGN_ADD,  // +
-//     SIGN_MINUS // -
-// } sign_subtype;
-//
-// typedef enum
-// {
-//     FACTOR_MULTIPLICATION, // *
-//     FACTOR_DIVISION,       // / or :
-// } factor_subtype;
-//
-// // DATA
-//
-// typedef struct
-// {
-//     char value; // e.g., 'x', 'y'
-// } token_variable;
-//
-// typedef struct
-// {
-//     number_subtype subtype;
-//     double value;
-// } token_number;
-//
-// typedef struct
-// {
-//     sign_subtype subtype;
-//     char value;
-// } token_sign;
-//
-// typedef struct
-// {
-//     factor_subtype subtype;
-//     char value;
-// } token_factor;
-//
-// typedef struct
-// {
-//     char value;
-// } token_power;
-//
-// typedef struct
-// {
-//     char value;
-// } token_equal;
-//
-// typedef struct
-// {
-//     char value;
-// } token_end;
-//
-// // TOKEN
-//
-// typedef union
-// {
-//     token_variable variable; // value
-//     token_number number;     // subtype [INT, DOUBLE], value
-//     token_sign sign;         // subtype [PLUS, MINUS], value
-//     token_factor factor;     // subtype [MULT, DIV], value
-//     token_power power;       // value
-//     token_equal equal;       // value
-//     token_end end;           // value
-// } token_data;
-//
-// typedef struct
-// {
-//     token_type type;
-//     token_data data;
-// } token;
-
-/**
- * PROPOSITION 3
- */
-
-// typedef enum
-// {
-//     TOKEN_VARIABLE, // x, y, etc.
-//     TOKEN_NUMBER,   // int or double
-//     TOKEN_POWER,    // ^ or **
-//     TOKEN_SIGN,     // + or -
-//     TOKEN_FACTOR,   // * or / or :
-//     TOKEN_EQUAL,    // =
-//     TOKEN_END       // null (end of input)
-// } token_type;
-//
-// typedef enum
-// {
-//     TOKEN_NO_SUBTYPE,
-//     // NUMBER
-//     TOKEN_NUMBER_INT,
-//     TOKEN_NUMBER_DOUBLE,
-//     // SIGN
-//     TOKEN_SIGN_PLUS,
-//     TOKEN_SIGN_MINUS,
-//     // FACTOR
-//     TOKEN_FACTOR_MULTIPLICATION,
-//     TOKEN_FACTOR_DIVISION,
-// } token_subtype;
-//
-// typedef struct
-// {
-//     token_type type;
-//     token_subtype subtype;
-//     union
-//     {
-//         char value_char;
-//         double value_double;
-//     };
-// } token;
-
-/**
- * PROPOSITION 4
- */
-
 typedef enum
 {
     TOKEN_VARIABLE,      // x, y, etc.
diff --git a/headers/parser.h b/headers/parser.h
index 854639f..e96fcac 100644
--- a/headers/parser.h
+++ b/headers/parser.h
@@ -1,11 +1,6 @@
 #ifndef PARSER_H
 #define PARSER_H
 
-#include "libft.h"
-#include "lexer.h"
-#include "errors.h"
-#include <stdbool.h>
-
 typedef enum
 {
     TERM_LEFT,  // a in "a = b"
@@ -25,7 +20,7 @@ typedef struct
     term_position position;
     term_sign sign;
     double coefficient;
-    int exponent;
+    double exponent;
 } term;
 
 int parse(token *tokens, term *terms, int terms_count_max);
diff --git a/src/computorv1.c b/src/computorv1.c
index bd06c30..78e2c70 100644
--- a/src/computorv1.c
+++ b/src/computorv1.c
@@ -63,64 +63,19 @@ int main(int ac, char **av)
 
     input = av[1];
     remove_spaces(input);
-    arg_len = ft_strlen(input);
-
-    ft_putstr("-> input without space : "); // debug
-    ft_putstr(input);                       // debug
-    ft_putchar('\n');                       // debug
-    ft_putstr("-> arg_len : ");             // debug
-    ft_putnbr(arg_len);                     // debug
-    ft_putchar('\n');                       // debug
+    arg_len = ft_strlen(input) + 1; // +1 for last END token
 
     token tokens[arg_len];
+    // for safety, pre-set the last slot to END (the real END token will likely sit at a lower index)
+    tokens[arg_len - 1].type = TOKEN_END;
+    tokens[arg_len - 1].tag = TOKEN_NO_TAG;
+    tokens[arg_len - 1].value_char = '\0';
+    // lexerize
     int tokens_count = lexerize(input, tokens);
-
-    ft_putstr("-> tokens_count : "); // debug
-    ft_putnbr(tokens_count);         // debug
-    ft_putchar('\n');                // debug
-
-    // tmp debug output
-    ft_putchar('\n'); // debug
-    i = 0;
-    while (tokens[i].type != TOKEN_END)
+    if (tokens_count == 0)
     {
-        ft_printf("token %2i - type : ", i);
-
-        if (tokens[i].type == TOKEN_VARIABLE)
-            ft_printf("%20s", "TOKEN_VARIABLE");
-        else if (tokens[i].type == TOKEN_NUMBER_INT)
-            ft_printf("%20s", "TOKEN_NUMBER_INT");
-        else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
-            ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
-        else if (tokens[i].type == TOKEN_POWER)
-            ft_printf("%20s", "TOKEN_POWER");
-        else if (tokens[i].type == TOKEN_SIGN_PLUS)
-            ft_printf("%20s", "TOKEN_SIGN_PLUS");
-        else if (tokens[i].type == TOKEN_SIGN_MINUS)
-            ft_printf("%20s", "TOKEN_SIGN_MINUS");
-        else if (tokens[i].type == TOKEN_FACTOR_MULT)
-            ft_printf("%20s", "TOKEN_FACTOR_MULT");
-        else if (tokens[i].type == TOKEN_FACTOR_DIV)
-            ft_printf("%20s", "TOKEN_FACTOR_DIV");
-        else if (tokens[i].type == TOKEN_EQUAL)
-            ft_printf("%20s", "TOKEN_EQUAL");
-        else if (tokens[i].type == TOKEN_END)
-            ft_printf("%20s", "TOKEN_END");
-
-        ft_putstr(" - value : ");
-
-        if (tokens[i].tag == TOKEN_NUMBER)
-        {
-            printf("%g\n", tokens[i].value_double);
-        }
-        else
-        {
-            ft_printf("%c\n", tokens[i].value_char);
-        }
-        i++;
+        stop_errors(ERROR_BASE, tokens, input, "test error");
     }
-    ft_putchar('\n'); // debug
-    // END tmp debug output
 
     terms_count_prediction = count_any_of(input, "-+=") + 2; // +1 for first term that can have no leading '+', +1 for last term == NULL
 
diff --git a/src/errors.c b/src/errors.c
index f298822..13f610c 100644
--- a/src/errors.c
+++ b/src/errors.c
@@ -1,13 +1,16 @@
 #include "errors.h"
 #include "libft.h"
+#include "lexer.h"
 #include <stdarg.h>
 
-int stop_errors(program_error err, const char *details, ...)
+int stop_errors(program_error err, token *tokens, char *input, const char *details, ...)
 {
+    int i;
+
     // the base error message
     const char *msg = "error: error type is out of range";
 
-    // Map error codes to messages
+    // map error codes to messages
     const char *error_messages[] = {
         [ERROR_BASE] = "error: undefined error, details :",
         [ERROR_UNKNOWN_TOKEN] = "error: unknown token, details :",
@@ -18,16 +21,58 @@ int stop_errors(program_error err, const char *details, ...)
         // ⚠️ Add new error messages here when adding new error codes!
     };
 
-    // Override msg if err is in the error_messages array
+    // override msg if err is in the error_messages array
     if (err >= ERROR_BASE && err < ERROR_SENTINEL)
     {
         msg = error_messages[err];
     }
 
-    // Print the base message
+    // print context
+    ft_dprintf(STDERR_FILENO, "input : %s\n", input);
+    i = 0;
+    while (tokens[i].type != TOKEN_END)
+    {
+        ft_printf("token %2i - type : ", i);
+
+        if (tokens[i].type == TOKEN_VARIABLE)
+            ft_printf("%20s", "TOKEN_VARIABLE");
+        else if (tokens[i].type == TOKEN_NUMBER_INT)
+            ft_printf("%20s", "TOKEN_NUMBER_INT");
+        else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+            ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
+        else if (tokens[i].type == TOKEN_POWER)
+            ft_printf("%20s", "TOKEN_POWER");
+        else if (tokens[i].type == TOKEN_SIGN_PLUS)
+            ft_printf("%20s", "TOKEN_SIGN_PLUS");
+        else if (tokens[i].type == TOKEN_SIGN_MINUS)
+            ft_printf("%20s", "TOKEN_SIGN_MINUS");
+        else if (tokens[i].type == TOKEN_FACTOR_MULT)
+            ft_printf("%20s", "TOKEN_FACTOR_MULT");
+        else if (tokens[i].type == TOKEN_FACTOR_DIV)
+            ft_printf("%20s", "TOKEN_FACTOR_DIV");
+        else if (tokens[i].type == TOKEN_EQUAL)
+            ft_printf("%20s", "TOKEN_EQUAL");
+        else if (tokens[i].type == TOKEN_END)
+            ft_printf("%20s", "TOKEN_END");
+
+        ft_putstr(" - value : ");
+
+        if (tokens[i].tag == TOKEN_NUMBER)
+        {
+            printf("%g\n", tokens[i].value_double);
+        }
+        else
+        {
+            ft_printf("%c\n", tokens[i].value_char);
+        }
+        i++;
+    }
+    ft_putchar('\n');
+
+    // print the base message
     ft_dprintf(STDERR_FILENO, "%s (%i) - ", msg, err);
 
-    // Print the formatted details directly
+    // print the formatted details
     va_list args;
     va_start(args, details);
     ft_vdprintf(STDERR_FILENO, details, args);
diff --git a/src/parser.c b/src/parser.c
index 4c0f3b8..0ff44dc 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,4 +1,8 @@
 #include "parser.h"
+#include "libft.h"
+#include "lexer.h"
+#include "errors.h"
+#include <stdbool.h>
 
 /**
     TOKEN_VARIABLE,      // x, y, etc.
@@ -33,15 +37,15 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
     // forbidden tokens
     if (tokens[i].type == TOKEN_POWER)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i);
     }
     if (tokens[i].tag == TOKEN_FACTOR)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i);
     }
     if (tokens[i].type == TOKEN_EQUAL)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i);
     }
 
     // sign
@@ -61,7 +65,7 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
         return '+';
     }
 
-    return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign' : %c", tokens[i].value_char);
+    return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign', not : '%c' (token number %i)", tokens[i].value_char, i);
 }
 
 static double get_coefficient(token *tokens, int i, int *token_count)
@@ -73,19 +77,19 @@ static double get_coefficient(token *tokens, int i, int *token_count)
     // forbidden tokens
     if (tokens[i].type == TOKEN_POWER)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i);
     }
     if (tokens[i].tag == TOKEN_FACTOR)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i);
     }
     if (tokens[i].type == TOKEN_EQUAL)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i);
     }
     if (tokens[i].tag == TOKEN_SIGN)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : '%c' (token number %i)", tokens[i].value_char, i);
     }
 
     // if not coefficient token
@@ -127,59 +131,65 @@ static double get_coefficient(token *tokens, int i, int *token_count)
     return coefficient;
 }
 
-static int get_exponent(token *tokens, int i, int *token_count)
+static double get_exponent(token *tokens, int i, int *token_count)
 {
-    /**
-     * power
-     * number
-     * sign
-     * equal
-     * factor_div
-     */
-    // forbidden tokens
+    // first step over the variable: accept 'x' (1 token) or '*x' (2 tokens)
+    if (tokens[i].type == TOKEN_VARIABLE)
+    {
+        i++;
+        (*token_count)++;
+    }
+    else if (tokens[i].type == TOKEN_FACTOR_MULT)
+    {
+        i++;
+        if (tokens[i].type == TOKEN_VARIABLE)
+        {
+            i++;
+            (*token_count) += 2;
+        }
+        else
+        {
+            stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after a '*' we should have a 'var', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
+        }
+    }
+    else
+    {
+        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, the first tokens should be 'x' or '*x', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
+    }
+
+    // then consume the power sign '^'
     if (tokens[i].type == TOKEN_POWER)
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'power' : %c", tokens[i].value_char);
+        i++;
+        (*token_count)++;
     }
-    if (tokens[i].tag == TOKEN_NUMBER)
+    else
     {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'number' : %c", tokens[i].value_char);
-    }
-    if (tokens[i].tag == TOKEN_SIGN)
-    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'sign' : %c", tokens[i].value_char);
-    }
-    if (tokens[i].type == TOKEN_EQUAL)
-    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'equal' : %c", tokens[i].value_char);
-    }
-    if (tokens[i].type == TOKEN_FACTOR_DIV)
-    {
-        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should not have a token 'division' : %c", tokens[i].value_char);
+        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after variable we should have '^', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
     }
 
-    // if 'var' -> token_count + 1
-    // else if '*' + 'var' -> token_count + 2
+    // then read the exponent from the INT token itself, counting it as consumed
+    if (tokens[i].type == TOKEN_NUMBER_INT)
+    {
+        (*token_count)++;
+        return tokens[i].value_double; // return before stepping past it: tokens[i + 1] is the next term's token
+    }
+    else
+    {
+        stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should have an int, but instead got : '%c' (token number %i)", tokens[i].value_char, i);
+    }
 
-    token_count += 0; // placeholder
-
-    return 1; // placeholder
+    return tokens[i].value_double; // only reached if stop_errors() returns after a non-int token
 }
 
-int parse(token *tokens, term *terms, int terms_count_max)
+static void check_variables(token *tokens)
 {
     int i;
-    int terms_count;
-    int token_count;
-    term_position term_position;
     char var;
 
-    terms_count = 0;
-    token_count = 0;
     i = 0;
-    term_position = TERM_LEFT;
     var = 0;
-    while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
+    while (tokens[i].type != TOKEN_END)
     {
         // variable -> all variables must be the same
         if (tokens[i].type == TOKEN_VARIABLE)
@@ -190,10 +200,28 @@ int parse(token *tokens, term *terms, int terms_count_max)
             }
             else if (var != tokens[i].value_char)
             {
-                stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c'", var, tokens[i].value_char);
+                stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c' (token number %i)", var, tokens[i].value_char, i);
             }
         }
+        i++;
+    }
+}
 
+int parse(token *tokens, term *terms, int terms_count_max)
+{
+    int i;
+    int terms_count;
+    int token_count;
+    term_position term_position;
+
+    check_variables(tokens);
+
+    terms_count = 0;
+    token_count = 0;
+    i = 0;
+    term_position = TERM_LEFT;
+    while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
+    {
         // equal
         if (tokens[i].type == TOKEN_EQUAL)
         {