From 26fa8025eff69e65e8d4328a72014b7be3f4cde3 Mon Sep 17 00:00:00 2001
From: hugogogo <hugogogo@protonmail.com>
Date: Wed, 29 Apr 2026 12:41:19 +0200
Subject: [PATCH] lexer complete with luke memory solution

---
 headers/lexer.h  |  11 ++---
 src/computorv1.c |  66 ++++++++++++++++++++++++------
 src/lexer.c      | 102 ++++++++++++++++++++++++++++++++++-------------
 3 files changed, 134 insertions(+), 45 deletions(-)

diff --git a/headers/lexer.h b/headers/lexer.h
index 1290286..84b5253 100644
--- a/headers/lexer.h
+++ b/headers/lexer.h
@@ -6,7 +6,8 @@
 typedef enum
 {
     TOKEN_VARIABLE,       // x, y, etc.
-    TOKEN_NUMBER,         // int or double
+    TOKEN_NUMBER_INT,     // int
+    TOKEN_NUMBER_DOUBLE,  // double
     TOKEN_POWER,          // ^ or **
     TOKEN_PLUS,           // +
     TOKEN_MINUS,          // -
@@ -20,12 +21,12 @@ typedef struct
     token_type type;
     union
     {
-        double num_value; // For NUMBER
-        char var_value;   // For VARIABLE (single char, e.g., 'x')
+        char value_char;
+        int value_int;
+        double value_double;
     };
 } token;
 
-#define MAX_TOKENS 100
-int lexerize(const char *input, token tokens[MAX_TOKENS]);
+void lexerize(const char *input, token *tokens);
 
 #endif
\ No newline at end of file
diff --git a/src/computorv1.c b/src/computorv1.c
index 534d6e2..1aa490f 100644
--- a/src/computorv1.c
+++ b/src/computorv1.c
@@ -3,18 +3,46 @@
 #include "errors.h"
 #include <stdio.h> // tmp for float debug
 
+void remove_spaces(char *s) // strip every ft_isspace() char from s, in place
+{
+    char *read = s;  // scans the original characters
+    char *write = s; // next slot for a kept character
+
+    // copy all non-space chars
+    while (*read)
+    {
+        if (!ft_isspace(*read))
+        {
+            *write++ = *read; // compact kept chars toward the front
+        }
+        read++;
+    }
+    *write = '\0'; // terminate the compacted string
+
+    // zero the rest of the buffer
+    while (write != read) // read stopped on the original terminator
+    {
+        *write++ = '\0';
+    }
+}
+
 int main(int ac, char **av)
 {
     int i;
-    int ret;
+    int arg_len;
+    char *input;
 
     if (ac < 2)
     {
         return 0;
     }
+
+    // tmp debug output
+    ft_putstr("-> received args :\n"); // debug
     i = 0;
     while (i < ac)
     {
+        ft_putstr("   ");
         ft_putnbr(i);
         ft_putstr(" : ");
         ft_putstr(av[i]);
@@ -22,14 +50,22 @@ int main(int ac, char **av)
         i++;
     }
 
-    token tokens[MAX_TOKENS];
-    ret = lexerize(av[1], tokens);
-    if (ret <= 0)
-    {
-        stop_errors(ret);
-    }
+    input = av[1];
+    remove_spaces(input);
+    arg_len = ft_strlen(input);
+
+    ft_putstr("-> input without space : "); // debug
+    ft_putstr(input);                       // debug
+    ft_putchar('\n');                       // debug
+    ft_putstr("-> arg_len : ");             // debug
+    ft_putnbr(arg_len);                     // debug
+    ft_putchar('\n');                       // debug
+
+    token tokens[arg_len + 1]; // +1: lexerize() stores a TOKEN_END sentinel after up to arg_len tokens
+    lexerize(input, tokens);
 
     // tmp debug output
+    ft_putchar('\n'); // debug
     i = 0;
     while (tokens[i].type != TOKEN_END)
     {
@@ -37,8 +73,10 @@ int main(int ac, char **av)
 
         if (tokens[i].type == TOKEN_VARIABLE)
             ft_printf("%20s", "TOKEN_VARIABLE");
-        if (tokens[i].type == TOKEN_NUMBER)
-            ft_printf("%20s", "TOKEN_NUMBER");
+        if (tokens[i].type == TOKEN_NUMBER_INT)
+            ft_printf("%20s", "TOKEN_NUMBER_INT");
+        if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+            ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
         if (tokens[i].type == TOKEN_POWER)
             ft_printf("%20s", "TOKEN_POWER");
         if (tokens[i].type == TOKEN_PLUS)
@@ -54,13 +92,17 @@ int main(int ac, char **av)
 
         ft_putstr(" - value : ");
 
-        if (tokens[i].type == TOKEN_NUMBER)
+        if (tokens[i].type == TOKEN_NUMBER_INT)
         {
-            printf("%f\n", tokens[i].num_value);
+            printf("%i\n", tokens[i].value_int);
+        }
+        else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+        {
+            printf("%f\n", tokens[i].value_double);
         }
         else
         {
-            ft_printf("%c\n", tokens[i].var_value);
+            ft_printf("%c\n", tokens[i].value_char);
         }
         i++;
     }
diff --git a/src/lexer.c b/src/lexer.c
index 1517763..02b50f0 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -2,15 +2,6 @@
 #include "errors.h"
 #include <stdbool.h>
 
-static int skip_whitespace(const char *input, int input_pos)
-{
-    while (ft_isspace(input[input_pos]))
-    {
-        input_pos++;
-    }
-    return input_pos;
-}
-
 // any single letter is a valid variable, like "x" or "y"
 static bool token_is_variable(const char *input, int input_pos, int *token_size)
 {
@@ -22,8 +13,8 @@ static bool token_is_variable(const char *input, int input_pos, int *token_size)
     return false;
 }
 
-// number can be double "123.456"
-static bool token_is_number(const char *input, int input_pos, int *token_size, int *float_precision)
+// number can be int "123"
+static bool token_is_number_int(const char *input, int input_pos, int *token_size)
 {
     int number_size;
     int max_number_size;
@@ -33,6 +24,48 @@ static bool token_is_number(const char *input, int input_pos, int *token_size, i
         return false;
     }
 
+    number_size = 1;
+    max_number_size = 16; // max size for int
+    while (number_size <= max_number_size)
+    {
+        if (ft_isdigit(input[input_pos + number_size]))
+        {
+            number_size++;
+        }
+        else if (input[input_pos + number_size] == '.')
+        {
+            if (ft_isdigit(input[input_pos + number_size + 1]))
+            {
+                // number is double
+                return false;
+            }
+            else
+                break;
+        }
+        else
+            break;
+    }
+    if (number_size > max_number_size)
+    {
+        stop_errors(ERROR_NUMBER_TOO_BIG);
+    }
+    *token_size = number_size;
+    return true;
+}
+
+// number can be double "123.456"
+static bool token_is_number_double(const char *input, int input_pos, int *token_size)
+{
+    int number_size;
+    int max_number_size;
+    bool has_dot;
+
+    if (!ft_isdigit(input[input_pos]))
+    {
+        return false;
+    }
+
+    has_dot = false;
     number_size = 1;
     max_number_size = 129; // max size for double double is 128 bits, + the coma
     while (number_size <= max_number_size)
@@ -43,6 +76,17 @@ static bool token_is_number(const char *input, int input_pos, int *token_size, i
         }
         else if (input[input_pos + number_size] == '.')
         {
+            if (has_dot)
+            {
+                // number is not a valid double, it has 2 dots
+                return false;
+            }
+            if (!ft_isdigit(input[input_pos + number_size + 1]))
+            {
+                // number is not a double, it has no number after the dot
+                return false;
+            }
+            has_dot = true;
             number_size++;
         }
         else
@@ -119,20 +163,17 @@ static bool token_is_division(const char *input, int input_pos, int *token_size)
 /**
  * LEXER
  */
-int lexerize(const char *input, token tokens[MAX_TOKENS])
+void lexerize(const char *input, token *tokens)
 {
     int token_count;
     int input_pos;
     int token_size;
-    int float_precision;
 
     token_count = 0;
     input_pos = 0;
-    float_precision = 0;
     while (input[input_pos])
     {
         token_size = 0;
-        input_pos = skip_whitespace(input, input_pos);
 
         if (input[input_pos] == '\0')
         {
@@ -142,41 +183,48 @@ int lexerize(const char *input, token tokens[MAX_TOKENS])
         if (token_is_variable(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_VARIABLE;
-            tokens[token_count].var_value = 'x';
+            tokens[token_count].value_char = 'x';
         }
-        else if (token_is_number(input, input_pos, &token_size, &float_precision))
+        else if (token_is_number_int(input, input_pos, &token_size))
         {
-            tokens[token_count].type = TOKEN_NUMBER;
-            tokens[token_count].num_value = ft_atof(&input[input_pos]);
+            tokens[token_count].type = TOKEN_NUMBER_INT;
+            tokens[token_count].value_int = ft_atoi(&input[input_pos]);
+        }
+        else if (token_is_number_double(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_NUMBER_DOUBLE;
+            tokens[token_count].value_double = ft_atof(&input[input_pos]);
         }
         else if (token_is_power(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_POWER;
-            tokens[token_count].var_value = '^';
+            tokens[token_count].value_char = '^';
         }
         else if (token_is_plus(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_PLUS;
-            tokens[token_count].var_value = '+';
+            tokens[token_count].value_char = '+';
         }
         else if (token_is_minus(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_MINUS;
-            tokens[token_count].var_value = '-';
+            tokens[token_count].value_char = '-';
         }
         else if (token_is_multiplication(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_MULTIPLICATION;
-            tokens[token_count].var_value = '*';
+            tokens[token_count].value_char = '*';
         }
         else if (token_is_division(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_DIVISION;
-            tokens[token_count].var_value = '/';
+            tokens[token_count].value_char = '/';
         }
         else
         {
-            stop_errors(ERROR_UNKNOWN_TOKEN);
+            // tmp
+            token_size = 1;
+            // stop_errors(ERROR_UNKNOWN_TOKEN);
         }
 
         token_count++;
@@ -188,7 +236,5 @@ int lexerize(const char *input, token tokens[MAX_TOKENS])
     }
 
     tokens[token_count].type = TOKEN_END;
-    tokens[token_count].var_value = '\0';
-
-    return 1;
+    tokens[token_count].value_char = '\0';
 }