fix number tokens and limits

2026-05-26 12:48:36 +02:00
parent f1a6a8e586
commit c99bdfc63a
8 changed files with 577 additions and 481 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
 {
  "window.title": "${rootName} — ${activeEditorShort}"
 }
--- a/README.md
+++ b/README.md
@@ -1,5 +1,15 @@
 # 42_EXT_05_computorv1
 ## todo
 - [ ] arg limit ?
 - [x] change order is_number_int and is_number_double
 - [x] remove double limit in lexer
 - [ ] check why number stored in int or double
 - [x] get rid of lib math (check in libft)
 - [x] use ft_abs() instead of abs() -> actually abs() is in stdlib
 - [x] handling sign '-' : "+ -2"
 - [x] max exponent len :
 ## resources
@@ -13,7 +23,6 @@ this project uses submodules (maybe recursively), so either :
 - `git clone --recurse-submodules <repo-url>`
 - or, after cloning : `git submodule update --init --recursive`
 ---
 # sqrt implementation
@@ -57,7 +66,6 @@ Ex :
 --------|---------------------- --> solution
 ```
 ## Newton–Raphson method
 it's like a self-correcting binary search : we get rid of the "choose range" step and use the formula `x/v` to find the next range, with `x` being the number whose square root we are looking for, and `v` the value found at the previous step.
@@ -91,6 +99,7 @@ Ex :
 ```
 ### mathematical proof that each range is automatically in the right range :
 - if the value was higher than the answer, then new value is below old value, and vice versa
 - how ? :
  - define `x`, solution `s = √x`, and value `v = (old_value + x / old_value) / 2`
@@ -107,10 +116,10 @@ Ex :
       <=> v < √x
       <=> v² < x (as previously)
       <=> v² < x²/x
-        <=> v² * x < x²
+       <=> v² \* x < x²
-        <=> (v * √x)² < x²
+       <=> (v \* √x)² < x²
-        <=> v * √x < x
+       <=> v \* √x < x
-        <=> v * √x < v * x/v
+       <=> v \* √x < v \* x/v
       <=> √x < x/v
       -> so indeed : if v < √x, then v < √x < x/v == v < s < x/v
       -> conclusion, the new range < v , x/v > contains the solution
@@ -120,39 +129,23 @@ Ex :
 1. **show that if `v² > x` (== `v > s`) then `v > s > x/v`, and if `v² < x` (== `v < s`) then `v < s < x/v` :**
   1.1. **for value too high `v > s` :**
-    1.1.1 **why `v > x/v` :**
+   1.1.1 **why `v > x/v` :** - let's take initial value v = 5 : - is 5² the solution ? 5² == 25 -> so no, 5 is not the sqrt, it's too high
-      - let's take initial value v = 5 :
+   `                v                   v²
      - is 5² the solution ? 5² == 25 -> so no, 5 is not the sqrt, it's too high
        ```
                  v                   v²
     0   (5)   10   15   20   25
 v  : |----|----|----|----|----|
 x/v: |---|---|---|---|---| <----- squiz it, so the previous 5 portions fit the x = 20 size
     0  (4)  8   12  16  20
-                x/v              x
+        x/v              x` - the value of the new portion is 4, and we can visually see that it's lower than the previous portion 5 - so : `v > x/v`
-        ```
+   1.1.2 **why `s > x/v` :** - let's take the value v = 5 : - we already showed that it's too high, now we will see that x/v == 20/5 is too low :
-      - the value of the new portion is 4, and we can visually see that it's lower than the previous portion 5
+   `                   v
      - so : `v > x/v`
      1.1.2 **why `s > x/v` :**
      - let's take the value v = 5 : 
      - we already showed that it's too high, now we will see that x/v == 20/5 is too low : 
        ```
                     v
        0   (5)   10   15   20   25
 v  :    |----|----|----|----|----|
 x/v:    |---|---|---|---|---|   <----- squizz
        0  *1  *2  *3  *4  *5       -> number of portions
        01234                       -> portion size
 (x/v)²: |---|---|---|---|
-                0   4   8   12  16
+        0   4   8   12  16` - the portion size is smaller than the number of portions, so it's too small to be the sqrt, indeed we visually see that this portion size `x/v` is a root of a smaller number : `(x/v)² == 16` - so : `s > x/v`
-        ```
+   1.1.3. **conclusion :** - v > s - and v > x/v (<- this proof is not essential) - and s > x/v (<- we actually only need this proof) - so `v > s > x/v`
      - the portion size is smaller than the number of portions, so it's too small to be the sqrt, indeed we visually see that this portion size `x/v` is a root of a smaller number : `(x/v)² == 16`
      - so : `s > x/v`
    1.1.3. **conclusion :**
      -     v > s
      - and v > x/v (<- this proof is not essential)
      - and s > x/v (<- we actually only need this proof)
      - so `v > s > x/v`
   1.2. **for value too low `v < s` :**
   - this is the same demonstration but in the other direction, let's just summarize it :
--- a/src/launcher.c
+++ b/src/launcher.c
@@ -91,7 +91,7 @@ void launch_computorv1(char *input)
    int max_exponent;
    int nbr_of_exponents;
    int degree;
-    size_t arg_len;
+    size_t token_len;
    size_t terms_count_prediction;
    // init
@@ -99,9 +99,9 @@ void launch_computorv1(char *input)
    remove_spaces(input);
    // lexerize
-    arg_len = ft_strlen(input) + 1; // +1 for last END token
+    token_len = ft_strlen(input) + 1; // +1 for last END token
-    print_debug("\n-> tokens[%i]\n", arg_len);
+    print_debug("\n-> tokens[%i]\n", token_len);
-    s_token tokens[arg_len];
+    s_token tokens[token_len];
    tokens_g_err = tokens;
    ft_bzero(tokens, sizeof(tokens));
    lexerize(input, tokens);
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -2,6 +2,50 @@
 #include "computorv1.h"
 /**
 * INT CHECKS
 */
 // Tells whether the first `size` characters at `input` (an unsigned run of
 // decimal digits) spell a value above 2147483647.
 //
 // The decision is made on text only, so no integer overflow can occur:
 //  - a run with fewer characters than the limit always fits,
 //  - a run with more characters is always reported as too big
 //    (NOTE: leading zeros count toward the length, so "00000000001" is
 //    reported as too big as well),
 //  - runs of equal length are ordered by ft_strncmp
 //    (assumes ft_strncmp follows the strncmp sign convention).
 static bool is_number_bigger_than_int_max(const char *input, int size)
 {
 	const char *limit = "2147483647";
 	int limit_len = ft_strlen(limit);

 	if (size != limit_len)
 	{
 		// different lengths : only the longer run can exceed the limit
 		return size > limit_len;
 	}
 	// same length : plain lexicographic order matches numeric order
 	return ft_strncmp(input, limit, limit_len) > 0;
 }
 // Tells whether the first `size` characters at `input` (a '-' sign followed
 // by decimal digits) spell a value below -2147483648.
 //
 // `size` is compared against the length of "-2147483648", so it is expected
 // to count the sign character too.
 //
 // The decision is made on text only, so no integer overflow can occur:
 //  - a literal with fewer characters than the limit always fits,
 //  - a literal with more characters is always reported as too small,
 //  - literals of equal length are ordered by ft_strncmp
 //    (assumes ft_strncmp follows the strncmp sign convention).
 static bool is_number_smaller_than_int_min(const char *input, int size)
 {
 	const char *int_min_str = "-2147483648";
 	int int_min_len = ft_strlen(int_min_str);

 	if (size < int_min_len)
 		return false;
 	if (size > int_min_len)
 		return true;
 	// Same length : for negative numbers a lexicographically GREATER string
 	// means a larger magnitude, hence a SMALLER value
 	// ("-2147483649" > "-2147483648" as text, but is below INT_MIN).
 	return ft_strncmp(input, int_min_str, int_min_len) > 0;
 }
 // Tells whether the `size`-character literal at `input` cannot be stored in
 // an int. A literal starting with '-' is checked against the lower bound,
 // anything else against the upper bound.
 static bool is_number_out_of_int_range(const char *input, int size)
 {
 	bool is_negative = (input[0] == '-');

 	return is_negative
 		? is_number_smaller_than_int_min(input, size)
 		: is_number_bigger_than_int_max(input, size);
 }
 /**
 * TOKEN CHECKS
 */
 // token is alphabet letter, like 'x' or 'y'
 static bool token_is_variable(const char *input, int input_pos, int *token_size)
 {
@@ -17,107 +61,87 @@ static bool token_is_variable(const char *input, int input_pos, int *token_size)
 static bool token_is_number_int(const char *input, int input_pos, int *token_size)
 {
 	int number_size;
    int max_number_size;
-    if (!ft_isdigit(input[input_pos]))
+	number_size = 0;
    {
        return false;
    }
-    number_size = 1;
+	// if (input[input_pos + number_size] == '-')
-    max_number_size = 16; // max size for int
+	// 	number_size++;
-    while (number_size <= max_number_size)
+
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit(input[input_pos + number_size]))
 		{
 			number_size++;
 		}
-        else if (input[input_pos + number_size] == '.')
+		else
 			break;
 	}
 	if (number_size == 0)
 	{
            if (ft_isdigit(input[input_pos + number_size + 1]))
            {
                // number is double
 		return false;
 	}
-            else
+	if (is_number_out_of_int_range(&input[input_pos], number_size))
                break;
        }
        else
            break;
    }
    if (number_size > max_number_size)
 	{
-        stop_errors(&input[input_pos]);
+		return false;
 	}
 	*token_size = number_size;
 	return true;
 }
-// token is double "123.456"
+// token is double "123.456" or token < MIN_INT or token > MAX_INT
 static bool token_is_number_double(const char *input, int input_pos, int *token_size)
 {
 	int number_size;
    int max_number_size;
 	bool has_dot;
    if (!ft_isdigit(input[input_pos]))
    {
        return false;
    }
 	has_dot = false;
-    number_size = 1;
+	number_size = 0;
-    max_number_size = 24; // max char needed to represent double : 1 sign + 1 point + 17 fractinoal part + 5 exponent
+
-    while (number_size <= max_number_size)
+	// if (input[input_pos + number_size] == '-')
 	// 	number_size++;
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit(input[input_pos + number_size]))
 		{
 			number_size++;
 		}
-        else if (input[input_pos + number_size] == '.')
+		else if (input[input_pos + number_size] == '.' && !has_dot)
 		{
            if (has_dot)
            {
                // number is not a valid token, it has 2 dots
                return false;
            }
            if (!ft_isdigit(input[input_pos + number_size + 1]))
            {
                // number is not valid token, it has a dot with no number after the dot
                return false;
            }
 			has_dot = true;
 			number_size++;
 		}
 		else
 			break;
 	}
-    if (number_size > max_number_size)
+	if (is_number_out_of_int_range(&input[input_pos], number_size))
 	{
        stop_errors(&input[input_pos]);
    }
 		*token_size = number_size;
 		return true;
 	}
-
+	if (!has_dot)
 // token is superscript int (e.g., ¹, ², ³, ⁴, ⁵, etc.)
 static bool token_is_number_int_super(const char *input, int input_pos, int *token_size)
 {
    int digit_size = 0;
    int number_size = 0;
    int max_number_size = 16; // same max size as regular integers
    // check if first character is superscript
    if (!ft_isdigit_superscript(input + input_pos, NULL))
 	{
 		return false;
 	}
 	*token_size = number_size;
 	return true;
 }
 // token is superscript int e.g. ¹, ², ³, ⁴, ⁵, ⁶⁶⁶
 static bool token_is_number_int_super(const char *input, int input_pos, int *token_size)
 {
 	int digit_size = 0;
 	int number_size = 0;
 	int superscript_len = 0;
 	int superscript_len_max = 3; // max length of superscript int we want to support
 	// iterate to find full superscript number
-    while (number_size < max_number_size)
+	while (input[input_pos + number_size] != '\0')
 	{
-        if (ft_isdigit_superscript(input + input_pos + number_size, &digit_size))
+		if (ft_isdigit_superscript(&input[input_pos + number_size], &digit_size))
 		{
 			superscript_len++;
 			// Increment by the length of the UTF-8 character (2 or 3 bytes)
 			number_size += digit_size;
 		}
@@ -126,11 +150,14 @@ static bool token_is_number_int_super(const char *input, int input_pos, int *tok
 			break;
 		}
 	}
 	if (number_size == 0)
 	{
 		return false;
 	}
 	if (superscript_len > superscript_len_max)
 	{
 		stop_errors("superscript int is too long (max supported length is %d), got : %s\n", superscript_len_max, &input[input_pos]);
 	}
 	*token_size = number_size;
 	return true;
@@ -215,6 +242,7 @@ static bool token_is_equal(const char *input, int input_pos, int *token_size)
 /**
 * LEXER
 */
 void lexerize(const char *input, s_token *tokens)
 {
 	int tokens_count;
@@ -240,18 +268,18 @@ void lexerize(const char *input, s_token *tokens)
 			tokens[tokens_count].tag = TOKEN_NO_TAG;
 			tokens[tokens_count].value_char = 'x';
 		}
        else if (token_is_number_int(input, input_pos, &token_size))
        {
            tokens[tokens_count].type = TOKEN_NUMBER_INT;
            tokens[tokens_count].tag = TOKEN_NUMBER;
            tokens[tokens_count].value_int = ft_atoi(&input[input_pos]);
        }
 		else if (token_is_number_double(input, input_pos, &token_size))
 		{
 			tokens[tokens_count].type = TOKEN_NUMBER_DOUBLE;
 			tokens[tokens_count].tag = TOKEN_NUMBER;
 			tokens[tokens_count].value_double = ft_atof(&input[input_pos]);
 		}
 		else if (token_is_number_int(input, input_pos, &token_size))
 		{
 			tokens[tokens_count].type = TOKEN_NUMBER_INT;
 			tokens[tokens_count].tag = TOKEN_NUMBER;
 			tokens[tokens_count].value_int = ft_atoi(&input[input_pos]);
 		}
 		else if (token_is_number_int_super(input, input_pos, &token_size))
 		{
 			tokens[tokens_count].type = TOKEN_NUMBER_INT_SUPER;
--- a/src/main.c
+++ b/src/main.c
@@ -101,14 +101,14 @@ static void launch_stdin()
 	free(line);
 }
-static void launch_stdin_loop()
+// static void launch_stdin_loop()
-{
+// {
-    while (1)
+// 	while (1)
-    {
+// 	{
-        // for the moment it does not work since errors exit
+// 		// for the moment it does not work since errors exit
-        launch_stdin();
+// 		launch_stdin();
-    }
+// 	}
-}
+// }
 int main(int ac, char **av)
 {
@@ -123,12 +123,7 @@ int main(int ac, char **av)
 	// check arguments
 	program_mode = MODE_ARGV;
-    if (ac == 1)
+
    {
        program_mode = MODE_STDIN;
    }
    else if (ac > 1)
    {
 	// get flags
 	input = NULL;
 	i = 1;
@@ -167,7 +162,6 @@ int main(int ac, char **av)
 	{
 		program_mode = MODE_STDIN;
 	}
    }
 	// launch calculator
 	if (program_mode == MODE_ARGV)
@@ -178,10 +172,10 @@ int main(int ac, char **av)
 	{
 		launch_stdin();
 	}
-    else if (program_mode == MODE_LOOP)
+	// else if (program_mode == MODE_LOOP)
-    {
+	// {
-        launch_stdin_loop();
+	// 	launch_stdin_loop();
-    }
+	// }
 	return (0);
 }
--- a/src/parser.c
+++ b/src/parser.c
@@ -16,30 +16,44 @@
 static e_term_sign get_sign(s_token *tokens, int i, int *token_count)
 {
    *token_count = 0;
    int j;
    e_term_sign ret_sign;
    // default to '+'
    ret_sign = TERM_PLUS;
    // sign
    if (tokens[i].tag == TOKEN_SIGN)
    {
-        *token_count = 1;
+        // we can have two signs in a row, like "3 - -2" or "3 - +2"
        j = 0;
        while (j < 2)
        {
            if (tokens[i + j].tag != TOKEN_SIGN)
                break;
            if (tokens[i + j].type == TOKEN_SIGN_MINUS)
                ret_sign = (ret_sign == TERM_PLUS) ? TERM_MINUS : TERM_PLUS;
            *token_count += 1;
            j++;
        }
    }
    else if (i == 0)
    {
        // if leftmost term, the sign can be omitted (implicit '+') in front of a number or variable
        *token_count = 0;
-        return TERM_PLUS;
+        ret_sign = TERM_PLUS;
    }
    else if (tokens[i - 1].type == TOKEN_EQUAL)
    {
        // if first token after 'equal', the sign can be omitted (implicit '+') in front of a number or variable
        *token_count = 0;
-        return TERM_PLUS;
+        ret_sign = TERM_PLUS;
    }
    else
    {
        stop_errors("at begining of term, we should have a token 'sign', not '%s' (token[%i])", token_type_to_str(tokens[i].type), i);
    }
-    return tokens[i].type == TOKEN_SIGN_PLUS ? TERM_PLUS : TERM_MINUS;
+    return ret_sign;
 }
 static double get_double_value(s_token token)
--- a/src/utils/math.c
+++ b/src/utils/math.c
@@ -65,7 +65,7 @@ int gcd_int(int a, int b)
        b = a % b;
        a = tmp;
    }
-    return abs(a);
+    return ft_abs(a);
 }
 // returns the gcd, and modify arguments with new reduced values
--- a/tester.sh
+++ b/tester.sh