improve parser exponent and detect too big exponent

2026-05-26 14:30:28 +02:00
parent c99bdfc63a
commit abc5754bba
3 changed files with 90 additions and 67 deletions
--- a/headers/computorv1.h
+++ b/headers/computorv1.h
@@ -13,6 +13,7 @@
 */
 #define DOUBLE_PRECISION 0.00001
 #define MAX_EXPONENT 1000
 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * MAIN.C
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -42,6 +42,23 @@ static bool is_number_out_of_int_range(const char *input, int size)
 	}
 }
 /**
 * SUPERSCRIPT DIGIT CHECKS
 */
 /**
  * Tell whether a run of UTF-8 superscript digits encodes a value above the
  * largest 32-bit signed int (2147483647).
  *
  * The digits are decoded one by one and compared numerically. Comparing byte
  * lengths or raw bytes is not reliable: '¹', '²', '³' are 2 bytes long
  * (C2 B9, C2 B2, C2 B3) while '⁰' and '⁴'..'⁹' are 3 bytes long
  * (E2 81 B0, E2 81 B4..B9), and their byte order is not their numeric order.
  *
  * @param input  pointer to the first byte of the superscript run
  * @param size   length of the run in BYTES (not in digits)
  * @return true if the decoded value is strictly greater than 2147483647,
  *         false otherwise; decoding stops at the first byte sequence that is
  *         not a superscript digit
  */
 static bool is_superscript_bigger_than_int_max(const char *input, int size)
 {
 	// same limit as the "2147483647" literal this check is built around
 	const long long int_max_value = 2147483647LL;
 	const unsigned char *bytes = (const unsigned char *)input;
 	long long value = 0;
 	int pos = 0;

 	while (pos < size)
 	{
 		int digit;
 		int width;

 		if (pos + 1 < size && bytes[pos] == 0xC2 && bytes[pos + 1] == 0xB9)
 		{
 			// U+00B9 SUPERSCRIPT ONE
 			digit = 1;
 			width = 2;
 		}
 		else if (pos + 1 < size && bytes[pos] == 0xC2
 			&& (bytes[pos + 1] == 0xB2 || bytes[pos + 1] == 0xB3))
 		{
 			// U+00B2 SUPERSCRIPT TWO, U+00B3 SUPERSCRIPT THREE
 			digit = bytes[pos + 1] - 0xB0;
 			width = 2;
 		}
 		else if (pos + 2 < size && bytes[pos] == 0xE2 && bytes[pos + 1] == 0x81
 			&& (bytes[pos + 2] == 0xB0
 				|| (bytes[pos + 2] >= 0xB4 && bytes[pos + 2] <= 0xB9)))
 		{
 			// U+2070 SUPERSCRIPT ZERO, U+2074..U+2079 SUPERSCRIPT FOUR..NINE
 			digit = bytes[pos + 2] - 0xB0;
 			width = 3;
 		}
 		else
 		{
 			// not a superscript digit: nothing more to decode
 			break;
 		}
 		// value <= int_max_value here, so this cannot overflow a long long
 		value = value * 10 + digit;
 		if (value > int_max_value)
 			return true;
 		pos += width;
 	}
 	return false;
 }
 /**
 * TOKEN CHECKS
 */
@@ -64,9 +81,6 @@ static bool token_is_number_int(const char *input, int input_pos, int *token_siz
 	number_size = 0;
 	// if (input[input_pos + number_size] == '-')
 	// 	number_size++;
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit(input[input_pos + number_size]))
@@ -97,9 +111,6 @@ static bool token_is_number_double(const char *input, int input_pos, int *token_
 	has_dot = false;
 	number_size = 0;
 	// if (input[input_pos + number_size] == '-')
 	// 	number_size++;
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit(input[input_pos + number_size]))
@@ -133,15 +144,12 @@ static bool token_is_number_int_super(const char *input, int input_pos, int *tok
 {
 	int digit_size = 0;
 	int number_size = 0;
 	int superscript_len = 0;
 	int superscript_len_max = 3; // max length of superscript int we want to support
 	// iterate to find full superscript number
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit_superscript(&input[input_pos + number_size], &digit_size))
 		{
 			superscript_len++;
 			// Increment by the length of the UTF-8 character (2 or 3 bytes)
 			number_size += digit_size;
 		}
@@ -154,9 +162,9 @@ static bool token_is_number_int_super(const char *input, int input_pos, int *tok
 	{
 		return false;
 	}
-	if (superscript_len > superscript_len_max)
+	if (is_superscript_bigger_than_int_max(&input[input_pos], number_size))
 	{
-		stop_errors("superscript int is too long (max supported length is %d), got : %s\n", superscript_len_max, &input[input_pos]);
+		return false;
 	}
 	*token_size = number_size;
--- a/src/parser.c
+++ b/src/parser.c
@@ -24,7 +24,7 @@ static e_term_sign get_sign(s_token *tokens, int i, int *token_count)
    if (tokens[i].tag == TOKEN_SIGN)
    {
-        // we cna have two signs in a row, like "3 - -2" or "3 - +2"
+        // we can have two signs in a row, like "3 - -2" or "3 - +2"
        j = 0;
        while (j < 2)
        {
@@ -116,75 +116,91 @@ static double get_coefficient(s_token *tokens, int i, int *token_count)
    return coefficient;
 }
 /**
  * Check that the first `len` tokens carry exactly the expected types.
  *
  * tokens[k].type is compared with types[k] for k in [0, len). No bounds
  * check is done on either array: both must hold at least `len` entries.
  *
  * @param tokens  first token of the sequence to inspect
  * @param types   expected token types, in order
  * @param len     number of tokens/types to compare
  * @return true when every compared token matches (also when len <= 0),
  *         false at the first mismatch
  */
 static bool token_sequence(s_token *tokens, e_token_type *types, int len)
 {
    for (int idx = 0; idx < len; ++idx)
    {
        if (tokens[idx].type != types[idx])
        {
            return false;
        }
    }
    return true;
 }
 static int get_exponent(s_token *tokens, int i, int *token_count)
 {
    *token_count = 0;
    int ret_exponent;
    // valid:
-    // - '*x^2' -> exponent 2
+    // - [*] [x] [^] [2] -> exponent 2
-    // - '*x²'  -> exponent 2
+    // - [*] [x] [²]     -> exponent 2
-    // - 'x^2'  -> exponent 2
+    // - [*] [x]         -> exponent 1
-    // - '*x'   -> exponent 1
+    // - [x] [^] [2]     -> exponent 2
-    // - 'x'    -> exponent 1
+    // - [x] [²]         -> exponent 2
    // - [x]             -> exponent 1
    // - ''              -> exponent 0
    // invalid first token :
    // - '2'        -> number
-    // first reach VARIABLE : 'x' or '*x'
+    if (tokens[i].tag == TOKEN_NUMBER)
    if (tokens[i].type == TOKEN_VARIABLE)
    {
-        // token is 'x'
+        // exponent term cannot begin with a number
-        i++;
+        stop_errors("at exponent place, we should have an exponent expression, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
        *token_count = 1;
    }
-    else if (tokens[i].type == TOKEN_FACTOR_MULT)
+    else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE, TOKEN_POWER, TOKEN_NUMBER_INT}, 4))
    {
-        // token is '*'
+        // ex: [*] [x] [^] [2]
-        i++;
+        *token_count = 4;
-        if (tokens[i].type == TOKEN_VARIABLE)
+        ret_exponent = tokens[i + 3].value_int;
    }
    else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE, TOKEN_NUMBER_INT_SUPER}, 3))
    {
-            // tokens are '*x'
+        // ex: [*] [x] [²]
-            i++;
+        *token_count = 3;
        ret_exponent = tokens[i + 2].value_int;
    }
    else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE}, 2))
    {
        // ex: [*] [x] -> exponent 1
        *token_count = 2;
        ret_exponent = 1;
    }
    else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE, TOKEN_POWER, TOKEN_NUMBER_INT}, 3))
    {
        // ex: [x] [^] [2]
        *token_count = 3;
        ret_exponent = tokens[i + 2].value_int;
    }
    else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE, TOKEN_NUMBER_INT_SUPER}, 2))
    {
        // ex: [x] [²]
        *token_count = 2;
        ret_exponent = tokens[i + 1].value_int;
    }
    else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE}, 1))
    {
        // ex: [x] -> exponent 1
        *token_count = 1;
        ret_exponent = 1;
    }
    else
    {
-            stop_errors("at exponent place, after a '*' we should have a 'var', but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
+        // no variable, so no exponent -> exponent 0
-        }
+        *token_count = 0;
-    }
+        ret_exponent = 0;
    else
    {
        // if the tokens are neither 'x' nor '*x', then the exponent is 0
        return 0;
    }
-    // then get power sign '^' or directly superscript power like '²'
+    // check if exponent is not too big, to avoid overflow when calculating power
-    if (tokens[i].type == TOKEN_POWER)
+    if (ret_exponent > MAX_EXPONENT)
    {
-        i++;
+        stop_errors("exponent is too big (max supported exponent is %d), got : %d\n", MAX_EXPONENT, ret_exponent);
        *token_count += 1;
    }
    else if (tokens[i].type == TOKEN_NUMBER_INT_SUPER)
    {
        *token_count += 1;
        // return exponent directly
        return tokens[i].value_int;
    }
    else
    {
        // if token is 'x' not followed by '^' -> it's an exponent 1
        return 1;
    }
-    // then get exponent
+    return ret_exponent;
    if (tokens[i].type == TOKEN_NUMBER_INT)
    {
        *token_count += 1;
    }
    else
    {
        stop_errors("at exponent place, we should have an int, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
    }
    return tokens[i].value_int;
 }
 static void check_variables(s_token *tokens)
@@ -248,9 +264,7 @@ void parse(s_token *tokens, s_term *terms, int terms_count_max)
        e_term_sign ret_sign = get_sign(tokens, i, &token_count);
        terms[terms_count].sign = ret_sign;
        if (ret_sign == TERM_MINUS)
        {
            sign = -1;
        }
        i += token_count;
        print_debug("term[%i] get_sign: (%i)[%s], token_count: [%d]\n", terms_count, ret_sign, term_sign_to_str(ret_sign), token_count);