improve parser exponent and detect too big exponent

This commit is contained in:
hugogogo
2026-05-26 14:30:28 +02:00
parent c99bdfc63a
commit abc5754bba
3 changed files with 90 additions and 67 deletions

View File

@@ -13,6 +13,7 @@
*/
#define DOUBLE_PRECISION 0.00001
#define MAX_EXPONENT 1000
/** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* MAIN.C

View File

@@ -42,6 +42,23 @@ static bool is_number_out_of_int_range(const char *input, int size)
}
}
/**
* SUPERSCRIPT DIGIT CHECKS
*/
/**
 * Tell whether a run of UTF-8 superscript digits encodes a value greater
 * than INT_MAX (2147483647).
 *
 * Superscript digits do not share one encoding width: U+00B9 (¹), U+00B2 (²)
 * and U+00B3 (³) take 2 bytes, while U+2070 (⁰) and U+2074..U+2079 (⁴..⁹)
 * take 3 bytes. Their byte values are not ordered like the digits they stand
 * for either, so neither the byte length nor a byte-wise comparison says
 * anything about the magnitude. Each digit is therefore decoded and the value
 * is accumulated numerically.
 *
 * @param input  pointer to the first byte of the superscript number
 * @param size   length of the superscript number, in bytes
 * @return true if the decoded value exceeds INT_MAX, false otherwise.
 *         Decoding stops at the first byte sequence that is not a complete
 *         superscript digit; only the digits read so far are considered.
 */
static bool is_superscript_bigger_than_int_max(const char *input, int size)
{
    const long long int_max_value = 2147483647LL;
    const unsigned char *bytes = (const unsigned char *)input;
    long long value = 0;
    int pos = 0;

    while (pos < size)
    {
        int digit;

        if (pos + 1 < size && bytes[pos] == 0xC2 && bytes[pos + 1] == 0xB9)
        {
            digit = 1;
            pos += 2;
        }
        else if (pos + 1 < size && bytes[pos] == 0xC2 && bytes[pos + 1] == 0xB2)
        {
            digit = 2;
            pos += 2;
        }
        else if (pos + 1 < size && bytes[pos] == 0xC2 && bytes[pos + 1] == 0xB3)
        {
            digit = 3;
            pos += 2;
        }
        else if (pos + 2 < size && bytes[pos] == 0xE2 && bytes[pos + 1] == 0x81
            && (bytes[pos + 2] == 0xB0
                || (bytes[pos + 2] >= 0xB4 && bytes[pos + 2] <= 0xB9)))
        {
            // U+2070 and U+2074..U+2079 : the last byte is 0xB0 + digit
            digit = bytes[pos + 2] - 0xB0;
            pos += 3;
        }
        else
        {
            // not a (complete) superscript digit : end of the number
            break;
        }

        value = value * 10 + digit;
        // bail out as soon as the limit is crossed, so `value` never overflows
        if (value > int_max_value)
            return true;
    }
    return false;
}
/**
* TOKEN CHECKS
*/
@@ -64,9 +81,6 @@ static bool token_is_number_int(const char *input, int input_pos, int *token_siz
number_size = 0;
// if (input[input_pos + number_size] == '-')
// number_size++;
while (input[input_pos + number_size] != '\0')
{
if (ft_isdigit(input[input_pos + number_size]))
@@ -97,9 +111,6 @@ static bool token_is_number_double(const char *input, int input_pos, int *token_
has_dot = false;
number_size = 0;
// if (input[input_pos + number_size] == '-')
// number_size++;
while (input[input_pos + number_size] != '\0')
{
if (ft_isdigit(input[input_pos + number_size]))
@@ -133,15 +144,12 @@ static bool token_is_number_int_super(const char *input, int input_pos, int *tok
{
int digit_size = 0;
int number_size = 0;
int superscript_len = 0;
int superscript_len_max = 3; // max length of superscript int we want to support
// iterate to find full superscript number
while (input[input_pos + number_size] != '\0')
{
if (ft_isdigit_superscript(&input[input_pos + number_size], &digit_size))
{
superscript_len++;
// Increment by the length of the UTF-8 character (2 or 3 bytes)
number_size += digit_size;
}
@@ -154,9 +162,9 @@ static bool token_is_number_int_super(const char *input, int input_pos, int *tok
{
return false;
}
if (superscript_len > superscript_len_max)
if (is_superscript_bigger_than_int_max(&input[input_pos], number_size))
{
stop_errors("superscript int is too long (max supported length is %d), got : %s\n", superscript_len_max, &input[input_pos]);
return false;
}
*token_size = number_size;

View File

@@ -24,7 +24,7 @@ static e_term_sign get_sign(s_token *tokens, int i, int *token_count)
if (tokens[i].tag == TOKEN_SIGN)
{
// we cna have two signs in a row, like "3 - -2" or "3 - +2"
// we can have two signs in a row, like "3 - -2" or "3 - +2"
j = 0;
while (j < 2)
{
@@ -116,75 +116,91 @@ static double get_coefficient(s_token *tokens, int i, int *token_count)
return coefficient;
}
/**
 * Check that a run of tokens has exactly the expected types.
 *
 * Compares tokens[k].type with types[k] for k = 0 .. len - 1, stopping at
 * the first difference.
 *
 * @param tokens  first token of the run to inspect
 * @param types   expected token types, in order
 * @param len     number of entries to compare
 * @return true when all `len` types match (also when len <= 0),
 *         false at the first mismatch.
 */
static bool token_sequence(s_token *tokens, e_token_type *types, int len)
{
    for (int idx = 0; idx < len; idx++)
    {
        bool same_type = (tokens[idx].type == types[idx]);

        if (!same_type)
        {
            return false;
        }
    }
    return true;
}
/**
 * Read the exponent expression that starts at tokens[i].
 *
 * The accepted token sequences are listed in the comment at the top of the
 * body. *token_count is reset to 0 on entry and then set to the number of
 * tokens the exponent expression spans.
 *
 * NOTE(review): this span appears to show the removed and the added lines of
 * a diff hunk interleaved without +/- markers (two competing `if` heads, two
 * `return` statements at the end), so it is not compilable as shown. The
 * comments below describe each visible fragment; confirm against the real
 * file before relying on them.
 *
 * @param tokens       token array being parsed
 * @param i            index of the first token to inspect
 * @param token_count  out: number of tokens belonging to the exponent expression
 * @return the exponent value (0 when no variable is present, 1 for a bare
 *         variable, otherwise the value_int of the exponent token).
 */
static int get_exponent(s_token *tokens, int i, int *token_count)
{
*token_count = 0;
int ret_exponent;
// valid :
// - '*x^2' -> exponent 2
// - '*x²' -> exponent 2
// - 'x^2' -> exponent 2
// - '*x' -> exponent 1
// - 'x' -> exponent 1
// - '' -> exponent 0
// - [*] [x] [^] [2] -> exponent 2
// - [*] [x] [²] -> exponent 2
// - [*] [x] -> exponent 1
// - [x] [^] [2] -> exponent 2
// - [x] [²] -> exponent 2
// - [x] -> exponent 1
// - '' -> exponent 0
// invalid first token :
// - '2' -> number
// first reach VARIABLE : 'x' or '*x'
if (tokens[i].type == TOKEN_VARIABLE)
if (tokens[i].tag == TOKEN_NUMBER)
{
// token is 'x'
i++;
// exponent term cannot begin with a number
// NOTE(review): ret_exponent is not assigned in this branch; presumably
// stop_errors() does not return — confirm, otherwise ret_exponent is read
// uninitialized by the MAX_EXPONENT check below.
stop_errors("at exponent place, we should have an exponent expression, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
}
// the longest sequences are tested first, so that a shorter pattern
// (e.g. [*] [x]) cannot match the prefix of a longer one (e.g. [*] [x] [^] [2])
else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE, TOKEN_POWER, TOKEN_NUMBER_INT}, 4))
{
// ex: [*] [x] [^] [2]
*token_count = 4;
ret_exponent = tokens[i + 3].value_int;
}
else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE, TOKEN_NUMBER_INT_SUPER}, 3))
{
// ex: [*] [x] [²]
*token_count = 3;
ret_exponent = tokens[i + 2].value_int;
}
else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE}, 2))
{
// ex: [*] [x] -> exponent 1
*token_count = 2;
ret_exponent = 1;
}
else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE, TOKEN_POWER, TOKEN_NUMBER_INT}, 3))
{
// ex: [x] [^] [2]
*token_count = 3;
ret_exponent = tokens[i + 2].value_int;
}
else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE, TOKEN_NUMBER_INT_SUPER}, 2))
{
// ex: [x] [²]
*token_count = 2;
ret_exponent = tokens[i + 1].value_int;
}
else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE}, 1))
{
// ex: [x] -> exponent 1
*token_count = 1;
}
else if (tokens[i].type == TOKEN_FACTOR_MULT)
{
// token is '*'
i++;
if (tokens[i].type == TOKEN_VARIABLE)
{
// tokens are '*x'
i++;
*token_count = 2;
}
else
{
stop_errors("at exponent place, after a '*' we should have a 'var', but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
}
ret_exponent = 1;
}
else
{
// if token are neither 'x' or '*x', then exponent is 0
return 0;
// no variable, so no exponent -> exponent 0
*token_count = 0;
ret_exponent = 0;
}
// then get power sign '^' or directly superscript power like '²'
if (tokens[i].type == TOKEN_POWER)
// check if exponent is not too big, to avoid overflow when calculating power
if (ret_exponent > MAX_EXPONENT)
{
i++;
*token_count += 1;
}
else if (tokens[i].type == TOKEN_NUMBER_INT_SUPER)
{
*token_count += 1;
// return exponent directly
return tokens[i].value_int;
}
else
{
// if token is 'x' not followed by '^' -> it's an exponent 1
return 1;
stop_errors("exponent is too big (max supported exponent is %d), got : %d\n", MAX_EXPONENT, ret_exponent);
}
// then get exponent
if (tokens[i].type == TOKEN_NUMBER_INT)
{
*token_count += 1;
}
else
{
stop_errors("at exponent place, we should have an int, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
}
return tokens[i].value_int;
return ret_exponent;
}
static void check_variables(s_token *tokens)
@@ -248,9 +264,7 @@ void parse(s_token *tokens, s_term *terms, int terms_count_max)
e_term_sign ret_sign = get_sign(tokens, i, &token_count);
terms[terms_count].sign = ret_sign;
if (ret_sign == TERM_MINUS)
{
sign = -1;
}
i += token_count;
print_debug("term[%i] get_sign: (%i)[%s], token_count: [%d]\n", terms_count, ret_sign, term_sign_to_str(ret_sign), token_count);