diff --git a/headers/computorv1.h b/headers/computorv1.h
index 0c42bed..2ef7cbe 100644
--- a/headers/computorv1.h
+++ b/headers/computorv1.h
@@ -13,6 +13,7 @@
  */
 
 #define DOUBLE_PRECISION 0.00001
+#define MAX_EXPONENT 1000
 /**
  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  * MAIN.C
diff --git a/src/lexer.c b/src/lexer.c
index 404c4d3..a0280fc 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -42,6 +42,64 @@ static bool is_number_out_of_int_range(const char *input, int size)
 	}
 }
 
+/**
+ * SUPERSCRIPT DIGIT CHECKS
+ */
+
+/**
+ * Decode the superscript digit that starts at `s`.
+ * Superscript digits are not contiguous in Unicode: ¹ ² ³ are encoded on
+ * 2 bytes (C2 B9, C2 B2, C2 B3) while ⁰ and ⁴..⁹ are encoded on 3 bytes
+ * (E2 81 B0 and E2 81 B4 .. E2 81 B9).
+ * Returns the digit value (0-9) and stores its byte length in `byte_len`,
+ * or returns -1 when `s` does not start with a superscript digit.
+ */
+static int superscript_digit_value(const char *s, int *byte_len)
+{
+	const unsigned char *u = (const unsigned char *)s;
+
+	if (u[0] == 0xC2 && (u[1] == 0xB9 || u[1] == 0xB2 || u[1] == 0xB3))
+	{
+		*byte_len = 2;
+		return (u[1] == 0xB9) ? 1 : u[1] - 0xB0;
+	}
+	if (u[0] == 0xE2 && u[1] == 0x81 && (u[2] == 0xB0 || (u[2] >= 0xB4 && u[2] <= 0xB9)))
+	{
+		*byte_len = 3;
+		return u[2] - 0xB0;
+	}
+	return -1;
+}
+
+/**
+ * Tell whether the superscript number stored in the first `size` bytes of
+ * `input` is greater than INT_MAX (2147483647).
+ * Each digit is decoded to its value: comparing raw UTF-8 bytes or byte
+ * lengths against "²¹⁴⁷⁴⁸³⁶⁴⁷" does not follow numeric order, since the
+ * digits are 2 or 3 bytes long and their encodings are not sorted by value.
+ */
+static bool is_superscript_bigger_than_int_max(const char *input, int size)
+{
+	const long long int_max = 2147483647LL;
+	long long value = 0;
+	int pos = 0;
+	int digit_len = 0;
+	int digit;
+
+	while (pos < size)
+	{
+		digit = superscript_digit_value(&input[pos], &digit_len);
+		if (digit < 0)
+			break;
+		value = value * 10 + digit;
+		// stop at the first overflow so `value` itself can never overflow
+		if (value > int_max)
+			return true;
+		pos += digit_len;
+	}
+	return false;
+}
+
 /**
  * TOKEN CHECKS
  */
@@ -64,9 +122,6 @@ static bool token_is_number_int(const char *input, int input_pos, int *token_siz
 
 	number_size = 0;
 
-	// if (input[input_pos + number_size] == '-')
-	// 	number_size++;
-
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit(input[input_pos + number_size]))
@@ -97,9 +152,6 @@ static bool token_is_number_double(const char *input, int input_pos, int *token_
 	has_dot = false;
 	number_size = 0;
 
-	// if (input[input_pos + number_size] == '-')
-	// 	number_size++;
-
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit(input[input_pos + number_size]))
@@ -133,15 +185,12 @@ static bool token_is_number_int_super(const char *input, int input_pos, int *tok
 {
 	int digit_size = 0;
 	int number_size = 0;
-	int superscript_len = 0;
-	int superscript_len_max = 3; // max length of superscript int we want to support
 
 	// iterate to find full superscript number
 	while (input[input_pos + number_size] != '\0')
 	{
 		if (ft_isdigit_superscript(&input[input_pos + number_size], &digit_size))
 		{
-			superscript_len++;
 			// Increment by the length of the UTF-8 character (2 or 3 bytes)
 			number_size += digit_size;
 		}
@@ -154,9 +203,9 @@ static bool token_is_number_int_super(const char *input, int input_pos, int *tok
 	{
 		return false;
 	}
-	if (superscript_len > superscript_len_max)
+	if (is_superscript_bigger_than_int_max(&input[input_pos], number_size))
 	{
-		stop_errors("superscript int is too long (max supported length is %d), got : %s\n", superscript_len_max, &input[input_pos]);
+		return false;
 	}
 
 	*token_size = number_size;
diff --git a/src/parser.c b/src/parser.c
index 6b7477e..0ab0b1d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -24,7 +24,7 @@ static e_term_sign get_sign(s_token *tokens, int i, int *token_count)
 
 	if (tokens[i].tag == TOKEN_SIGN)
 	{
-		// we cna have two signs in a row, like "3 - -2" or "3 - +2"
+		// we can have two signs in a row, like "3 - -2" or "3 - +2"
 		j = 0;
 		while (j < 2)
 		{
@@ -116,75 +116,98 @@ static double get_coefficient(s_token *tokens, int i, int *token_count)
 	return coefficient;
 }
 
+/**
+ * Check that the `len` tokens starting at `tokens` have, position by
+ * position, the types listed in `types`.
+ * Returns false at the first mismatch, so tokens after it are never read.
+ * NOTE(review): presumably the token array ends with a sentinel token whose
+ * type never appears in `types`, which keeps the reads in bounds - confirm.
+ */
+static bool token_sequence(s_token *tokens, e_token_type *types, int len)
+{
+	int i;
+
+	i = 0;
+	while (i < len)
+	{
+		if (tokens[i].type != types[i])
+			return false;
+		i++;
+	}
+	return true;
+}
+
 static int get_exponent(s_token *tokens, int i, int *token_count)
 {
 	*token_count = 0;
+	int ret_exponent;
 
-	// valide :
-	// - '*x^2' -> exponent 2
-	// - '*x²' -> exponent 2
-	// - 'x^2' -> exponent 2
-	// - '*x' -> exponent 1
-	// - 'x' -> exponent 1
-	// - '' -> exponent 0
+	// valid :
+	// - [*] [x] [^] [2] -> exponent 2
+	// - [*] [x] [²]     -> exponent 2
+	// - [*] [x]         -> exponent 1
+	// - [x] [^] [2]     -> exponent 2
+	// - [x] [²]         -> exponent 2
+	// - [x]             -> exponent 1
+	// - ''              -> exponent 0
+	// invalid first token :
+	// - '2'             -> number
 
-	// first reach VARIABLE : 'x' or '*x'
-	if (tokens[i].type == TOKEN_VARIABLE)
+	if (tokens[i].tag == TOKEN_NUMBER)
 	{
-		// token is 'x'
-		i++;
+		// exponent term cannot begin with a number
+		stop_errors("at exponent place, we should have an exponent expression, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
+	}
+	else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE, TOKEN_POWER, TOKEN_NUMBER_INT}, 4))
+	{
+		// ex: [*] [x] [^] [2]
+		*token_count = 4;
+		ret_exponent = tokens[i + 3].value_int;
+	}
+	else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE, TOKEN_NUMBER_INT_SUPER}, 3))
+	{
+		// ex: [*] [x] [²]
+		*token_count = 3;
+		ret_exponent = tokens[i + 2].value_int;
+	}
+	else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE}, 2))
+	{
+		// ex: [*] [x] -> exponent 1
+		*token_count = 2;
+		ret_exponent = 1;
+	}
+	else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE, TOKEN_POWER, TOKEN_NUMBER_INT}, 3))
+	{
+		// ex: [x] [^] [2]
+		*token_count = 3;
+		ret_exponent = tokens[i + 2].value_int;
+	}
+	else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE, TOKEN_NUMBER_INT_SUPER}, 2))
+	{
+		// ex: [x] [²]
+		*token_count = 2;
+		ret_exponent = tokens[i + 1].value_int;
+	}
+	else if (token_sequence(&tokens[i], (e_token_type[]){TOKEN_VARIABLE}, 1))
+	{
+		// ex: [x] -> exponent 1
 		*token_count = 1;
-	}
-	else if (tokens[i].type == TOKEN_FACTOR_MULT)
-	{
-		// token is '*'
-		i++;
-		if (tokens[i].type == TOKEN_VARIABLE)
-		{
-			// tokens are '*x'
-			i++;
-			*token_count = 2;
-		}
-		else
-		{
-			stop_errors("at exponent place, after a '*' we should have a 'var', but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
-		}
+		ret_exponent = 1;
 	}
 	else
 	{
-		// if token are neither 'x' or '*x', then exponent is 0
-		return 0;
+		// no variable, so no exponent -> exponent 0
+		*token_count = 0;
+		ret_exponent = 0;
 	}
 
-	// then get power sign '^' or directly superscript power like '²'
-	if (tokens[i].type == TOKEN_POWER)
+	// check if exponent is not too big, to avoid overflow when calculating power
+	if (ret_exponent > MAX_EXPONENT)
 	{
-		i++;
-		*token_count += 1;
-	}
-	else if (tokens[i].type == TOKEN_NUMBER_INT_SUPER)
-	{
-		*token_count += 1;
-		// return exponent directly
-		return tokens[i].value_int;
-	}
-	else
-	{
-		// if token is 'x' not followed by '^' -> it's an exponent 1
-		return 1;
+		stop_errors("exponent is too big (max supported exponent is %d), got : %d\n", MAX_EXPONENT, ret_exponent);
 	}
 
-	// then get exponent
-	if (tokens[i].type == TOKEN_NUMBER_INT)
-	{
-		*token_count += 1;
-	}
-	else
-	{
-		stop_errors("at exponent place, we should have an int, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
-	}
-
-	return tokens[i].value_int;
+	return ret_exponent;
 }
 
 static void check_variables(s_token *tokens)
@@ -248,9 +271,7 @@ void parse(s_token *tokens, s_term *terms, int terms_count_max)
 		e_term_sign ret_sign = get_sign(tokens, i, &token_count);
 		terms[terms_count].sign = ret_sign;
 		if (ret_sign == TERM_MINUS)
-		{
 			sign = -1;
-		}
 		i += token_count;
 		print_debug("term[%i] get_sign: (%i)[%s], token_count: [%d]\n", terms_count, ret_sign, term_sign_to_str(ret_sign), token_count);
 