42_EXT_05_computorv1/src/parser.c

/* parser.c */

#include "computorv1.h"

/**
    1.   VAR |   NUMBER_I |   NUMBER_D | ! POW |   SIGN_P |   SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | ! EQUAL | END
         NT  |   NUMBER                |   NT  |   SIGN              | ! FACTOR                    |   NT    | NT

    2.   VAR |   NUMBER_I |   NUMBER_D | ! POW | ! SIGN_P | ! SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | ! EQUAL | END
         NT  |   NUMBER                |   NT  | ! SIGN              | ! FACTOR                    |   NT    | NT

    3.   VAR | ! NUMBER_I | ! NUMBER_D | ! POW | ! SIGN_P | ! SIGN_M |   FACTOR_MUL | ! FACTOR_DIV | ! EQUAL | END
         NT  | ! NUMBER                |   NT  | ! SIGN              |   FACTOR                    |   NT    | NT
 */

/**
 * Reads the sign that opens the term starting at tokens[i].
 *
 * Up to two consecutive sign tokens are accepted (e.g. "3 - -2" or "3 - +2");
 * each minus token flips the result, so two minus signs give TERM_PLUS.
 * When the term does not open with a sign token, an implicit '+' is accepted
 * only for the very first token or for the token that directly follows an
 * '=' token; any other situation is reported through stop_errors().
 *
 * @param tokens       token array being parsed
 * @param i            index of the first token of the term
 * @param token_count  out: number of sign tokens consumed (0, 1 or 2)
 * @return TERM_PLUS or TERM_MINUS
 */
static e_term_sign get_sign(s_token *tokens, int i, int *token_count)
{
    e_term_sign sign = TERM_PLUS;
    int consumed;

    *token_count = 0;

    if (tokens[i].tag != TOKEN_SIGN)
    {
        // no explicit sign: only allowed at the start of either side of the equation
        if (i != 0 && tokens[i - 1].type != TOKEN_EQUAL)
        {
            stop_errors("at begining of term, we should have a token 'sign', not '%s' (token[%i])", token_type_to_str(tokens[i].type), i);
        }
        return sign;
    }

    // consume at most two sign tokens in a row
    for (consumed = 0; consumed < 2 && tokens[i + consumed].tag == TOKEN_SIGN; consumed++)
    {
        if (tokens[i + consumed].type == TOKEN_SIGN_MINUS)
        {
            // a minus inverts whatever sign was accumulated so far
            if (sign == TERM_PLUS)
                sign = TERM_MINUS;
            else
                sign = TERM_PLUS;
        }
    }
    *token_count = consumed;

    return sign;
}

/**
 * Returns the numeric value carried by a number token as a double.
 *
 * Tokens whose tag is not TOKEN_NUMBER are reported through stop_errors().
 * A TOKEN_NUMBER_DOUBLE token yields value_double; every other number token
 * yields value_int converted to double.
 *
 * @param token  the token to read (passed by value)
 * @return the token's numeric value
 */
static double get_double_value(s_token token)
{
    double value;

    if (token.tag != TOKEN_NUMBER)
    {
        stop_errors("this was suppose to be a number, instead got a %s", token_type_to_str(token.type));
    }

    // anything that is not a double is read from the integer field
    value = (token.type == TOKEN_NUMBER_DOUBLE) ? token.value_double : token.value_int;
    return value;
}

/**
 * Reads the numeric coefficient of the term starting at tokens[i].
 *
 * A coefficient is a number optionally followed by any amount of
 * "<factor> <number>" pairs, e.g. "3 * 2 / 5". A factor that is not followed
 * by a number (typically "* x") is left untouched for get_exponent().
 *
 * If the term does not start with a number, the coefficient is the implicit
 * 1 and no token is consumed. Returning right away in that case matters:
 * tokens[i] may be the TOKEN_END sentinel (input ending with a sign), and
 * looking at tokens[i + 1] would then read past the end of the array.
 *
 * @param tokens       token array being parsed
 * @param i            index of the first token after the sign
 * @param token_count  out: number of tokens that make up the coefficient
 * @return the coefficient value (1.0 when none is written)
 */
static double get_coefficient(s_token *tokens, int i, int *token_count)
{
    double coefficient;

    *token_count = 0;

    // no leading number (variable, factor, sign, '=', end...) -> implicit 1
    if (tokens[i].type == TOKEN_VARIABLE || tokens[i].tag != TOKEN_NUMBER)
    {
        return 1.0;
    }

    coefficient = get_double_value(tokens[i]);
    *token_count = 1;
    i++;

    // fold following "<factor> <number>" pairs, like "3 * 2 / 5".
    // tokens[i + 1] is safe to read: tokens[i] is a factor, hence not the END sentinel.
    while (tokens[i].tag == TOKEN_FACTOR && tokens[i + 1].tag == TOKEN_NUMBER)
    {
        // NOTE(review): a division by 0 is not rejected here and yields inf/nan — confirm the solver copes
        if (tokens[i].type == TOKEN_FACTOR_MULT)
        {
            coefficient *= get_double_value(tokens[i + 1]);
        }
        else if (tokens[i].type == TOKEN_FACTOR_DIV)
        {
            coefficient /= get_double_value(tokens[i + 1]);
        }
        *token_count += 2;
        i += 2;
    }

    return coefficient;
}

/**
 * Tells whether the first `len` tokens have exactly the given types.
 *
 * Comparison stops at the first mismatch, so tokens located after a
 * mismatching one are never read.
 *
 * @param tokens  first token to compare
 * @param types   expected token types, `len` entries
 * @param len     number of tokens to compare
 * @return true when every compared token has the expected type
 */
static bool token_sequence(s_token *tokens, e_token_type *types, int len)
{
    int remaining;

    for (remaining = len; remaining > 0; remaining--, tokens++, types++)
    {
        if (tokens->type != *types)
        {
            return false;
        }
    }
    return true;
}

/**
 * Reads the variable/exponent part of the term starting at tokens[i].
 *
 * Accepted shapes (an optional leading '*' is consumed only when a variable
 * follows it):
 *   [*] [x] [^] [2]   -> exponent 2
 *   [*] [x] [²]       -> exponent 2
 *   [*] [x]           -> exponent 1
 *   nothing matching  -> exponent 0, no token consumed
 * A number at this position is reported through stop_errors(), and so is an
 * exponent greater than MAX_EXPONENT.
 *
 * @param tokens       token array being parsed
 * @param i            index of the first token after the coefficient
 * @param token_count  out: number of tokens consumed
 * @return the exponent of the term
 */
static int get_exponent(s_token *tokens, int i, int *token_count)
{
    s_token *cursor = &tokens[i];
    int exponent = 0;
    int consumed = 0;

    *token_count = 0;

    if (cursor->tag == TOKEN_NUMBER)
    {
        // exponent part cannot begin with a number
        stop_errors("at exponent place, we should have an exponent expression, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
    }
    else
    {
        // optional '*' in front of the variable
        if (token_sequence(cursor, (e_token_type[]){TOKEN_FACTOR_MULT, TOKEN_VARIABLE}, 2))
        {
            cursor++;
            consumed = 1;
        }

        if (token_sequence(cursor, (e_token_type[]){TOKEN_VARIABLE}, 1))
        {
            consumed += 1;

            if (token_sequence(cursor + 1, (e_token_type[]){TOKEN_POWER, TOKEN_NUMBER_INT}, 2))
            {
                // [x] [^] [2]
                exponent = cursor[2].value_int;
                consumed += 2;
            }
            else if (token_sequence(cursor + 1, (e_token_type[]){TOKEN_NUMBER_INT_SUPER}, 1))
            {
                // [x] [²]
                exponent = cursor[1].value_int;
                consumed += 1;
            }
            else
            {
                // bare variable
                exponent = 1;
            }
        }
        // otherwise: no variable, exponent stays 0 and nothing is consumed
    }

    *token_count = consumed;

    // refuse exponents that are too large, to avoid overflow when computing powers
    if (exponent > MAX_EXPONENT)
    {
        stop_errors("exponent is too big (max supported exponent is %d), got : %d\n", MAX_EXPONENT, exponent);
    }

    return exponent;
}

/**
 * Checks that every variable token of the equation carries the same name.
 *
 * The name of the first variable token found is remembered; any later
 * variable token with a different value_char is reported through
 * stop_errors(). Scanning stops at the TOKEN_END token.
 *
 * @param tokens  token array terminated by a TOKEN_END token
 */
static void check_variables(s_token *tokens)
{
    char first_name = 0;
    int idx;

    for (idx = 0; tokens[idx].type != TOKEN_END; idx++)
    {
        if (tokens[idx].type != TOKEN_VARIABLE)
        {
            continue;
        }

        if (first_name == 0)
        {
            // first variable seen: it becomes the reference name
            first_name = tokens[idx].value_char;
        }
        else if (first_name != tokens[idx].value_char)
        {
            stop_errors("old var : '%c' - new var : '%c' (token number %i)", first_name, tokens[idx].value_char, idx);
        }
    }
}

/**
 * Turns the token array into an array of terms.
 *
 * Each term is read as: sign, then coefficient, then variable/exponent part.
 * An '=' token switches the position of the following terms from TERM_LEFT
 * to TERM_RIGHT. After the last term, one extra entry marked TERM_POS_END /
 * TERM_SIGN_END is written, so `terms` needs room for it.
 *
 * Errors are reported through stop_errors() (presumably it does not return —
 * the rest of this file already relies on that).
 *
 * @param tokens           token array terminated by a TOKEN_END token
 * @param terms            out: array receiving the parsed terms and the end marker
 * @param terms_count_max  capacity of `terms`, end marker included
 */
void parse(s_token *tokens, s_term *terms, int terms_count_max)
{
    int i;
    int terms_count;
    int token_count;
    int term_start;
    int sign;
    e_term_position term_position;

    check_variables(tokens);

    print_debug("PARSER STEPS :\n");

    terms_count = 0;
    token_count = 0;
    i = 0;
    term_position = TERM_LEFT;
    while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
    {
        print_debug("- token[%i]\n", i);

        // equal: everything after it belongs to the right-hand side
        if (tokens[i].type == TOKEN_EQUAL)
        {
            term_position = TERM_RIGHT;
            i++;
            continue;
        }

        // position
        terms[terms_count].position = term_position;

        // sign
        sign = 1;
        e_term_sign ret_sign = get_sign(tokens, i, &token_count);
        terms[terms_count].sign = ret_sign;
        if (ret_sign == TERM_MINUS)
            sign = -1;
        i += token_count;
        print_debug("term[%i] get_sign: (%i)[%s], token_count: [%d]\n", terms_count, ret_sign, term_sign_to_str(ret_sign), token_count);

        // remember where the body of the term starts, to detect an empty term below
        term_start = i;

        // coefficient (stored already multiplied by the sign)
        double ret_coefficient = get_coefficient(tokens, i, &token_count);
        terms[terms_count].coefficient = ret_coefficient * sign;
        i += token_count;
        print_debug("term[%i] get_coefficient: [%g], token_count: [%d]\n", terms_count, ret_coefficient, token_count);

        // exponent
        int ret_exponent = get_exponent(tokens, i, &token_count);
        terms[terms_count].exponent = ret_exponent;
        i += token_count;
        print_debug("term[%i] get_exponent: [%i], token_count: [%d]\n", terms_count, ret_exponent, token_count);

        // a term must contain at least a number or a variable: without this check
        // a dangling sign ("x + = 2") would silently become a term of value 1, and
        // an unexpected token would be re-read forever since i would not advance
        if (i == term_start)
        {
            stop_errors("a term needs a number or a variable, not '%s' (token[%i])", token_type_to_str(tokens[i].type), i);
        }

        terms_count++;
    }

    // last token is TOKEN_END, and terms[] should have at least one more spot for the END term
    if (tokens[i].type == TOKEN_END && terms_count < terms_count_max)
    {
        terms[terms_count].position = TERM_POS_END;
        terms[terms_count].sign = TERM_SIGN_END;
        terms[terms_count].coefficient = 0;
        terms[terms_count].exponent = 0;
    }
    else
    {
        stop_errors("terms_count: %i, terms_count_max: %i, tokens[%i].type: %s", terms_count, terms_count_max, i, token_type_to_str(tokens[i].type));
    }
}