42_EXT_05_computorv1/src/parser.c

/* parser.c */

#include "computorv1.h"

/**
    1.   VAR |   NUMBER_I |   NUMBER_D | ! POW |   SIGN_P |   SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | ! EQUAL | END
         NT  |   NUMBER                |   NT  |   SIGN              | ! FACTOR                    |   NT    | NT

    2.   VAR |   NUMBER_I |   NUMBER_D | ! POW | ! SIGN_P | ! SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | ! EQUAL | END
         NT  |   NUMBER                |   NT  | ! SIGN              | ! FACTOR                    |   NT    | NT

    3.   VAR | ! NUMBER_I | ! NUMBER_D | ! POW | ! SIGN_P | ! SIGN_M |   FACTOR_MUL | ! FACTOR_DIV | ! EQUAL | END
         NT  | ! NUMBER                |   NT  | ! SIGN              |   FACTOR                    |   NT    | NT
 */

/**
 * Reads the sign that opens the term starting at tokens[i].
 *
 * tokens      : token array being parsed
 * i           : index of the first token of the term
 * token_count : out, 1 when a sign token was consumed, 0 otherwise
 *
 * Returns TERM_PLUS or TERM_MINUS. An omitted sign counts as '+', but only
 * for the very first token or for the token right after an 'equal' token;
 * anywhere else a missing sign is reported through stop_errors().
 */
static e_term_sign get_sign(s_token *tokens, int i, int *token_count)
{
    const s_token *current = &tokens[i];

    // explicit sign : consume it
    if (current->tag == TOKEN_SIGN)
    {
        *token_count = 1;
        return current->type == TOKEN_SIGN_PLUS ? TERM_PLUS : TERM_MINUS;
    }

    // no sign token : nothing is consumed
    *token_count = 0;

    // implicit '+' for the leftmost term and for the first term after 'equal'
    // (the short-circuit keeps tokens[i - 1] from being read when i == 0)
    if (i == 0 || tokens[i - 1].type == TOKEN_EQUAL)
    {
        return TERM_PLUS;
    }

    stop_errors("at begining of term, we should have a token 'sign', not '%s' (token[%i])", token_type_to_str(current->type), i);

    // NOTE(review): only reached if stop_errors() returns — presumably it does not, confirm
    return current->type == TOKEN_SIGN_PLUS ? TERM_PLUS : TERM_MINUS;
}

/**
 * Returns the numeric value carried by a number token, as a double.
 *
 * A token whose tag is not TOKEN_NUMBER is reported through stop_errors().
 * TOKEN_NUMBER_DOUBLE tokens yield value_double, every other number token
 * yields value_int converted to double.
 */
static double get_double_value(s_token token)
{
    double value;

    if (token.tag != TOKEN_NUMBER)
    {
        stop_errors("this was suppose to be a number, instead got a %s", token_type_to_str(token.type));
    }

    if (token.type != TOKEN_NUMBER_DOUBLE)
    {
        // any other number token is read as an int
        value = token.value_int;
    }
    else
    {
        value = token.value_double;
    }

    return value;
}

/**
 * Reads the numeric coefficient of the term whose body starts at tokens[i].
 *
 * A coefficient is a number optionally followed by any amount of
 * "<factor> <number>" pairs, like "3 * 2 / 5". A factor that is not followed
 * by a number (like the '*' of "3 * x") is left for the caller.
 *
 * tokens      : token array being parsed, terminated by a TOKEN_END token
 * i           : index of the first token after the sign
 * token_count : out, number of tokens consumed (0 when there is no leading number)
 *
 * Returns the computed coefficient, or 1.0 when the term has no leading
 * number. A division by zero is reported through stop_errors().
 */
static double get_coefficient(s_token *tokens, int i, int *token_count)
{
    double coefficient = 1.0;

    *token_count = 0;

    // No leading number (variable, '*', sign, END, ...) : implicit coefficient 1.
    // Return right away so that nothing is read after tokens[i]; tokens[i] may
    // be TOKEN_END, and the token after it is outside the array.
    if (tokens[i].tag != TOKEN_NUMBER)
    {
        return coefficient;
    }

    coefficient = get_double_value(tokens[i]);
    *token_count = 1;
    i++;

    // detect more coefficients, like "3 * 2 / 5" etc
    // tokens[i + 1] is only read when tokens[i] is a factor, so never past TOKEN_END
    while (tokens[i].tag == TOKEN_FACTOR && tokens[i + 1].tag == TOKEN_NUMBER)
    {
        double operand = get_double_value(tokens[i + 1]);

        if (tokens[i].type == TOKEN_FACTOR_MULT)
        {
            coefficient *= operand;
        }
        else if (tokens[i].type == TOKEN_FACTOR_DIV)
        {
            if (operand == 0.0)
            {
                // would silently turn the coefficient into inf or nan
                stop_errors("division by zero in coefficient (token number %i)", i + 1);
            }
            coefficient /= operand;
        }

        // factor + number
        *token_count += 2;
        i += 2;
    }

    return coefficient;
}

/**
 * Reads the variable part of a term, starting at tokens[i], and returns
 * its exponent.
 *
 * Accepted forms :
 * - '*x^2' or 'x^2' -> exponent 2
 * - '*x²'  or 'x²'  -> exponent 2 (superscript int token)
 * - '*x'   or 'x'   -> exponent 1
 * - anything else   -> exponent 0, nothing consumed
 *
 * tokens      : token array being parsed
 * i           : index of the first token after the coefficient
 * token_count : out, number of tokens consumed
 *
 * A '*' not followed by a variable, or a '^' not followed by an int, is
 * reported through stop_errors().
 */
static int get_exponent(s_token *tokens, int i, int *token_count)
{
    int cursor = i;

    *token_count = 0;

    // step 1 : the variable, with or without a '*' in front of it
    if (tokens[cursor].type == TOKEN_FACTOR_MULT)
    {
        // token is '*', look at what follows
        cursor++;
        if (tokens[cursor].type != TOKEN_VARIABLE)
        {
            stop_errors("at exponent place, after a '*' we should have a 'var', but instead got : '%s' (token number %i)", token_type_to_str(tokens[cursor].type), cursor);
        }
        else
        {
            // tokens are '*x'
            cursor++;
            *token_count = 2;
        }
    }
    else if (tokens[cursor].type == TOKEN_VARIABLE)
    {
        // token is 'x'
        cursor++;
        *token_count = 1;
    }
    else
    {
        // no variable at all : constant term
        return 0;
    }

    // step 2 : superscript power like '²' carries the exponent itself
    if (tokens[cursor].type == TOKEN_NUMBER_INT_SUPER)
    {
        *token_count += 1;
        return tokens[cursor].value_int;
    }

    // a variable that is not followed by '^' has exponent 1
    if (tokens[cursor].type != TOKEN_POWER)
    {
        return 1;
    }

    // consume the '^'
    cursor++;
    *token_count += 1;

    // step 3 : the int that follows '^'
    if (tokens[cursor].type != TOKEN_NUMBER_INT)
    {
        stop_errors("at exponent place, we should have an int, but instead got : '%s' (token number %i)", token_type_to_str(tokens[cursor].type), cursor);
    }
    else
    {
        *token_count += 1;
    }

    return tokens[cursor].value_int;
}

/**
 * Checks that every variable token of the list uses the same character.
 *
 * Walks the tokens up to TOKEN_END, remembers the character of the first
 * variable found, and reports any variable with a different character
 * through stop_errors().
 */
static void check_variables(s_token *tokens)
{
    char seen = 0; // 0 means "no variable met yet"

    for (int idx = 0; tokens[idx].type != TOKEN_END; idx++)
    {
        if (tokens[idx].type != TOKEN_VARIABLE)
        {
            continue;
        }

        if (seen == 0)
        {
            // first variable : becomes the reference
            seen = tokens[idx].value_char;
            continue;
        }

        if (seen != tokens[idx].value_char)
        {
            stop_errors("old var : '%c' - new var : '%c' (token number %i)", seen, tokens[idx].value_char, idx);
        }
    }
}

/**
 * Converts the token list into a list of terms.
 *
 * Each term is read as : [sign] [coefficient] [variable [exponent]].
 * An 'equal' token is not a term, it switches the position of the following
 * terms from TERM_LEFT to TERM_RIGHT.
 *
 * tokens          : token array, terminated by a TOKEN_END token
 * terms           : out, filled with one entry per term, then closed by an
 *                   END term (TERM_POS_END / TERM_SIGN_END)
 * terms_count_max : capacity of terms[], including the slot of the END term
 *
 * The stored coefficient already carries the sign of the term (it is
 * negative for a TERM_MINUS term).
 *
 * Errors are reported through stop_errors() : different variable names, a
 * term that contains neither a number nor a variable, malformed terms, or a
 * terms[] array too small to receive all the terms and the END term.
 */
void parse(s_token *tokens, s_term *terms, int terms_count_max)
{
    int i;
    int terms_count;
    int token_count;
    int sign;
    e_term_position term_position;

    check_variables(tokens);

    print_debug("PARSER STEPS :\n"); // debug

    terms_count = 0;
    token_count = 0;
    i = 0;
    term_position = TERM_LEFT;
    while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
    {
        print_debug("- token[%i]\n", i); // debug

        // equal
        if (tokens[i].type == TOKEN_EQUAL)
        {
            term_position = TERM_RIGHT;
            i++;
            continue;
        }

        // position
        terms[terms_count].position = term_position;

        // sign
        sign = 1;
        e_term_sign ret_sign = get_sign(tokens, i, &token_count);
        terms[terms_count].sign = ret_sign;
        if (ret_sign == TERM_MINUS)
        {
            sign = -1;
        }
        i += token_count;
        print_debug("term[%i] get_sign: (%i)[%s], token_count: [%d]\n", terms_count, ret_sign, term_sign_to_str(ret_sign), token_count); // debug

        // A term needs a body : a number, a variable, or the '*' of a '*x' form
        // (get_exponent checks what follows that '*'). Without this check :
        // - a dangling sign ("3 +", "3 + = 2") silently becomes a term of value 1
        // - a leading '/' or '^' consumes no token, so the same token is parsed
        //   again and again until terms[] is full
        // - get_coefficient could be called on TOKEN_END, the last valid token
        if (tokens[i].tag != TOKEN_NUMBER && tokens[i].type != TOKEN_VARIABLE && tokens[i].type != TOKEN_FACTOR_MULT)
        {
            stop_errors("a term should contain a number or a variable, but instead got : '%s' (token number %i)", token_type_to_str(tokens[i].type), i);
        }

        // coefficient
        double ret_coefficient = get_coefficient(tokens, i, &token_count);
        terms[terms_count].coefficient = ret_coefficient * sign;
        i += token_count;
        print_debug("term[%i] get_coefficient: [%g], token_count: [%d]\n", terms_count, ret_coefficient, token_count); // debug

        // exponent
        int ret_exponent = get_exponent(tokens, i, &token_count);
        terms[terms_count].exponent = ret_exponent;
        i += token_count;
        print_debug("term[%i] get_exponent: [%i], token_count: [%d]\n", terms_count, ret_exponent, token_count); // debug

        terms_count++;
    }

    // last token is TOKEN_END, and terms[] should have at least one more spot for the END term
    if (tokens[i].type == TOKEN_END && terms_count < terms_count_max)
    {
        terms[terms_count].position = TERM_POS_END;
        terms[terms_count].sign = TERM_SIGN_END;
        terms[terms_count].coefficient = 0;
        terms[terms_count].exponent = 0;
    }
    else
    {
        // the loop stopped because terms[] is full, not because the tokens ran out
        stop_errors("terms_count: %i, terms_count_max: %i, tokens[%i].type: %s", terms_count, terms_count_max, i, token_type_to_str(tokens[i].type));
    }
}