/**
 * Broad category of a lexed token.
 *
 * Finer distinctions (int vs double, '+' vs '-', '*' vs '/') are carried
 * separately by token_subtype.
 */
typedef enum
{
	TOKEN_VARIABLE, // x, y, etc.
	TOKEN_NUMBER,	// int or double
	TOKEN_POWER,	// ^ or **
	TOKEN_SIGN,		// + or -
	TOKEN_FACTOR,	// * or / or :
	TOKEN_EQUAL,	// =
	TOKEN_END		// null (end of input)
} token_type;

/**
 * Refinement of token_type.
 *
 * Token types that have no finer distinction use TOKEN_NO_SUBTYPE.
 */
typedef enum
{
	TOKEN_NO_SUBTYPE,
	// NUMBER
	TOKEN_NUMBER_INT,
	TOKEN_NUMBER_DOUBLE,
	// SIGN
	TOKEN_SIGN_PLUS,
	TOKEN_SIGN_MINUS,
	// FACTOR
	TOKEN_FACTOR_MULTIPLICATION,
	TOKEN_FACTOR_DIVISION,
} token_subtype;

/**
 * One lexed token.
 *
 * The anonymous union holds the payload:
 *  - value_double for TOKEN_NUMBER (both the INT and DOUBLE subtypes are
 *    stored as a double; the subtype only remembers how it was written),
 *  - value_char for every other token type.
 */
typedef struct
{
	token_type type;
	token_subtype subtype;
	union
	{
		char value_char;
		double value_double;
	};
} token;
+ else if (tokens[i].subtype == TOKEN_NUMBER_DOUBLE) + ft_printf("%30s", "TOKEN_NUMBER_DOUBLE"); + } else if (tokens[i].type == TOKEN_POWER) - ft_printf("%20s", "TOKEN_POWER"); + ft_printf("%14s%30s", "TOKEN_POWER", ""); else if (tokens[i].type == TOKEN_SIGN) - ft_printf("%20s", "TOKEN_SIGN"); + { + ft_printf("%14s", "TOKEN_SIGN"); + if (tokens[i].subtype == TOKEN_SIGN_PLUS) + ft_printf("%30s", "TOKEN_SIGN_PLUS"); + else if (tokens[i].subtype == TOKEN_SIGN_MINUS) + ft_printf("%30s", "TOKEN_SIGN_MINUS"); + } else if (tokens[i].type == TOKEN_FACTOR) - ft_printf("%20s", "TOKEN_FACTOR"); + { + ft_printf("%14s", "TOKEN_FACTOR"); + if (tokens[i].subtype == TOKEN_FACTOR_MULTIPLICATION) + ft_printf("%30s", "TOKEN_FACTOR_MULTIPLICATION"); + else if (tokens[i].subtype == TOKEN_FACTOR_DIVISION) + ft_printf("%30s", "TOKEN_FACTOR_DIVISION"); + } else if (tokens[i].type == TOKEN_EQUAL) - ft_printf("%20s", "TOKEN_EQUAL"); + ft_printf("%14s%30s", "TOKEN_EQUAL", ""); else if (tokens[i].type == TOKEN_END) - ft_printf("%20s", "TOKEN_END"); + ft_printf("%14s%30s", "TOKEN_END", ""); ft_putstr(" - value : "); - if (tokens[i].type == TOKEN_NUMBER_INT) - { - printf("%i\n", tokens[i].value_int); - } - else if (tokens[i].type == TOKEN_NUMBER_DOUBLE) + if (tokens[i].type == TOKEN_NUMBER) { printf("%g\n", tokens[i].value_double); } diff --git a/src/lexer.c b/src/lexer.c index 66982b5..5049b81 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,6 +1,6 @@ #include "lexer.h" -// any single letter is a valid variable, like 'x' or 'y' +// token is alphabet letter, like 'x' or 'y' static bool token_is_variable(const char *input, int input_pos, int *token_size) { if (ft_isalpha(input[input_pos])) @@ -11,7 +11,7 @@ static bool token_is_variable(const char *input, int input_pos, int *token_size) return false; } -// number can be int "123" +// token is int "123" static bool token_is_number_int(const char *input, int input_pos, int *token_size) { int number_size; @@ -51,7 +51,7 @@ static bool 
/**
 * Check whether the character at input[input_pos] is a '+' sign.
 *
 * On a match, *token_size is set to 1 and true is returned.
 * Otherwise *token_size is left untouched and false is returned.
 */
static bool token_is_sign_plus(const char *input, int input_pos, int *token_size)
{
	const bool matched = (input[input_pos] == '+');

	if (matched)
	{
		*token_size = 1;
	}
	return matched;
}

/**
 * Check whether the character at input[input_pos] is a '*' factor.
 *
 * On a match, *token_size is set to 1 and true is returned.
 * Otherwise *token_size is left untouched and false is returned.
 */
static bool token_is_factor_multiplication(const char *input, int input_pos, int *token_size)
{
	const bool matched = (input[input_pos] == '*');

	if (matched)
	{
		*token_size = 1;
	}
	return matched;
}
*input, token *tokens) if (token_is_variable(input, input_pos, &token_size)) { tokens[tokens_count].type = TOKEN_VARIABLE; + tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE; tokens[tokens_count].value_char = 'x'; } else if (token_is_number_int(input, input_pos, &token_size)) { - tokens[tokens_count].type = TOKEN_NUMBER_INT; - tokens[tokens_count].value_int = ft_atoi(&input[input_pos]); + tokens[tokens_count].type = TOKEN_NUMBER; + tokens[tokens_count].subtype = TOKEN_NUMBER_INT; + tokens[tokens_count].value_double = ft_atof(&input[input_pos]); // we keep info it's an int, but treat it as a double } else if (token_is_number_double(input, input_pos, &token_size)) { - tokens[tokens_count].type = TOKEN_NUMBER_DOUBLE; + tokens[tokens_count].type = TOKEN_NUMBER; + tokens[tokens_count].subtype = TOKEN_NUMBER_DOUBLE; tokens[tokens_count].value_double = ft_atof(&input[input_pos]); } else if (token_is_power(input, input_pos, &token_size)) { tokens[tokens_count].type = TOKEN_POWER; + tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE; tokens[tokens_count].value_char = '^'; } - else if (token_is_sign(input, input_pos, &token_size)) + else if (token_is_sign_plus(input, input_pos, &token_size)) { tokens[tokens_count].type = TOKEN_SIGN; + tokens[tokens_count].subtype = TOKEN_SIGN_PLUS; tokens[tokens_count].value_char = input[input_pos]; } - else if (token_is_factor(input, input_pos, &token_size)) + else if (token_is_sign_minus(input, input_pos, &token_size)) + { + tokens[tokens_count].type = TOKEN_SIGN; + tokens[tokens_count].subtype = TOKEN_SIGN_MINUS; + tokens[tokens_count].value_char = input[input_pos]; + } + else if (token_is_factor_multiplication(input, input_pos, &token_size)) { tokens[tokens_count].type = TOKEN_FACTOR; + tokens[tokens_count].subtype = TOKEN_FACTOR_MULTIPLICATION; + tokens[tokens_count].value_char = input[input_pos]; + } + else if (token_is_factor_division(input, input_pos, &token_size)) + { + tokens[tokens_count].type = TOKEN_FACTOR; + 
tokens[tokens_count].subtype = TOKEN_FACTOR_DIVISION; tokens[tokens_count].value_char = input[input_pos]; } else if (token_is_equal(input, input_pos, &token_size)) { tokens[tokens_count].type = TOKEN_EQUAL; + tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE; tokens[tokens_count].value_char = '='; } else @@ -231,6 +262,7 @@ int lexerize(const char *input, token *tokens) } tokens[tokens_count].type = TOKEN_END; + tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE; tokens[tokens_count].value_char = '\0'; return tokens_count; diff --git a/src/parser.c b/src/parser.c index 1eee6d5..d4c365c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,18 +1,24 @@ #include "parser.h" /** - TOKEN_VARIABLE, // x, y, etc. - TOKEN_NUMBER_INT, // int - TOKEN_NUMBER_DOUBLE, // double - TOKEN_POWER, // ^ or ** - TOKEN_SIGN, // + or - - TOKEN_FACTOR, // * or / - TOKEN_EQUAL, // = - TOKEN_END // null (end of input) + TOKEN_VARIABLE, // x, y, etc. + TOKEN_NUMBER_INT, // int + TOKEN_NUMBER_DOUBLE, // double + TOKEN_POWER, // ^ or ** + TOKEN_SIGN, // + or - + TOKEN_MULTIPLICATION, // * + TOKEN_DIVISION, // / + TOKEN_EQUAL, // = + TOKEN_END // null (end of input) - 1. VARIABLE | NUMBER_INT | NUMBER_DOUBLE | ! POWER | SIGN | ! FACTOR | ! EQUAL | END - 2. VARIABLE | NUMBER_INT | NUMBER_DOUBLE | POWER | SIGN | FACTOR | EQUAL | END - 3. VARIABLE | NUMBER_INT | NUMBER_DOUBLE | POWER | SIGN | FACTOR | EQUAL | END + 1. VAR | NUM | ! POW | SIGN | ! FACTOR | ! EQUAL | END + NS | NUM_I | NUM_D | NS | SIGN_P | SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | NS | NS + + 2. VAR | NUM | ! POW | ! SIGN | ! FACTOR | ! EQUAL | END + NS | NUM_I | NUM_D | NS | ! SIGN_P | ! SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | NS | NS + + 3. VAR | NUM | ! POW | SIGN | FACTOR | ! EQUAL | END + NS | NUM_I | NUM_D | NS | SIGN_P | SIGN_M | FACTOR_MUL | ! 
FACTOR_DIV | NS | NS term_position position; term_sign sign; @@ -45,12 +51,12 @@ static term_sign get_sign(token *tokens, int i, int *token_count) } else if (i == 0) // if most left term, the sign can be ommited for a '+' sign in front of a number or variable { - *token_count = 1; + *token_count = 0; return '+'; } else if (tokens[i - 1].type == TOKEN_EQUAL) // if first token after 'equal', the sign can be ommited for a '+' sign in front of a number or variable { - *token_count = 1; + *token_count = 0; return '+'; } @@ -59,9 +65,65 @@ static term_sign get_sign(token *tokens, int i, int *token_count) static double get_coefficient(token *tokens, int i, int *token_count) { - if (tokens[i].type) // placeholder - *token_count = 1; // placeholder - return 1.0; // placeholder + double coefficient; + + coefficient = 1.0; + + // forbidden tokens + if (tokens[i].type == TOKEN_POWER) + { + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a power token : " + tokens[i].value_char); + } + if (tokens[i].type == TOKEN_FACTOR) + { + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a factor token : " + tokens[i].value_char); + } + if (tokens[i].type == TOKEN_EQUAL) + { + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have an equal token : " + tokens[i].value_char); + } + if (tokens[i].type == TOKEN_SIGN) + { + stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a sign token : " + tokens[i].value_char); + } + + // if not coefficient token + if (tokens[i].type == TOKEN_VARIABLE) + { + *token_count = 0; + return coefficient; + } + + // if coefficient tokens + if (tokens[i].type == TOKEN_NUMBER) + { + (*token_count)++; + coefficient = tokens[i].value_double; + } + // detect more coefficients, like "3 * 2 / 5" etc + i++; + while (tokens[i].type == TOKEN_FACTOR) + { + i++; + if (tokens[i].type == TOKEN_NUMBER) + { + *token_count += 2; + if (tokens[i - 1].subtype == 
TOKEN_FACTOR_MULTIPLICATION) + { + coefficient *= tokens[i].value_double; + } + else if (tokens[i - 1].subtype == TOKEN_FACTOR_DIVISION) + { + coefficient /= tokens[i].value_double; + } + } + else + { + return coefficient; + } + } + + return coefficient; } static int get_exponent(token *tokens, int i, int *token_count)