adding subtypes

This commit is contained in:
hugogogo
2026-05-02 09:42:04 +02:00
parent bcbd3b2abb
commit a250a170cb
4 changed files with 307 additions and 54 deletions

View File

@@ -5,25 +5,172 @@
#include "errors.h" #include "errors.h"
#include <stdbool.h> #include <stdbool.h>
/**
* PROPOSITION 1
*/
// typedef enum
// {
// TOKEN_VARIABLE, // x, y, etc.
// TOKEN_NUMBER, // int or double
// TOKEN_POWER, // ^ or **
// TOKEN_SIGN, // + or -
// TOKEN_FACTOR, // * or / or :
// TOKEN_EQUAL, // =
// TOKEN_END // null (end of input)
// } token_type;
//
// typedef enum
// {
// TOKEN_NUMBER_INT, // int
// TOKEN_NUMBER_DOUBLE, // double
// TOKEN_SIGN_ADD, // +
// TOKEN_SIGN_MINUS, // -
// TOKEN_FACTOR_MULTIPLICATION, // *
// TOKEN_FACTOR_DIVISION, // / or :
// } token_subtype;
//
// typedef struct
// {
// token_type type;
// token_subtype subtype;
// union
// {
// char value_char;
// double value_double;
// };
// } token;
/**
* PROPOSITION 2
*/
// // TYPES and SUBTYPES
// typedef enum
// {
// TOKEN_VARIABLE, // x, y, etc.
// TOKEN_NUMBER, // int or double
// TOKEN_POWER, // ^ or **
// TOKEN_SIGN, // + or -
// TOKEN_FACTOR, // * or / or :
// TOKEN_EQUAL, // =
// TOKEN_END // null (end of input)
// } token_type;
//
// typedef enum
// {
// NUMBER_INT,
// NUMBER_DOUBLE
// } number_subtype;
//
// typedef enum
// {
// SIGN_ADD, // +
// SIGN_MINUS // -
// } sign_subtype;
//
// typedef enum
// {
// FACTOR_MULTIPLICATION, // *
// FACTOR_DIVISION, // / or :
// } factor_subtype;
//
// // DATA
//
// typedef struct
// {
// char value; // e.g., 'x', 'y'
// } token_variable;
//
// typedef struct
// {
// number_subtype subtype;
// double value;
// } token_number;
//
// typedef struct
// {
// sign_subtype subtype;
// char value;
// } token_sign;
//
// typedef struct
// {
// factor_subtype subtype;
// char value;
// } token_factor;
//
// typedef struct
// {
// char value;
// } token_power;
//
// typedef struct
// {
// char value;
// } token_equal;
//
// typedef struct
// {
// char value;
// } token_end;
//
// // TOKEN
//
// typedef union
// {
// token_variable variable; // value
// token_number number; // subtype [INT, DOUBLE], value
// token_sign sign; // subtype [PLUS, MINUS], value
// token_factor factor; // subtype [MULT, DIV], value
// token_power power; // value
// token_equal equal; // value
// token_end end; // value
// } token_data;
//
// typedef struct
// {
// token_type type;
// token_data data;
// } token;
/**
* PROPOSITION 3
*/
typedef enum typedef enum
{ {
TOKEN_VARIABLE, // x, y, etc. TOKEN_VARIABLE, // x, y, etc.
TOKEN_NUMBER_INT, // int TOKEN_NUMBER, // int or double
TOKEN_NUMBER_DOUBLE, // double TOKEN_POWER, // ^ or **
TOKEN_POWER, // ^ or ** TOKEN_SIGN, // + or -
TOKEN_SIGN, // + or - TOKEN_FACTOR, // * or / or :
TOKEN_FACTOR, // * or / TOKEN_EQUAL, // =
TOKEN_EQUAL, // = TOKEN_END // null (end of input)
TOKEN_END // null (end of input)
} token_type; } token_type;
/*
 * Secondary classification stored next to a token's main type.
 *
 * Token types that have no finer distinction carry TOKEN_NO_SUBTYPE.
 * The remaining values refine the NUMBER, SIGN and FACTOR token types.
 * Numeric values are spelled out explicitly; they are the same values
 * the compiler would assign implicitly, starting from zero.
 */
typedef enum
{
    TOKEN_NO_SUBTYPE = 0,            // token type has no subtype

    // subtypes of a NUMBER token
    TOKEN_NUMBER_INT = 1,            // integer literal, e.g. "123"
    TOKEN_NUMBER_DOUBLE = 2,         // decimal literal, e.g. "123.456"

    // subtypes of a SIGN token
    TOKEN_SIGN_PLUS = 3,             // '+'
    TOKEN_SIGN_MINUS = 4,            // '-'

    // subtypes of a FACTOR token
    TOKEN_FACTOR_MULTIPLICATION = 5, // '*'
    TOKEN_FACTOR_DIVISION = 6        // '/' or ':'
} token_subtype;
typedef struct typedef struct
{ {
token_type type; token_type type;
token_subtype subtype;
union union
{ {
char value_char; char value_char;
int value_int;
double value_double; double value_double;
}; };
} token; } token;

View File

@@ -87,29 +87,41 @@ int main(int ac, char **av)
ft_printf("token %2i - type : ", i); ft_printf("token %2i - type : ", i);
if (tokens[i].type == TOKEN_VARIABLE) if (tokens[i].type == TOKEN_VARIABLE)
ft_printf("%20s", "TOKEN_VARIABLE"); ft_printf("%14s%30s", "TOKEN_VARIABLE", "");
else if (tokens[i].type == TOKEN_NUMBER_INT) else if (tokens[i].type == TOKEN_NUMBER)
ft_printf("%20s", "TOKEN_NUMBER_INT"); {
else if (tokens[i].type == TOKEN_NUMBER_DOUBLE) ft_printf("%14s", "TOKEN_NUMBER");
ft_printf("%20s", "TOKEN_NUMBER_DOUBLE"); if (tokens[i].subtype == TOKEN_NUMBER_INT)
ft_printf("%30s", "TOKEN_NUMBER_INT");
else if (tokens[i].subtype == TOKEN_NUMBER_DOUBLE)
ft_printf("%30s", "TOKEN_NUMBER_DOUBLE");
}
else if (tokens[i].type == TOKEN_POWER) else if (tokens[i].type == TOKEN_POWER)
ft_printf("%20s", "TOKEN_POWER"); ft_printf("%14s%30s", "TOKEN_POWER", "");
else if (tokens[i].type == TOKEN_SIGN) else if (tokens[i].type == TOKEN_SIGN)
ft_printf("%20s", "TOKEN_SIGN"); {
ft_printf("%14s", "TOKEN_SIGN");
if (tokens[i].subtype == TOKEN_SIGN_PLUS)
ft_printf("%30s", "TOKEN_SIGN_PLUS");
else if (tokens[i].subtype == TOKEN_SIGN_MINUS)
ft_printf("%30s", "TOKEN_SIGN_MINUS");
}
else if (tokens[i].type == TOKEN_FACTOR) else if (tokens[i].type == TOKEN_FACTOR)
ft_printf("%20s", "TOKEN_FACTOR"); {
ft_printf("%14s", "TOKEN_FACTOR");
if (tokens[i].subtype == TOKEN_FACTOR_MULTIPLICATION)
ft_printf("%30s", "TOKEN_FACTOR_MULTIPLICATION");
else if (tokens[i].subtype == TOKEN_FACTOR_DIVISION)
ft_printf("%30s", "TOKEN_FACTOR_DIVISION");
}
else if (tokens[i].type == TOKEN_EQUAL) else if (tokens[i].type == TOKEN_EQUAL)
ft_printf("%20s", "TOKEN_EQUAL"); ft_printf("%14s%30s", "TOKEN_EQUAL", "");
else if (tokens[i].type == TOKEN_END) else if (tokens[i].type == TOKEN_END)
ft_printf("%20s", "TOKEN_END"); ft_printf("%14s%30s", "TOKEN_END", "");
ft_putstr(" - value : "); ft_putstr(" - value : ");
if (tokens[i].type == TOKEN_NUMBER_INT) if (tokens[i].type == TOKEN_NUMBER)
{
printf("%i\n", tokens[i].value_int);
}
else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
{ {
printf("%g\n", tokens[i].value_double); printf("%g\n", tokens[i].value_double);
} }

View File

@@ -1,6 +1,6 @@
#include "lexer.h" #include "lexer.h"
// any single letter is a valid variable, like 'x' or 'y' // token is alphabet letter, like 'x' or 'y'
static bool token_is_variable(const char *input, int input_pos, int *token_size) static bool token_is_variable(const char *input, int input_pos, int *token_size)
{ {
if (ft_isalpha(input[input_pos])) if (ft_isalpha(input[input_pos]))
@@ -11,7 +11,7 @@ static bool token_is_variable(const char *input, int input_pos, int *token_size)
return false; return false;
} }
// number can be int "123" // token is int "123"
static bool token_is_number_int(const char *input, int input_pos, int *token_size) static bool token_is_number_int(const char *input, int input_pos, int *token_size)
{ {
int number_size; int number_size;
@@ -51,7 +51,7 @@ static bool token_is_number_int(const char *input, int input_pos, int *token_siz
return true; return true;
} }
// number can be double "123.456" // token is double "123.456"
static bool token_is_number_double(const char *input, int input_pos, int *token_size) static bool token_is_number_double(const char *input, int input_pos, int *token_size)
{ {
int number_size; int number_size;
@@ -98,7 +98,7 @@ static bool token_is_number_double(const char *input, int input_pos, int *token_
return true; return true;
} }
// power can be '^' and "**" // token is '^' or "**"
static bool token_is_power(const char *input, int input_pos, int *token_size) static bool token_is_power(const char *input, int input_pos, int *token_size)
{ {
if (input[input_pos] == '^') if (input[input_pos] == '^')
@@ -114,14 +114,20 @@ static bool token_is_power(const char *input, int input_pos, int *token_size)
return false; return false;
} }
// sign can be '+' or '-' // token is '+'
static bool token_is_sign(const char *input, int input_pos, int *token_size) static bool token_is_sign_plus(const char *input, int input_pos, int *token_size)
{ {
if (input[input_pos] == '+') if (input[input_pos] == '+')
{ {
*token_size = 1; *token_size = 1;
return true; return true;
} }
return false;
}
// token is '-'
static bool token_is_sign_minus(const char *input, int input_pos, int *token_size)
{
if (input[input_pos] == '-') if (input[input_pos] == '-')
{ {
*token_size = 1; *token_size = 1;
@@ -130,14 +136,20 @@ static bool token_is_sign(const char *input, int input_pos, int *token_size)
return false; return false;
} }
// factor can be '*' or '/' or ':' // token is '*'
static bool token_is_factor(const char *input, int input_pos, int *token_size) static bool token_is_factor_multiplication(const char *input, int input_pos, int *token_size)
{ {
if (input[input_pos] == '*') if (input[input_pos] == '*')
{ {
*token_size = 1; *token_size = 1;
return true; return true;
} }
return false;
}
// token is '/' or ':'
static bool token_is_factor_division(const char *input, int input_pos, int *token_size)
{
if (input[input_pos] == '/') if (input[input_pos] == '/')
{ {
*token_size = 1; *token_size = 1;
@@ -151,7 +163,7 @@ static bool token_is_factor(const char *input, int input_pos, int *token_size)
return false; return false;
} }
// detect a single '=' // token is '='
static bool token_is_equal(const char *input, int input_pos, int *token_size) static bool token_is_equal(const char *input, int input_pos, int *token_size)
{ {
if (input[input_pos] == '=') if (input[input_pos] == '=')
@@ -185,36 +197,55 @@ int lexerize(const char *input, token *tokens)
if (token_is_variable(input, input_pos, &token_size)) if (token_is_variable(input, input_pos, &token_size))
{ {
tokens[tokens_count].type = TOKEN_VARIABLE; tokens[tokens_count].type = TOKEN_VARIABLE;
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
tokens[tokens_count].value_char = 'x'; tokens[tokens_count].value_char = 'x';
} }
else if (token_is_number_int(input, input_pos, &token_size)) else if (token_is_number_int(input, input_pos, &token_size))
{ {
tokens[tokens_count].type = TOKEN_NUMBER_INT; tokens[tokens_count].type = TOKEN_NUMBER;
tokens[tokens_count].value_int = ft_atoi(&input[input_pos]); tokens[tokens_count].subtype = TOKEN_NUMBER_INT;
tokens[tokens_count].value_double = ft_atof(&input[input_pos]); // we keep info it's an int, but treat it as a double
} }
else if (token_is_number_double(input, input_pos, &token_size)) else if (token_is_number_double(input, input_pos, &token_size))
{ {
tokens[tokens_count].type = TOKEN_NUMBER_DOUBLE; tokens[tokens_count].type = TOKEN_NUMBER;
tokens[tokens_count].subtype = TOKEN_NUMBER_DOUBLE;
tokens[tokens_count].value_double = ft_atof(&input[input_pos]); tokens[tokens_count].value_double = ft_atof(&input[input_pos]);
} }
else if (token_is_power(input, input_pos, &token_size)) else if (token_is_power(input, input_pos, &token_size))
{ {
tokens[tokens_count].type = TOKEN_POWER; tokens[tokens_count].type = TOKEN_POWER;
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
tokens[tokens_count].value_char = '^'; tokens[tokens_count].value_char = '^';
} }
else if (token_is_sign(input, input_pos, &token_size)) else if (token_is_sign_plus(input, input_pos, &token_size))
{ {
tokens[tokens_count].type = TOKEN_SIGN; tokens[tokens_count].type = TOKEN_SIGN;
tokens[tokens_count].subtype = TOKEN_SIGN_PLUS;
tokens[tokens_count].value_char = input[input_pos]; tokens[tokens_count].value_char = input[input_pos];
} }
else if (token_is_factor(input, input_pos, &token_size)) else if (token_is_sign_minus(input, input_pos, &token_size))
{
tokens[tokens_count].type = TOKEN_SIGN;
tokens[tokens_count].subtype = TOKEN_SIGN_MINUS;
tokens[tokens_count].value_char = input[input_pos];
}
else if (token_is_factor_multiplication(input, input_pos, &token_size))
{ {
tokens[tokens_count].type = TOKEN_FACTOR; tokens[tokens_count].type = TOKEN_FACTOR;
tokens[tokens_count].subtype = TOKEN_FACTOR_MULTIPLICATION;
tokens[tokens_count].value_char = input[input_pos];
}
else if (token_is_factor_division(input, input_pos, &token_size))
{
tokens[tokens_count].type = TOKEN_FACTOR;
tokens[tokens_count].subtype = TOKEN_FACTOR_DIVISION;
tokens[tokens_count].value_char = input[input_pos]; tokens[tokens_count].value_char = input[input_pos];
} }
else if (token_is_equal(input, input_pos, &token_size)) else if (token_is_equal(input, input_pos, &token_size))
{ {
tokens[tokens_count].type = TOKEN_EQUAL; tokens[tokens_count].type = TOKEN_EQUAL;
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
tokens[tokens_count].value_char = '='; tokens[tokens_count].value_char = '=';
} }
else else
@@ -231,6 +262,7 @@ int lexerize(const char *input, token *tokens)
} }
tokens[tokens_count].type = TOKEN_END; tokens[tokens_count].type = TOKEN_END;
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
tokens[tokens_count].value_char = '\0'; tokens[tokens_count].value_char = '\0';
return tokens_count; return tokens_count;

View File

@@ -1,18 +1,24 @@
#include "parser.h" #include "parser.h"
/** /**
TOKEN_VARIABLE, // x, y, etc. TOKEN_VARIABLE, // x, y, etc.
TOKEN_NUMBER_INT, // int TOKEN_NUMBER_INT, // int
TOKEN_NUMBER_DOUBLE, // double TOKEN_NUMBER_DOUBLE, // double
TOKEN_POWER, // ^ or ** TOKEN_POWER, // ^ or **
TOKEN_SIGN, // + or - TOKEN_SIGN, // + or -
TOKEN_FACTOR, // * or / TOKEN_MULTIPLICATION, // *
TOKEN_EQUAL, // = TOKEN_DIVISION, // /
TOKEN_END // null (end of input) TOKEN_EQUAL, // =
TOKEN_END // null (end of input)
1. VARIABLE | NUMBER_INT | NUMBER_DOUBLE | ! POWER | SIGN | ! FACTOR | ! EQUAL | END 1. VAR | NUM | ! POW | SIGN | ! FACTOR | ! EQUAL | END
2. VARIABLE | NUMBER_INT | NUMBER_DOUBLE | POWER | SIGN | FACTOR | EQUAL | END NS | NUM_I | NUM_D | NS | SIGN_P | SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | NS | NS
3. VARIABLE | NUMBER_INT | NUMBER_DOUBLE | POWER | SIGN | FACTOR | EQUAL | END
2. VAR | NUM | ! POW | ! SIGN | ! FACTOR | ! EQUAL | END
NS | NUM_I | NUM_D | NS | ! SIGN_P | ! SIGN_M | ! FACTOR_MUL | ! FACTOR_DIV | NS | NS
3. VAR | NUM | ! POW | SIGN | FACTOR | ! EQUAL | END
NS | NUM_I | NUM_D | NS | SIGN_P | SIGN_M | FACTOR_MUL | ! FACTOR_DIV | NS | NS
term_position position; term_position position;
term_sign sign; term_sign sign;
@@ -45,12 +51,12 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
} }
else if (i == 0) // if most left term, the sign can be ommited for a '+' sign in front of a number or variable else if (i == 0) // if most left term, the sign can be ommited for a '+' sign in front of a number or variable
{ {
*token_count = 1; *token_count = 0;
return '+'; return '+';
} }
else if (tokens[i - 1].type == TOKEN_EQUAL) // if first token after 'equal', the sign can be ommited for a '+' sign in front of a number or variable else if (tokens[i - 1].type == TOKEN_EQUAL) // if first token after 'equal', the sign can be ommited for a '+' sign in front of a number or variable
{ {
*token_count = 1; *token_count = 0;
return '+'; return '+';
} }
@@ -59,9 +65,65 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
/*
 * Read the numeric coefficient of a term starting at tokens[i].
 *
 * tokens      : token array produced by the lexer
 * i           : index of the token where the coefficient is expected
 * token_count : in/out counter of tokens consumed by the coefficient
 *                 - set to 0 when the term starts with a variable
 *                   (implicit coefficient of 1.0)
 *                 - incremented by 1 for the leading number
 *                 - incremented by 2 for every following "<factor> <number>" pair
 *               NOTE(review): the number path only increments the counter, so
 *               the caller is presumably expected to initialise it -- confirm.
 *
 * Returns the coefficient: 1.0 when no number is present, otherwise the
 * leading number combined with every chained multiplication / division,
 * e.g. "3 * 2 / 5" gives 1.2.
 *
 * A power, factor, equal or sign token at this position is reported through
 * stop_errors(ERROR_TOKEN_POSITION, ...).
 */
static double get_coefficient(token *tokens, int i, int *token_count)
{
    double coefficient;

    coefficient = 1.0;

    // forbidden tokens
    // In C, `"literal" + char` does not concatenate: it offsets the literal's
    // address by the character code. Each message is therefore built in a
    // local buffer whose trailing '?' placeholder receives the offending char.
    // NOTE(review): assumes stop_errors() does not keep the message pointer
    // once this function has returned -- confirm.
    if (tokens[i].type == TOKEN_POWER)
    {
        char message[] = "at coefficient place, we should not have a power token : ?";

        message[sizeof(message) - 2] = tokens[i].value_char;
        stop_errors(ERROR_TOKEN_POSITION, message);
    }
    if (tokens[i].type == TOKEN_FACTOR)
    {
        char message[] = "at coefficient place, we should not have a factor token : ?";

        message[sizeof(message) - 2] = tokens[i].value_char;
        stop_errors(ERROR_TOKEN_POSITION, message);
    }
    if (tokens[i].type == TOKEN_EQUAL)
    {
        char message[] = "at coefficient place, we should not have an equal token : ?";

        message[sizeof(message) - 2] = tokens[i].value_char;
        stop_errors(ERROR_TOKEN_POSITION, message);
    }
    if (tokens[i].type == TOKEN_SIGN)
    {
        char message[] = "at coefficient place, we should not have a sign token : ?";

        message[sizeof(message) - 2] = tokens[i].value_char;
        stop_errors(ERROR_TOKEN_POSITION, message);
    }

    // if not coefficient token
    if (tokens[i].type == TOKEN_VARIABLE)
    {
        *token_count = 0;
        return coefficient;
    }

    // Anything that is not a number (e.g. the end token) carries no
    // coefficient; return now instead of scanning past the current token.
    if (tokens[i].type != TOKEN_NUMBER)
    {
        return coefficient;
    }

    // if coefficient tokens
    (*token_count)++;
    coefficient = tokens[i].value_double;

    // detect more coefficients, like "3 * 2 / 5" etc
    // tokens[i] is the candidate factor and tokens[i + 1] its operand; a factor
    // that is not followed by a number is left for the caller to handle.
    i++;
    while (tokens[i].type == TOKEN_FACTOR && tokens[i + 1].type == TOKEN_NUMBER)
    {
        if (tokens[i].subtype == TOKEN_FACTOR_MULTIPLICATION)
        {
            coefficient *= tokens[i + 1].value_double;
        }
        else if (tokens[i].subtype == TOKEN_FACTOR_DIVISION)
        {
            coefficient /= tokens[i + 1].value_double;
        }
        *token_count += 2;
        i += 2; // step over the factor and its number so the next pair is examined
    }

    return coefficient;
}
static int get_exponent(token *tokens, int i, int *token_count) static int get_exponent(token *tokens, int i, int *token_count)