#include "lexer.h"

/**
 * Checks whether the character at input[input_pos] is a variable token.
 *
 * A variable is a single alphabetic character (as reported by ft_isalpha),
 * like 'x' or 'y'.
 *
 * @param input       string being tokenized
 * @param input_pos   index of the character to inspect
 * @param token_size  set to 1 on a match; left untouched otherwise
 * @return true when the character is a letter, false otherwise
 */
static bool token_is_variable(const char *input, int input_pos, int *token_size)
{
	if (!ft_isalpha(input[input_pos]))
	{
		return false;
	}
	*token_size = 1;
	return true;
}

// token is int "123"
|
|
static bool token_is_number_int(const char *input, int input_pos, int *token_size)
|
|
{
|
|
int number_size;
|
|
int max_number_size;
|
|
|
|
if (!ft_isdigit(input[input_pos]))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
number_size = 1;
|
|
max_number_size = 16; // max size for int
|
|
while (number_size <= max_number_size)
|
|
{
|
|
if (ft_isdigit(input[input_pos + number_size]))
|
|
{
|
|
number_size++;
|
|
}
|
|
else if (input[input_pos + number_size] == '.')
|
|
{
|
|
if (ft_isdigit(input[input_pos + number_size + 1]))
|
|
{
|
|
// number is double
|
|
return false;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
if (number_size > max_number_size)
|
|
{
|
|
stop_errors(ERROR_NUMBER_TOO_BIG, &input[input_pos]);
|
|
}
|
|
*token_size = number_size;
|
|
return true;
|
|
}
|
|
|
|
// token is double "123.456"
|
|
static bool token_is_number_double(const char *input, int input_pos, int *token_size)
|
|
{
|
|
int number_size;
|
|
int max_number_size;
|
|
bool has_dot;
|
|
|
|
if (!ft_isdigit(input[input_pos]))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
has_dot = false;
|
|
number_size = 1;
|
|
max_number_size = 24; // max char needed to represent double : 1 sign + 1 point + 17 fractinoal part + 5 exponent
|
|
while (number_size <= max_number_size)
|
|
{
|
|
if (ft_isdigit(input[input_pos + number_size]))
|
|
{
|
|
number_size++;
|
|
}
|
|
else if (input[input_pos + number_size] == '.')
|
|
{
|
|
if (has_dot)
|
|
{
|
|
// number is not a valid token, it has 2 dots
|
|
return false;
|
|
}
|
|
if (!ft_isdigit(input[input_pos + number_size + 1]))
|
|
{
|
|
// number is not valid token, it has a dot with no number after the dot
|
|
return false;
|
|
}
|
|
has_dot = true;
|
|
number_size++;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
if (number_size > max_number_size)
|
|
{
|
|
stop_errors(ERROR_NUMBER_TOO_BIG, &input[input_pos]);
|
|
}
|
|
*token_size = number_size;
|
|
return true;
|
|
}
|
|
|
|
/**
 * Checks whether a power operator, '^' or "**", starts at input[input_pos].
 *
 * @param input       NUL-terminated string being tokenized
 * @param input_pos   index of the first character of the candidate token
 * @param token_size  set to 1 for '^' and 2 for "**"; untouched otherwise
 * @return true when a power operator was recognized, false otherwise
 */
static bool token_is_power(const char *input, int input_pos, int *token_size)
{
	const char	*cursor = &input[input_pos];

	if (cursor[0] == '^')
	{
		*token_size = 1;
		return true;
	}
	// Short-circuit: the second character is only read when the first one
	// is '*', so the terminating NUL is never read past.
	if (cursor[0] == '*' && cursor[1] == '*')
	{
		*token_size = 2;
		return true;
	}
	return false;
}

/**
 * Checks whether the character at input[input_pos] is the '+' sign.
 *
 * @param input       string being tokenized
 * @param input_pos   index of the character to inspect
 * @param token_size  set to 1 on a match; left untouched otherwise
 * @return true when the character is '+', false otherwise
 */
static bool token_is_sign_plus(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] != '+')
	{
		return false;
	}
	*token_size = 1;
	return true;
}

/**
 * Checks whether the character at input[input_pos] is the '-' sign.
 *
 * @param input       string being tokenized
 * @param input_pos   index of the character to inspect
 * @param token_size  set to 1 on a match; left untouched otherwise
 * @return true when the character is '-', false otherwise
 */
static bool token_is_sign_minus(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] != '-')
	{
		return false;
	}
	*token_size = 1;
	return true;
}

/**
 * Checks whether the character at input[input_pos] is the multiplication
 * operator '*'.
 *
 * Only one character is inspected, so the first '*' of "**" matches as well.
 *
 * @param input       string being tokenized
 * @param input_pos   index of the character to inspect
 * @param token_size  set to 1 on a match; left untouched otherwise
 * @return true when the character is '*', false otherwise
 */
static bool token_is_factor_multiplication(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] != '*')
	{
		return false;
	}
	*token_size = 1;
	return true;
}

/**
 * Checks whether the character at input[input_pos] is a division operator,
 * written either '/' or ':'.
 *
 * @param input       string being tokenized
 * @param input_pos   index of the character to inspect
 * @param token_size  set to 1 on a match; left untouched otherwise
 * @return true when the character is '/' or ':', false otherwise
 */
static bool token_is_factor_division(const char *input, int input_pos, int *token_size)
{
	const char	symbol = input[input_pos];

	if (symbol != '/' && symbol != ':')
	{
		return false;
	}
	*token_size = 1;
	return true;
}

/**
 * Checks whether the character at input[input_pos] is the equal sign '='.
 *
 * @param input       string being tokenized
 * @param input_pos   index of the character to inspect
 * @param token_size  set to 1 on a match; left untouched otherwise
 * @return true when the character is '=', false otherwise
 */
static bool token_is_equal(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] != '=')
	{
		return false;
	}
	*token_size = 1;
	return true;
}

/**
|
|
* LEXER
|
|
*/
|
|
int lexerize(const char *input, token *tokens)
|
|
{
|
|
int tokens_count;
|
|
int input_pos;
|
|
int token_size;
|
|
|
|
tokens_count = 0;
|
|
input_pos = 0;
|
|
while (input[input_pos])
|
|
{
|
|
token_size = 0;
|
|
|
|
if (input[input_pos] == '\0')
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (token_is_variable(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_VARIABLE;
|
|
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
|
|
tokens[tokens_count].value_char = 'x';
|
|
}
|
|
else if (token_is_number_int(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_NUMBER;
|
|
tokens[tokens_count].subtype = TOKEN_NUMBER_INT;
|
|
tokens[tokens_count].value_double = ft_atof(&input[input_pos]); // we keep info it's an int, but treat it as a double
|
|
}
|
|
else if (token_is_number_double(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_NUMBER;
|
|
tokens[tokens_count].subtype = TOKEN_NUMBER_DOUBLE;
|
|
tokens[tokens_count].value_double = ft_atof(&input[input_pos]);
|
|
}
|
|
else if (token_is_power(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_POWER;
|
|
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
|
|
tokens[tokens_count].value_char = '^';
|
|
}
|
|
else if (token_is_sign_plus(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_SIGN;
|
|
tokens[tokens_count].subtype = TOKEN_SIGN_PLUS;
|
|
tokens[tokens_count].value_char = input[input_pos];
|
|
}
|
|
else if (token_is_sign_minus(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_SIGN;
|
|
tokens[tokens_count].subtype = TOKEN_SIGN_MINUS;
|
|
tokens[tokens_count].value_char = input[input_pos];
|
|
}
|
|
else if (token_is_factor_multiplication(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_FACTOR;
|
|
tokens[tokens_count].subtype = TOKEN_FACTOR_MULTIPLICATION;
|
|
tokens[tokens_count].value_char = input[input_pos];
|
|
}
|
|
else if (token_is_factor_division(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_FACTOR;
|
|
tokens[tokens_count].subtype = TOKEN_FACTOR_DIVISION;
|
|
tokens[tokens_count].value_char = input[input_pos];
|
|
}
|
|
else if (token_is_equal(input, input_pos, &token_size))
|
|
{
|
|
tokens[tokens_count].type = TOKEN_EQUAL;
|
|
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
|
|
tokens[tokens_count].value_char = '=';
|
|
}
|
|
else
|
|
{
|
|
stop_errors(ERROR_UNKNOWN_TOKEN, &input[input_pos]);
|
|
}
|
|
|
|
tokens_count++;
|
|
if (token_size == 0)
|
|
{
|
|
stop_errors(ERROR_UNKNOWN_TOKEN, &input[input_pos]);
|
|
}
|
|
input_pos += token_size;
|
|
}
|
|
|
|
tokens[tokens_count].type = TOKEN_END;
|
|
tokens[tokens_count].subtype = TOKEN_NO_SUBTYPE;
|
|
tokens[tokens_count].value_char = '\0';
|
|
|
|
return tokens_count;
|
|
}
|