lexer complete with luke memory solution

hugogogo
2026-04-29 12:41:19 +02:00
parent 9ced220c00
commit 26fa8025ef
3 changed files with 134 additions and 45 deletions

Changed file 1/3 — lexer header (token type enum, token union, lexerize prototype):

@@ -6,7 +6,8 @@
 typedef enum
 {
     TOKEN_VARIABLE, // x, y, etc.
-    TOKEN_NUMBER, // int or double
+    TOKEN_NUMBER_INT, // int
+    TOKEN_NUMBER_DOUBLE, // double
     TOKEN_POWER, // ^ or **
     TOKEN_PLUS, // +
     TOKEN_MINUS, // -
@@ -20,12 +21,12 @@ typedef struct
     token_type type;
     union
     {
-        double num_value; // For NUMBER
-        char var_value; // For VARIABLE (single char, e.g., 'x')
+        char value_char;
+        int value_int;
+        double value_double;
     };
 } token;
 
-#define MAX_TOKENS 100
-int lexerize(const char *input, token tokens[MAX_TOKENS]);
+void lexerize(const char *input, token *tokens);
 
 #endif
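
Note on the header change: the token struct is now a small tagged union, so the member that is read must match the type tag. A minimal sketch of that pattern (the print_token helper and the stdio include are mine, not part of the commit; it assumes the token typedef from this header is in scope):

#include <stdio.h>

/* hypothetical helper: dispatch on the type tag before touching the union */
static void print_token(const token *t)
{
    if (t->type == TOKEN_NUMBER_INT)
        printf("int %i\n", t->value_int);
    else if (t->type == TOKEN_NUMBER_DOUBLE)
        printf("double %f\n", t->value_double);
    else
        printf("'%c'\n", t->value_char);
}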

Changed file 2/3 — program entry point (main, debug token dump):

@@ -3,18 +3,46 @@
 #include "errors.h"
 #include <stdio.h> // tmp for float debug
 
+void remove_spaces(char *s)
+{
+    char *read = s;
+    char *write = s;
+
+    // copy all non-space chars
+    while (*read)
+    {
+        if (!ft_isspace(*read))
+        {
+            *write++ = *read;
+        }
+        read++;
+    }
+    *write = '\0';
+
+    // zero the rest of the buffer
+    while (write != read)
+    {
+        *write++ = '\0';
+    }
+}
+
 int main(int ac, char **av)
 {
     int i;
-    int ret;
+    int arg_len;
+    char *input;
 
     if (ac < 2)
     {
         return 0;
     }
 
-    // tmp debug output
+    ft_putstr("-> received args :\n"); // debug
     i = 0;
     while (i < ac)
     {
+        ft_putstr(" ");
         ft_putnbr(i);
         ft_putstr(" : ");
         ft_putstr(av[i]);
@@ -22,14 +50,22 @@ int main(int ac, char **av)
         i++;
     }
 
-    token tokens[MAX_TOKENS];
-    ret = lexerize(av[1], tokens);
-    if (ret <= 0)
-    {
-        stop_errors(ret);
-    }
+    input = av[1];
+    remove_spaces(input);
+    arg_len = ft_strlen(input);
+
+    ft_putstr("-> input without space : "); // debug
+    ft_putstr(input); // debug
+    ft_putchar('\n'); // debug
+    ft_putstr("-> arg_len : "); // debug
+    ft_putnbr(arg_len); // debug
+    ft_putchar('\n'); // debug
+
+    token tokens[arg_len];
+    lexerize(input, tokens);
 
     // tmp debug output
+    ft_putchar('\n'); // debug
     i = 0;
     while (tokens[i].type != TOKEN_END)
     {
@@ -37,8 +73,10 @@ int main(int ac, char **av)
         if (tokens[i].type == TOKEN_VARIABLE)
            ft_printf("%20s", "TOKEN_VARIABLE");
-        if (tokens[i].type == TOKEN_NUMBER)
-            ft_printf("%20s", "TOKEN_NUMBER");
+        if (tokens[i].type == TOKEN_NUMBER_INT)
+            ft_printf("%20s", "TOKEN_NUMBER_INT");
+        if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+            ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
         if (tokens[i].type == TOKEN_POWER)
             ft_printf("%20s", "TOKEN_POWER");
         if (tokens[i].type == TOKEN_PLUS)
@@ -54,13 +92,17 @@ int main(int ac, char **av)
         ft_putstr(" - value : ");
-        if (tokens[i].type == TOKEN_NUMBER)
+        if (tokens[i].type == TOKEN_NUMBER_INT)
         {
-            printf("%f\n", tokens[i].num_value);
+            printf("%i\n", tokens[i].value_int);
+        }
+        else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+        {
+            printf("%f\n", tokens[i].value_double);
         }
         else
         {
-            ft_printf("%c\n", tokens[i].var_value);
+            ft_printf("%c\n", tokens[i].value_char);
         }
         i++;
     }
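
The memory change in main replaces the fixed MAX_TOKENS array with a variable-length array sized from the space-stripped input: once remove_spaces has run, each remaining character can produce at most one token. A rough usage sketch (the sample string is mine; the commit sizes the array with arg_len itself, the + 1 below is my assumption to leave a slot for the closing TOKEN_END; ft_strlen is the project's libft function):

char buf[] = "3 * x ^ 2 + 1";
int len;

remove_spaces(buf);      // buf is now "3*x^2+1", the freed bytes are zeroed
len = ft_strlen(buf);    // 7
token tokens[len + 1];   // one slot per character, plus one for TOKEN_END
lexerize(buf, tokens);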

Changed file 3/3 — lexer implementation:

@@ -2,15 +2,6 @@
 #include "errors.h"
 #include <stdbool.h>
 
-static int skip_whitespace(const char *input, int input_pos)
-{
-    while (ft_isspace(input[input_pos]))
-    {
-        input_pos++;
-    }
-    return input_pos;
-}
-
 // any single letter is a valid variable, like "x" or "y"
 static bool token_is_variable(const char *input, int input_pos, int *token_size)
 {
@@ -22,8 +13,8 @@ static bool token_is_variable(const char *input, int input_pos, int *token_size)
     return false;
 }
 
-// number can be double "123.456"
-static bool token_is_number(const char *input, int input_pos, int *token_size, int *float_precision)
+// number can be int "123"
+static bool token_is_number_int(const char *input, int input_pos, int *token_size)
 {
     int number_size;
     int max_number_size;
@@ -33,6 +24,48 @@ static bool token_is_number(const char *input, int input_pos, int *token_size, int *float_precision)
         return false;
     }
+
+    number_size = 1;
+    max_number_size = 16; // max size for int
+    while (number_size <= max_number_size)
+    {
+        if (ft_isdigit(input[input_pos + number_size]))
+        {
+            number_size++;
+        }
+        else if (input[input_pos + number_size] == '.')
+        {
+            if (ft_isdigit(input[input_pos + number_size + 1]))
+            {
+                // number is double
+                return false;
+            }
+            else
+                break;
+        }
+        else
+            break;
+    }
+    if (number_size > max_number_size)
+    {
+        stop_errors(ERROR_NUMBER_TOO_BIG);
+    }
+    *token_size = number_size;
+    return true;
+}
+
+// number can be double "123.456"
+static bool token_is_number_double(const char *input, int input_pos, int *token_size)
+{
+    int number_size;
+    int max_number_size;
+    bool has_dot;
+
+    if (!ft_isdigit(input[input_pos]))
+    {
+        return false;
+    }
+    has_dot = false;
     number_size = 1;
     max_number_size = 129; // max size for double double is 128 bits, + the coma
     while (number_size <= max_number_size)
@@ -43,6 +76,17 @@ static bool token_is_number(const char *input, int input_pos, int *token_size, int *float_precision)
         }
         else if (input[input_pos + number_size] == '.')
         {
+            if (has_dot)
+            {
+                // number is not a valid double, it has 2 dots
+                return false;
+            }
+            if (!ft_isdigit(input[input_pos + number_size + 1]))
+            {
+                // number is not a double, it has no number after the dot
+                return false;
+            }
+            has_dot = true;
             number_size++;
         }
         else
@@ -119,20 +163,17 @@ static bool token_is_division(const char *input, int input_pos, int *token_size)
 /**
  * LEXER
  */
-int lexerize(const char *input, token tokens[MAX_TOKENS])
+void lexerize(const char *input, token *tokens)
 {
     int token_count;
     int input_pos;
     int token_size;
-    int float_precision;
 
     token_count = 0;
     input_pos = 0;
-    float_precision = 0;
     while (input[input_pos])
     {
         token_size = 0;
-        input_pos = skip_whitespace(input, input_pos);
 
         if (input[input_pos] == '\0')
         {
@@ -142,41 +183,48 @@ int lexerize(const char *input, token tokens[MAX_TOKENS])
         if (token_is_variable(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_VARIABLE;
-            tokens[token_count].var_value = 'x';
+            tokens[token_count].value_char = 'x';
         }
-        else if (token_is_number(input, input_pos, &token_size, &float_precision))
+        else if (token_is_number_int(input, input_pos, &token_size))
         {
-            tokens[token_count].type = TOKEN_NUMBER;
-            tokens[token_count].num_value = ft_atof(&input[input_pos]);
+            tokens[token_count].type = TOKEN_NUMBER_INT;
+            tokens[token_count].value_int = ft_atoi(&input[input_pos]);
+        }
+        else if (token_is_number_double(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_NUMBER_DOUBLE;
+            tokens[token_count].value_double = ft_atof(&input[input_pos]);
         }
         else if (token_is_power(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_POWER;
-            tokens[token_count].var_value = '^';
+            tokens[token_count].value_char = '^';
         }
         else if (token_is_plus(input, input_pos, &token_size))
        {
             tokens[token_count].type = TOKEN_PLUS;
-            tokens[token_count].var_value = '+';
+            tokens[token_count].value_char = '+';
         }
         else if (token_is_minus(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_MINUS;
-            tokens[token_count].var_value = '-';
+            tokens[token_count].value_char = '-';
         }
         else if (token_is_multiplication(input, input_pos, &token_size))
        {
             tokens[token_count].type = TOKEN_MULTIPLICATION;
-            tokens[token_count].var_value = '*';
+            tokens[token_count].value_char = '*';
         }
         else if (token_is_division(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_DIVISION;
-            tokens[token_count].var_value = '/';
+            tokens[token_count].value_char = '/';
         }
         else
         {
-            stop_errors(ERROR_UNKNOWN_TOKEN);
+            // tmp
+            token_size = 1;
+            // stop_errors(ERROR_UNKNOWN_TOKEN);
         }
 
         token_count++;
@@ -188,7 +236,5 @@ int lexerize(const char *input, token tokens[MAX_TOKENS])
     }
 
     tokens[token_count].type = TOKEN_END;
-    tokens[token_count].var_value = '\0';
-
-    return 1;
+    tokens[token_count].value_char = '\0';
 }
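
With the split recognizers above, lexerize tries token_is_number_int first; that function returns false as soon as it sees a '.' followed by a digit, so token_is_number_double then claims the literal. A worked example of the resulting stream (my illustration, not output from the commit):

input "2.5*x+3" lexes to:
TOKEN_NUMBER_DOUBLE (2.5), TOKEN_MULTIPLICATION ('*'), TOKEN_VARIABLE ('x'),
TOKEN_PLUS ('+'), TOKEN_NUMBER_INT (3), TOKEN_END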