lexer adding is_number

This commit is contained in:
hugogogo
2026-04-28 23:42:25 +02:00
parent fb81f200d9
commit 5108571deb
4 changed files with 152 additions and 11 deletions

View File

@@ -7,6 +7,7 @@ typedef enum
{ {
ERROR_BASIC = 0, ERROR_BASIC = 0,
ERROR_UNKNOWN_TOKEN = -1, ERROR_UNKNOWN_TOKEN = -1,
ERROR_NUMBER_TOO_BIG = -2,
} program_error; } program_error;
int stop_errors(int err); int stop_errors(int err);

View File

@@ -5,11 +5,11 @@
typedef enum typedef enum
{ {
TOKEN_PLUS, // +
TOKEN_MINUS, // -
TOKEN_VARIABLE, // x, y, etc. TOKEN_VARIABLE, // x, y, etc.
TOKEN_NUMBER, // int or double TOKEN_NUMBER, // int or double
TOKEN_POWER, // ^ or ** TOKEN_POWER, // ^ or **
TOKEN_PLUS, // +
TOKEN_MINUS, // -
TOKEN_MULTIPLICATION, // * TOKEN_MULTIPLICATION, // *
TOKEN_DIVISION, // / TOKEN_DIVISION, // /
TOKEN_END // null (end of input) TOKEN_END // null (end of input)

View File

@@ -7,7 +7,9 @@ int stop_errors(int err)
case ERROR_UNKNOWN_TOKEN: case ERROR_UNKNOWN_TOKEN:
ft_putstr_fd("error: unknown token\n", STDERR_FILENO); ft_putstr_fd("error: unknown token\n", STDERR_FILENO);
break; break;
case ERROR_NUMBER_TOO_BIG:
ft_putstr_fd("error: number is too big\n", STDERR_FILENO);
break;
default: default:
ft_putstr_fd("unknown error\n", STDERR_FILENO); ft_putstr_fd("unknown error\n", STDERR_FILENO);
break; break;

View File

@@ -1,5 +1,6 @@
#include "lexer.h" #include "lexer.h"
#include "errors.h" #include "errors.h"
#include <stdbool.h>
static int skip_whitespace(const char *input, int input_pos) static int skip_whitespace(const char *input, int input_pos)
{ {
@@ -10,19 +11,123 @@ static int skip_whitespace(const char *input, int input_pos)
return input_pos; return input_pos;
} }
/*
 * Reports whether the character at input[input_pos] starts a variable token.
 * On a match, *token_size is set to 1 and true is returned; otherwise
 * *token_size is left untouched and false is returned.
 *
 * NOTE(review): the signature line and the first body line below still carry
 * text of the removed token_is_plus fused in front of the new code (residue
 * of the side-by-side diff view) -- clean up before compiling.
 */
static int token_is_plus(const char *input, int input_pos) static bool token_is_variable(const char *input, int input_pos, int *token_size)
{
return (input[input_pos] == '+'); if (input[input_pos] == 'x' || input[input_pos] == 'X')
{
// NOTE(review): 'x' / 'X' is rejected here, yet lexerize stores 'x' as var_value
// for every variable token -- confirm this condition is not inverted.
return false;
}
// Any other alphabetic character (per ft_isalpha) is accepted as a one-character variable.
if (ft_isalpha(input[input_pos]))
{
*token_size = 1;
return true;
}
return false;
}
/*
 * Recognizes a numeric literal starting at input[input_pos].
 *
 * A literal is one digit followed by any mix of further digits and at most
 * one '.'. On a match, *token_size receives the literal's length in
 * characters and true is returned. If the first character is not a digit,
 * false is returned and *token_size is left untouched.
 *
 * A literal longer than max_number_size characters is reported through
 * stop_errors(ERROR_NUMBER_TOO_BIG).
 * NOTE(review): if stop_errors returns, the over-long literal is still
 * accepted with the truncated size, as before -- confirm stop_errors
 * terminates the program.
 */
static bool token_is_number(const char *input, int input_pos, int *token_size)
{
    int number_size;
    int max_number_size;
    bool seen_dot;
    char current;

    if (!ft_isnumber(input[input_pos]))
    {
        return false;
    }
    number_size = 1;
    max_number_size = 129; // longest accepted literal: 128 characters plus one decimal point
    seen_dot = false;
    // The loop may count one character past the limit so that an over-long
    // literal can be detected afterwards.
    while (number_size <= max_number_size)
    {
        current = input[input_pos + number_size];
        if (ft_isnumber(current))
        {
            number_size++;
        }
        else if (current == '.' && !seen_dot)
        {
            seen_dot = true;
            number_size++;
        }
        else
        {
            // Anything else (a second '.', an operator, the terminating '\0')
            // ends the literal; without this break the loop never terminated.
            break;
        }
    }
    if (number_size > max_number_size)
    {
        stop_errors(ERROR_NUMBER_TOO_BIG);
    }
    *token_size = number_size;
    return true;
}
/*
 * Operator recognizers.
 *
 * Each function inspects the text at input[input_pos]. On a match it stores
 * the operator's length in *token_size and returns true; otherwise it returns
 * false and leaves *token_size untouched.
 *
 * These replace six identical copies of token_is_plus, which could not
 * compile (same static function defined repeatedly) and left the other
 * recognizers called by lexerize undefined.
 */

/* Matches the power operator: '^' (1 character) or "**" (2 characters). */
static bool token_is_power(const char *input, int input_pos, int *token_size)
{
    if (input[input_pos] == '^')
    {
        *token_size = 1;
        return true;
    }
    // Reading input_pos + 1 is safe: input[input_pos] is '*', so the
    // terminating '\0' is at input_pos + 1 at the earliest.
    if (input[input_pos] == '*' && input[input_pos + 1] == '*')
    {
        *token_size = 2;
        return true;
    }
    return false;
}

/* Matches the addition operator '+'. */
static bool token_is_plus(const char *input, int input_pos, int *token_size)
{
    if (input[input_pos] == '+')
    {
        *token_size = 1;
        return true;
    }
    return false;
}

/* Matches the subtraction operator '-'. */
static bool token_is_minus(const char *input, int input_pos, int *token_size)
{
    if (input[input_pos] == '-')
    {
        *token_size = 1;
        return true;
    }
    return false;
}

/*
 * Matches the multiplication operator '*'.
 * It also matches the first character of "**", so token_is_power must be
 * tried first (lexerize does so).
 */
static bool token_is_multiplication(const char *input, int input_pos, int *token_size)
{
    if (input[input_pos] == '*')
    {
        *token_size = 1;
        return true;
    }
    return false;
}

/* Matches the division operator '/'. */
static bool token_is_division(const char *input, int input_pos, int *token_size)
{
    if (input[input_pos] == '/')
    {
        *token_size = 1;
        return true;
    }
    return false;
}
int lexerize(const char *input, token tokens[MAX_TOKENS]) int lexerize(const char *input, token tokens[MAX_TOKENS])
{ {
int token_count = 0; int token_count;
int input_pos = 0; int input_pos;
int token_size = 0; int token_size;
token_count = 0;
input_pos = 0;
while (input[input_pos]) while (input[input_pos])
{ {
token_size = 0;
input_pos = skip_whitespace(input, input_pos); input_pos = skip_whitespace(input, input_pos);
if (input[input_pos] == '\0') if (input[input_pos] == '\0')
@@ -30,18 +135,51 @@ int lexerize(const char *input, token tokens[MAX_TOKENS])
break; break;
} }
token_size = token_is_plus(input, input_pos); if (token_is_variable(input, input_pos, &token_size))
if (token_size) {
tokens[token_count].type = TOKEN_VARIABLE;
tokens[token_count].var_value = 'x';
}
else if (token_is_number(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_NUMBER;
tokens[token_count].num_value = ft_atoi(input[input_pos]);
}
else if (token_is_power(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_POWER;
tokens[token_count].var_value = '^';
}
else if (token_is_plus(input, input_pos, &token_size))
{ {
tokens[token_count].type = TOKEN_PLUS; tokens[token_count].type = TOKEN_PLUS;
tokens[token_count].var_value = '+'; tokens[token_count].var_value = '+';
} }
else if (token_is_minus(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_MINUS;
tokens[token_count].var_value = '-';
}
else if (token_is_multiplication(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_MULTIPLICATION;
tokens[token_count].var_value = '*';
}
else if (token_is_division(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_DIVISION;
tokens[token_count].var_value = '/';
}
else
{
stop_errors(ERROR_UNKNOWN_TOKEN);
}
token_count++;
if (token_size == 0) if (token_size == 0)
{ {
stop_errors(ERROR_UNKNOWN_TOKEN); stop_errors(ERROR_UNKNOWN_TOKEN);
} }
token_count++;
input_pos += token_size; input_pos += token_size;
} }