From 5108571debeb39785dc4141e6c8c8e5144bbbcdb Mon Sep 17 00:00:00 2001
From: hugogogo
Date: Tue, 28 Apr 2026 23:42:25 +0200
Subject: [PATCH] lexer adding is_number

---
 headers/errors.h |   1 +
 headers/lexer.h  |   4 +-
 src/errors.c     |   4 +-
 src/lexer.c      | 154 ++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 153 insertions(+), 10 deletions(-)

diff --git a/headers/errors.h b/headers/errors.h
index 0f7313b..04d7502 100644
--- a/headers/errors.h
+++ b/headers/errors.h
@@ -7,6 +7,7 @@ typedef enum
 {
     ERROR_BASIC = 0,
     ERROR_UNKNOWN_TOKEN = -1,
+    ERROR_NUMBER_TOO_BIG = -2,
 } program_error;
 
 int stop_errors(int err);
diff --git a/headers/lexer.h b/headers/lexer.h
index 7e7da5d..1290286 100644
--- a/headers/lexer.h
+++ b/headers/lexer.h
@@ -5,11 +5,11 @@
 
 typedef enum
 {
-    TOKEN_PLUS, // +
-    TOKEN_MINUS, // -
     TOKEN_VARIABLE, // x, y, etc.
     TOKEN_NUMBER, // int or double
     TOKEN_POWER, // ^ or **
+    TOKEN_PLUS, // +
+    TOKEN_MINUS, // -
     TOKEN_MULTIPLICATION, // *
     TOKEN_DIVISION, // /
     TOKEN_END // null (end of input)
diff --git a/src/errors.c b/src/errors.c
index 2ffb257..9654c67 100644
--- a/src/errors.c
+++ b/src/errors.c
@@ -7,7 +7,9 @@ int stop_errors(int err)
         case ERROR_UNKNOWN_TOKEN:
             ft_putstr_fd("error: unknown token\n", STDERR_FILENO);
             break;
-
+        case ERROR_NUMBER_TOO_BIG:
+            ft_putstr_fd("error: number is too big\n", STDERR_FILENO);
+            break;
         default:
             ft_putstr_fd("unknown error\n", STDERR_FILENO);
             break;
diff --git a/src/lexer.c b/src/lexer.c
index e6fcb0b..e7d7e97 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,5 +1,6 @@
 #include "lexer.h"
 #include "errors.h"
+#include <stdbool.h>
 
 static int skip_whitespace(const char *input, int input_pos)
 {
@@ -10,19 +11,128 @@ static int skip_whitespace(const char *input, int input_pos)
     return input_pos;
 }
 
-static int token_is_plus(const char *input, int input_pos)
+// Recognizes a one-character variable name (any character ft_isalpha accepts).
+static bool token_is_variable(const char *input, int input_pos, int *token_size)
 {
-    return (input[input_pos] == '+');
+    if (ft_isalpha(input[input_pos]))
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+// Recognizes a numeric literal: leading digit, then digits with at most one '.'.
+// Calls stop_errors(ERROR_NUMBER_TOO_BIG) when the literal is longer than max_number_size.
+static bool token_is_number(const char *input, int input_pos, int *token_size)
+{
+    int number_size;
+    int max_number_size;
+    bool has_point;
+
+    if (!ft_isnumber(input[input_pos]))
+    {
+        return false;
+    }
+
+    number_size = 1;
+    max_number_size = 129; // cap on literal length: 128 characters plus the decimal point
+    has_point = false;
+    while (number_size <= max_number_size)
+    {
+        if (ft_isnumber(input[input_pos + number_size]))
+        {
+            number_size++;
+        }
+        else if (input[input_pos + number_size] == '.' && !has_point)
+        {
+            has_point = true;
+            number_size++;
+        }
+        else
+        {
+            break; // first character that cannot belong to the literal (includes '\0')
+        }
+    }
+    if (number_size > max_number_size)
+    {
+        stop_errors(ERROR_NUMBER_TOO_BIG);
+    }
+    *token_size = number_size;
+    return true;
+}
+
+// Recognizes the power operator, written either "^" or "**".
+static bool token_is_power(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '^')
+    {
+        *token_size = 1;
+        return true;
+    }
+    if (input[input_pos] == '*' && input[input_pos + 1] == '*')
+    {
+        *token_size = 2;
+        return true;
+    }
+    return false;
+}
+
+// Recognizes the addition operator "+".
+static bool token_is_plus(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '+')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+// Recognizes the subtraction operator "-".
+static bool token_is_minus(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '-')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+// Recognizes the multiplication operator "*" (callers must try "**" first).
+static bool token_is_multiplication(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '*')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+// Recognizes the division operator "/".
+static bool token_is_division(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '/')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
 }
 
 int lexerize(const char *input, token tokens[MAX_TOKENS])
 {
-    int token_count = 0;
-    int input_pos = 0;
-    int token_size = 0;
+    int token_count;
+    int input_pos;
+    int token_size;
 
+    token_count = 0;
+    input_pos = 0;
     while (input[input_pos])
     {
+        token_size = 0;
         input_pos = skip_whitespace(input, input_pos);
 
         if (input[input_pos] == '\0')
@@ -30,12 +140,42 @@ int lexerize(const char *input, token tokens[MAX_TOKENS])
             break;
         }
 
-        token_size = token_is_plus(input, input_pos);
-        if (token_size)
+        if (token_is_variable(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_VARIABLE;
+            tokens[token_count].var_value = input[input_pos];
+        }
+        else if (token_is_number(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_NUMBER;
+            // NOTE(review): ft_atoi presumably parses integers only, dropping any fractional part - confirm
+            tokens[token_count].num_value = ft_atoi(&input[input_pos]);
+        }
+        else if (token_is_power(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_POWER;
+            tokens[token_count].var_value = '^';
+        }
+        else if (token_is_plus(input, input_pos, &token_size))
         {
             tokens[token_count].type = TOKEN_PLUS;
             tokens[token_count].var_value = '+';
         }
+        else if (token_is_minus(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_MINUS;
+            tokens[token_count].var_value = '-';
+        }
+        else if (token_is_multiplication(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_MULTIPLICATION;
+            tokens[token_count].var_value = '*';
+        }
+        else if (token_is_division(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_DIVISION;
+            tokens[token_count].var_value = '/';
+        }
 
         if (token_size == 0)
         {