Compare commits

..

6 Commits

Author SHA1 Message Date
hugogogo
512ba9b5f4 init parser 2026-04-30 23:19:14 +02:00
hugogogo
7a30dcc345 lexer types equal and factor 2026-04-30 22:48:51 +02:00
hugogogo
26fa8025ef lexer complete with luke memory solution 2026-04-29 12:41:19 +02:00
hugogogo
9ced220c00 add atof 2026-04-29 00:58:33 +02:00
hugogogo
52c76767bd all lexer except atoi 2026-04-29 00:27:46 +02:00
hugogogo
5108571deb lexer adding is_number 2026-04-28 23:42:25 +02:00
10 changed files with 341 additions and 50 deletions

View File

@@ -36,11 +36,13 @@ D_LIB = ./libft
D_HEADERS = ./headers D_HEADERS = ./headers
HEADERS = computorv1.h \ HEADERS = computorv1.h \
errors.h \ errors.h \
lexer.h lexer.h \
parser.h
D_SRCS = ./src D_SRCS = ./src
SRCS = computorv1.c \ SRCS = computorv1.c \
errors.c \ errors.c \
lexer.c lexer.c \
parser.c
# COMPILATION CONFIG : # COMPILATION CONFIG :
CC = gcc CC = gcc
@@ -95,7 +97,7 @@ $(NAME): $(OBJS)
run: $(NAME) run: $(NAME)
@echo $(YELLOW)"run"$(RESET) @echo $(YELLOW)"run"$(RESET)
@./$(NAME) "3 * x^2 + 1 * x^1 - 2 * x^0" @./$(NAME) "3.4 * x^2 + 1 * x^1 - 2 * x^0 = 5.123 * x^1"
clean: clean:
$(RM_OBJS) $(RM_OBJS)

View File

@@ -2,5 +2,9 @@
#define COMPUTORV1_H #define COMPUTORV1_H
#include "libft.h" #include "libft.h"
#include "lexer.h"
#include "parser.h"
#include "errors.h"
#include <stdio.h> // tmp for printf, for float debug
#endif #endif

View File

@@ -7,6 +7,7 @@ typedef enum
{ {
ERROR_BASIC = 0, ERROR_BASIC = 0,
ERROR_UNKNOWN_TOKEN = -1, ERROR_UNKNOWN_TOKEN = -1,
ERROR_NUMBER_TOO_BIG = -2,
} program_error; } program_error;
int stop_errors(int err); int stop_errors(int err);

View File

@@ -1,18 +1,20 @@
#ifndef LEXER_H #ifndef LEXER_H
#define LEXER_H #define LEXER_H
#include "../libft/includes/libft.h" #include "libft.h"
#include "errors.h"
#include <stdbool.h>
typedef enum typedef enum
{ {
TOKEN_PLUS, // + TOKEN_VARIABLE, // x, y, etc.
TOKEN_MINUS, // - TOKEN_NUMBER_INT, // int
TOKEN_VARIABLE, // x, y, etc. TOKEN_NUMBER_DOUBLE, // double
TOKEN_NUMBER, // int or double TOKEN_POWER, // ^ or **
TOKEN_POWER, // ^ or ** TOKEN_SIGN, // + or -
TOKEN_MULTIPLICATION, // * TOKEN_FACTOR, // * or /
TOKEN_DIVISION, // / TOKEN_EQUAL, // =
TOKEN_END // null (end of input) TOKEN_END // null (end of input)
} token_type; } token_type;
typedef struct typedef struct
@@ -20,12 +22,12 @@ typedef struct
token_type type; token_type type;
union union
{ {
double num_value; // For NUMBER char value_char;
char var_value; // For VARIABLE (single char, e.g., 'x') int value_int;
double value_double;
}; };
} token; } token;
#define MAX_TOKENS 100 int lexerize(const char *input, token *tokens);
int lexerize(const char *input, token tokens[MAX_TOKENS]);
#endif #endif

27
headers/parser.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef PARSER_H
#define PARSER_H
#include "libft.h"
#include "lexer.h"
#include "errors.h"
// Which side of the '=' sign a parsed term belongs to.
typedef enum
{
	TERM_LEFT, // a in "a = b"
	TERM_RIGHT, // b in "a = b"
} term_type;
// One parsed term of the equation. The anonymous union mirrors the token
// union in lexer.h (value_char / value_int / value_double).
// NOTE(review): anonymous union members require C11 or GNU extensions —
// confirm the compiler flags in the Makefile allow this.
typedef struct
{
	term_type type;
	union
	{
		char value_char;   // single-character payload (e.g. a variable letter)
		int value_int;     // integer payload
		double value_double; // floating-point payload
	};
} term;
// Convert the lexer's token stream into terms[].
// Returns the number of terms produced (parser.c is currently a stub
// that always reports 1).
int parse(token *tokens, term *terms);
#endif

2
libft

Submodule libft updated: 2be81d5630...f86c2cf5cb

View File

@@ -1,19 +1,45 @@
#include "computorv1.h" #include "computorv1.h"
#include "lexer.h"
#include "errors.h" void remove_spaces(char *s)
{
char *read = s;
char *write = s;
// copy all non-space chars
while (*read)
{
if (!ft_isspace(*read))
{
*write++ = *read;
}
read++;
}
*write = '\0';
// zero the rest of the buffer
while (write != read)
{
*write++ = '\0';
}
}
int main(int ac, char **av) int main(int ac, char **av)
{ {
int i; int i;
int ret; int arg_len;
char *input;
if (ac < 2) if (ac < 2)
{ {
return 0; return 0;
} }
// tmp debug output
ft_putstr("-> received args :\n"); // debug
i = 0; i = 0;
while (i < ac) while (i < ac)
{ {
ft_putstr(" ");
ft_putnbr(i); ft_putnbr(i);
ft_putstr(" : "); ft_putstr(" : ");
ft_putstr(av[i]); ft_putstr(av[i]);
@@ -21,28 +47,73 @@ int main(int ac, char **av)
i++; i++;
} }
token tokens[MAX_TOKENS]; input = av[1];
ret = lexerize(av[1], tokens); remove_spaces(input);
if (ret <= 0) arg_len = ft_strlen(input);
{
stop_errors(ret); ft_putstr("-> input without space : "); // debug
} ft_putstr(input); // debug
ft_putchar('\n'); // debug
ft_putstr("-> arg_len : "); // debug
ft_putnbr(arg_len); // debug
ft_putchar('\n'); // debug
token tokens[arg_len];
int token_count = lexerize(input, tokens);
ft_putstr("-> token_count : "); // debug
ft_putnbr(token_count); // debug
ft_putchar('\n'); // debug
// tmp debug output // tmp debug output
ft_putchar('\n'); // debug
i = 0; i = 0;
while (tokens[i].type != TOKEN_END) while (tokens[i].type != TOKEN_END)
{ {
ft_printf("token %i :\n type : %i\n value : ", i, tokens[i].type); ft_printf("token %2i - type : ", i);
if (tokens[i].type == TOKEN_NUMBER)
if (tokens[i].type == TOKEN_VARIABLE)
ft_printf("%20s", "TOKEN_VARIABLE");
if (tokens[i].type == TOKEN_NUMBER_INT)
ft_printf("%20s", "TOKEN_NUMBER_INT");
if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
if (tokens[i].type == TOKEN_POWER)
ft_printf("%20s", "TOKEN_POWER");
if (tokens[i].type == TOKEN_SIGN)
ft_printf("%20s", "TOKEN_SIGN");
if (tokens[i].type == TOKEN_FACTOR)
ft_printf("%20s", "TOKEN_FACTOR");
if (tokens[i].type == TOKEN_EQUAL)
ft_printf("%20s", "TOKEN_EQUAL");
if (tokens[i].type == TOKEN_END)
ft_printf("%20s", "TOKEN_END");
ft_putstr(" - value : ");
if (tokens[i].type == TOKEN_NUMBER_INT)
{ {
ft_printf("%d\n", i, tokens[i].num_value); printf("%i\n", tokens[i].value_int);
}
else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
{
printf("%f\n", tokens[i].value_double);
} }
else else
{ {
ft_printf("%c\n", tokens[i].var_value); ft_printf("%c\n", tokens[i].value_char);
} }
i++; i++;
} }
ft_putchar('\n'); // debug
// END tmp debug output
term terms[token_count / 2];
int term_count = parse(tokens, terms);
ft_putstr("-> term_count : "); // debug
ft_putnbr(term_count); // debug
ft_putchar('\n'); // debug
return (0); return (0);
} }

View File

@@ -7,7 +7,9 @@ int stop_errors(int err)
case ERROR_UNKNOWN_TOKEN: case ERROR_UNKNOWN_TOKEN:
ft_putstr_fd("error: unknown token\n", STDERR_FILENO); ft_putstr_fd("error: unknown token\n", STDERR_FILENO);
break; break;
case ERROR_NUMBER_TOO_BIG:
ft_putstr_fd("error: number is too big\n", STDERR_FILENO);
break;
default: default:
ft_putstr_fd("unknown error\n", STDERR_FILENO); ft_putstr_fd("unknown error\n", STDERR_FILENO);
break; break;

View File

@@ -1,52 +1,224 @@
#include "lexer.h" #include "lexer.h"
#include "errors.h"
static int skip_whitespace(const char *input, int input_pos) // any single letter is a valid variable, like 'x' or 'y'
static bool token_is_variable(const char *input, int input_pos, int *token_size)
{ {
while (ft_isspace(input[input_pos])) if (ft_isalpha(input[input_pos]))
{ {
input_pos++; *token_size = 1;
return true;
} }
return input_pos; return false;
} }
static int token_is_plus(const char *input, int input_pos) // number can be int "123"
static bool token_is_number_int(const char *input, int input_pos, int *token_size)
{ {
return (input[input_pos] == '+'); int number_size;
int max_number_size;
if (!ft_isdigit(input[input_pos]))
{
return false;
}
number_size = 1;
max_number_size = 16; // max size for int
while (number_size <= max_number_size)
{
if (ft_isdigit(input[input_pos + number_size]))
{
number_size++;
}
else if (input[input_pos + number_size] == '.')
{
if (ft_isdigit(input[input_pos + number_size + 1]))
{
// number is double
return false;
}
else
break;
}
else
break;
}
if (number_size > max_number_size)
{
stop_errors(ERROR_NUMBER_TOO_BIG);
}
*token_size = number_size;
return true;
} }
int lexerize(const char *input, token tokens[MAX_TOKENS]) // number can be double "123.456"
static bool token_is_number_double(const char *input, int input_pos, int *token_size)
{ {
int token_count = 0; int number_size;
int input_pos = 0; int max_number_size;
int token_size = 0; bool has_dot;
if (!ft_isdigit(input[input_pos]))
{
return false;
}
has_dot = false;
number_size = 1;
max_number_size = 129; // max size for double double is 128 bits, + the coma
while (number_size <= max_number_size)
{
if (ft_isdigit(input[input_pos + number_size]))
{
number_size++;
}
else if (input[input_pos + number_size] == '.')
{
if (has_dot)
{
// number is not a valid double, it has 2 dots
return false;
}
if (!ft_isdigit(input[input_pos + number_size + 1]))
{
// number is not a double, it has no number after the dot
return false;
}
has_dot = true;
number_size++;
}
else
break;
}
if (number_size > max_number_size)
{
stop_errors(ERROR_NUMBER_TOO_BIG);
}
*token_size = number_size;
return true;
}
// power can be '^' or "**"
// Checks whether a power operator starts at input[input_pos]; on success
// stores the operator length (1 or 2) in *token_size and returns true.
// Direct char comparisons replace the previous ft_memcmp call: a 2-byte
// memcmp is overkill here, and this form makes the bound explicit — we
// read at most input[input_pos + 1], which is in bounds because the
// caller (lexerize) only calls us while input[input_pos] != '\0'.
static bool token_is_power(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] == '^')
	{
		*token_size = 1;
		return true;
	}
	if (input[input_pos] == '*' && input[input_pos + 1] == '*')
	{
		*token_size = 2;
		return true;
	}
	return false;
}
// sign can be '+' or '-'
// Returns true and sets *token_size to 1 when a sign character sits at
// input[input_pos]; leaves *token_size untouched otherwise.
static bool token_is_sign(const char *input, int input_pos, int *token_size)
{
	const char c = input[input_pos];

	if (c != '+' && c != '-')
		return false;
	*token_size = 1;
	return true;
}
// factor can be '*' or '/' or ':'
// Returns true and sets *token_size to 1 when a factor operator sits at
// input[input_pos]; leaves *token_size untouched otherwise. Note that
// "**" is claimed by token_is_power first in the lexer loop.
static bool token_is_factor(const char *input, int input_pos, int *token_size)
{
	switch (input[input_pos])
	{
		case '*':
		case '/':
		case ':':
			*token_size = 1;
			return true;
		default:
			return false;
	}
}
// detect a single '='
// Returns true and sets *token_size to 1 when '=' sits at
// input[input_pos]; leaves *token_size untouched otherwise.
static bool token_is_equal(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] != '=')
		return false;
	*token_size = 1;
	return true;
}
/**
* LEXER
*/
int lexerize(const char *input, token *tokens)
{
int token_count;
int input_pos;
int token_size;
token_count = 0;
input_pos = 0;
while (input[input_pos]) while (input[input_pos])
{ {
input_pos = skip_whitespace(input, input_pos); token_size = 0;
if (input[input_pos] == '\0') if (input[input_pos] == '\0')
{ {
break; break;
} }
token_size = token_is_plus(input, input_pos); if (token_is_variable(input, input_pos, &token_size))
if (token_size)
{ {
tokens[token_count].type = TOKEN_PLUS; tokens[token_count].type = TOKEN_VARIABLE;
tokens[token_count].var_value = '+'; tokens[token_count].value_char = 'x';
}
else if (token_is_number_int(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_NUMBER_INT;
tokens[token_count].value_int = ft_atoi(&input[input_pos]);
}
else if (token_is_number_double(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_NUMBER_DOUBLE;
tokens[token_count].value_double = ft_atof(&input[input_pos]);
}
else if (token_is_power(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_POWER;
tokens[token_count].value_char = '^';
}
else if (token_is_sign(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_SIGN;
tokens[token_count].value_char = input[input_pos];
}
else if (token_is_factor(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_FACTOR;
tokens[token_count].value_char = input[input_pos];
}
else if (token_is_equal(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_EQUAL;
tokens[token_count].value_char = '=';
}
else
{
// tmp
token_size = 1;
// stop_errors(ERROR_UNKNOWN_TOKEN);
} }
token_count++;
if (token_size == 0) if (token_size == 0)
{ {
stop_errors(ERROR_UNKNOWN_TOKEN); stop_errors(ERROR_UNKNOWN_TOKEN);
} }
token_count++;
input_pos += token_size; input_pos += token_size;
} }
tokens[token_count].type = TOKEN_END; tokens[token_count].type = TOKEN_END;
tokens[token_count].var_value = '\0'; tokens[token_count].value_char = '\0';
return 1; return token_count;
} }

10
src/parser.c Normal file
View File

@@ -0,0 +1,10 @@
#include "parser.h"

// Placeholder parser stage: tags the first term slot as TERM_LEFT (0) and
// reports a single term. Real token -> term conversion is still TODO.
int parse(token *tokens, term *terms)
{
	if (!tokens)
		return (1);
	terms[0].type = 0;
	return (1);
}