lexer complete with luke memory solution

add atof
all lexer except atoi
2026-04-29 12:41:19 +02:00 · 2026-04-29 00:58:33 +02:00 · 2026-04-29 00:27:46 +02:00 · 2026-04-28 23:42:25 +02:00 · 2026-04-28 22:35:46 +02:00 · 2026-04-28 22:04:23 +02:00
9 changed files with 448 additions and 14 deletions
--- a/14
+++ b/14
@@ -33,10 +33,14 @@ RESET    = "\e[0m"
 # FILES :
 NAME          = computorv1
 D_LIB         = ./libft
-D_SRCS        = ./src
 D_HEADERS     = ./headers
-SRCS          = computorv1.c
-HEADERS       = computorv1.h
+HEADERS       = computorv1.h \
+				errors.h \
+				lexer.h
+D_SRCS        = ./src
+SRCS          = computorv1.c \
+				errors.c \
+				lexer.c

 # COMPILATION CONFIG :
 CC            = gcc
@@ -49,7 +53,7 @@ D_OBJS        = builds
 OBJS          = $(SRCS:%.$(EXT)=$(D_OBJS)/%.o)
 VPATH         = $(D_SRCS)
 F_INCLUDES    = $(HEADERS:%=$(D_HEADERS)/%)
-INCLUDES      = -I$(D_HEADERS)
+INCLUDES      = -I$(D_HEADERS) -I$(D_LIB)/includes
 ifeq "$(D_OBJS)" "."
  RM_OBJS     = rm -f $(OBJS)
 else
@@ -91,7 +95,7 @@ $(NAME): $(OBJS)

 run: $(NAME)
 	@echo $(YELLOW)"run"$(RESET)
-	@./$(NAME)
+	@./$(NAME) "3 * x^2 + 1 * x^1 - 2 * x^0"

 clean:
 	$(RM_OBJS)
--- a/README.md
+++ b/README.md
@@ -12,3 +12,31 @@ this project uses submodules (maybe recursively), so either :

 - `git clone --recurse-submodules <repo-url>`
 - or, after cloning : `git submodule update --init --recursive`
+
+## steps
+
+1. lexer
+    -> tokens[] :
+       {
+         PLUS -> +
+         MINUS -> -
+         VARIABLE -> x
+         NUMBER -> int or double
+         POWER -> ^
+         MULTIPLICATION -> *
+         DIVISION -> /
+         END -> null
+       }[]
+2. parser
+    -> terms[] :
+       {
+         SIGN -> + or -
+         COEFFICIENT -> double
+         EXPONENT -> double
+       }[]
+3. reduce
+4. print reduced form
+5. find degree
+6. print degree
+7. solve
+8. print solution
--- a/headers/computorv1.h
+++ b/headers/computorv1.h
@@ -1,6 +1,6 @@
 #ifndef COMPUTORV1_H
-# define COMPUTORV1_H
+#define COMPUTORV1_H

-# include "../libft/includes/libft.h"
+#include "libft.h"

 #endif
--- a/headers/errors.h
+++ b/headers/errors.h
@@ -0,0 +1,15 @@
+#ifndef ERRORS_H
+#define ERRORS_H
+
+#include "../libft/includes/libft.h"
+
+typedef enum // error codes accepted by stop_errors(), which also uses them as exit status
+{
+    ERROR_BASIC = 0, // generic error: stop_errors() prints "unknown error" for it
+    ERROR_UNKNOWN_TOKEN = -1, // the lexer met input it could not turn into a token
+    ERROR_NUMBER_TOO_BIG = -2, // a numeric literal exceeds the length the lexer accepts
+} program_error;
+
+int stop_errors(int err); // prints the message for err on stderr, then exits the program with err
+
+#endif
--- a/headers/lexer.h
+++ b/headers/lexer.h
@@ -0,0 +1,32 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "../libft/includes/libft.h"
+
+typedef enum
+{
+    TOKEN_VARIABLE,       // any single letter: x, y, etc.
+    TOKEN_NUMBER_INT,     // integer literal, value stored in value_int
+    TOKEN_NUMBER_DOUBLE,  // literal with a decimal point, value stored in value_double
+    TOKEN_POWER,          // ^ or **
+    TOKEN_PLUS,           // +
+    TOKEN_MINUS,          // -
+    TOKEN_MULTIPLICATION, // *
+    TOKEN_DIVISION,       // /
+    TOKEN_END             // end-of-input marker written after the last token
+} token_type;
+
+typedef struct // one lexed token: a type tag plus the payload that goes with that tag
+{
+    token_type type;
+    union
+    {
+        char value_char; // set for variable, operator and TOKEN_END tokens
+        int value_int; // set for TOKEN_NUMBER_INT
+        double value_double; // set for TOKEN_NUMBER_DOUBLE
+    };
+} token;
+
+void lexerize(const char *input, token *tokens); // fills tokens[] from input and ends it with a TOKEN_END entry, so tokens needs room for one entry more than the token count
+
+#endif
--- a/2
+++ b/2
--- a/src/computorv1.c
+++ b/src/computorv1.c
@@ -1,15 +1,111 @@
-
 #include "computorv1.h"
+#include "lexer.h"
+#include "errors.h"
+#include <stdio.h> // tmp for float debug

-int	main(int ac, char **av)
+void remove_spaces(char *s) // removes in place every char for which ft_isspace() is true
+{
+    char *read = s;
+    char *write = s;
+
+    // compact the string: copy each non-space char down to the write cursor
+    while (*read)
+    {
+        if (!ft_isspace(*read))
+        {
+            *write++ = *read;
+        }
+        read++;
+    }
+    *write = '\0';
+
+    // zero the leftover bytes between the new end and the old terminator
+    while (write != read)
+    {
+        *write++ = '\0';
+    }
+}
+
+int main(int ac, char **av) // lexes av[1] and dumps the resulting tokens (temporary debug output)
 {
    int i;
+    int arg_len;
+    char *input;

+    if (ac < 2)
+    {
+        return 0;
+    }
+
+    // tmp debug output
+    ft_putstr("-> received args :\n"); // debug
    i = 0;
-    while(i < ac) {
-        ft_putstr_fd(av[i], STDOUT_FILENO);
-        ft_putchar_fd('\n', STDOUT_FILENO);
+    while (i < ac)
+    {
+        ft_putstr("   ");
+        ft_putnbr(i);
+        ft_putstr(" : ");
+        ft_putstr(av[i]);
+        ft_putchar('\n');
        i++;
    }
-	return (0);
+
+    input = av[1];
+    remove_spaces(input);
+    arg_len = ft_strlen(input);
+
+    ft_putstr("-> input without space : "); // debug
+    ft_putstr(input);                       // debug
+    ft_putchar('\n');                       // debug
+    ft_putstr("-> arg_len : ");             // debug
+    ft_putnbr(arg_len);                     // debug
+    ft_putchar('\n');                       // debug
+
+    // worst case is one token per char, plus the TOKEN_END entry lexerize() appends;
+    // the + 1 also keeps the array length non-zero when the input is empty
+    token tokens[arg_len + 1];
+    lexerize(input, tokens);
+
+    // tmp debug output
+    ft_putchar('\n'); // debug
+    i = 0;
+    while (tokens[i].type != TOKEN_END)
+    {
+        ft_printf("token %2i - type : ", i);
+
+        if (tokens[i].type == TOKEN_VARIABLE)
+            ft_printf("%20s", "TOKEN_VARIABLE");
+        if (tokens[i].type == TOKEN_NUMBER_INT)
+            ft_printf("%20s", "TOKEN_NUMBER_INT");
+        if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+            ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
+        if (tokens[i].type == TOKEN_POWER)
+            ft_printf("%20s", "TOKEN_POWER");
+        if (tokens[i].type == TOKEN_PLUS)
+            ft_printf("%20s", "TOKEN_PLUS");
+        if (tokens[i].type == TOKEN_MINUS)
+            ft_printf("%20s", "TOKEN_MINUS");
+        if (tokens[i].type == TOKEN_MULTIPLICATION)
+            ft_printf("%20s", "TOKEN_MULTIPLICATION");
+        if (tokens[i].type == TOKEN_DIVISION)
+            ft_printf("%20s", "TOKEN_DIVISION");
+        if (tokens[i].type == TOKEN_END)
+            ft_printf("%20s", "TOKEN_END");
+
+        ft_putstr(" - value : ");
+
+        // the printed union member has to match the token type
+        if (tokens[i].type == TOKEN_NUMBER_INT)
+        {
+            printf("%i\n", tokens[i].value_int);
+        }
+        else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
+        {
+            printf("%f\n", tokens[i].value_double);
+        }
+        else
+        {
+            ft_printf("%c\n", tokens[i].value_char);
+        }
+        i++;
+    }
+
+    return (0);
 }
--- a/src/errors.c
+++ b/src/errors.c
@@ -0,0 +1,19 @@
+#include "errors.h"
+
+int stop_errors(int err) // prints the message matching err on stderr, then terminates the process
+{
+    switch (err)
+    {
+    case ERROR_UNKNOWN_TOKEN:
+        ft_putstr_fd("error: unknown token\n", STDERR_FILENO);
+        break;
+    case ERROR_NUMBER_TOO_BIG:
+        ft_putstr_fd("error: number is too big\n", STDERR_FILENO);
+        break;
+    default: // every other code, ERROR_BASIC included
+        ft_putstr_fd("unknown error\n", STDERR_FILENO);
+        break;
+    }
+
+    exit(err); // never returns; NOTE(review): the codes are 0 or negative, so ERROR_BASIC would exit with a success status - confirm this is intended
+}
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -0,0 +1,240 @@
+#include "lexer.h"
+#include "errors.h"
+#include <limits.h>
+#include <stdbool.h>
+
+// any single letter is a valid variable, like "x" or "y"; on a match *token_size is set to 1
+static bool token_is_variable(const char *input, int input_pos, int *token_size)
+{
+    if (ft_isalpha(input[input_pos]))
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false; // *token_size is left untouched when there is no match
+}
+
+// number can be int "123"
+// returns true and stores the digit count in *token_size when the digit run
+// starting at input_pos is an integer; returns false when there is no digit or
+// when the run continues with ".<digit>" (a double, left to
+// token_is_number_double); stops the program with ERROR_NUMBER_TOO_BIG when
+// the value does not fit in an int
+static bool token_is_number_int(const char *input, int input_pos, int *token_size)
+{
+    int number_size;
+    long long value;
+
+    if (!ft_isdigit(input[input_pos]))
+    {
+        return false;
+    }
+
+    number_size = 0;
+    value = 0;
+    while (ft_isdigit(input[input_pos + number_size]))
+    {
+        // stop accumulating once past INT_MAX, so an arbitrarily long digit
+        // run cannot overflow value itself
+        if (value <= INT_MAX)
+        {
+            value = value * 10 + (input[input_pos + number_size] - '0');
+        }
+        number_size++;
+    }
+    // test for the dot before the range check: a double may have an integer
+    // part that would be too big for an int
+    if (input[input_pos + number_size] == '.' && ft_isdigit(input[input_pos + number_size + 1]))
+    {
+        // number is double
+        return false;
+    }
+    if (value > INT_MAX)
+    {
+        stop_errors(ERROR_NUMBER_TOO_BIG);
+    }
+    *token_size = number_size;
+    return true;
+}
+
+// number can be double "123.456": a digit run with at most one dot, the dot being followed by a digit
+static bool token_is_number_double(const char *input, int input_pos, int *token_size)
+{
+    int number_size;
+    int max_number_size;
+    bool has_dot;
+
+    if (!ft_isdigit(input[input_pos]))
+    {
+        return false;
+    }
+
+    has_dot = false;
+    number_size = 1;
+    max_number_size = 129; // cap on the token length in characters, dot included; NOTE(review): the original note read "128 bits, + the comma", but this value is compared with a character count
+    while (number_size <= max_number_size)
+    {
+        if (ft_isdigit(input[input_pos + number_size]))
+        {
+            number_size++;
+        }
+        else if (input[input_pos + number_size] == '.')
+        {
+            if (has_dot)
+            {
+                // number is not a valid double, it has 2 dots
+                return false;
+            }
+            if (!ft_isdigit(input[input_pos + number_size + 1]))
+            {
+                // number is not a double, it has no number after the dot
+                return false;
+            }
+            has_dot = true;
+            number_size++;
+        }
+        else
+            break;
+    }
+    if (number_size > max_number_size) // the loop ended on the cap, not on a non-number char
+    {
+        stop_errors(ERROR_NUMBER_TOO_BIG);
+    }
+    *token_size = number_size; // length of the whole literal, dot included
+    return true;
+}
+
+// power can be "^" (1 char) or "**" (2 chars); on a match *token_size receives that length
+static bool token_is_power(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '^')
+    {
+        *token_size = 1;
+        return true;
+    }
+    else if (ft_memcmp(&input[input_pos], "**", 2) == 0) // checks the char at input_pos and the next one (assuming ft_memcmp behaves like memcmp)
+    {
+        *token_size = 2;
+        return true;
+    }
+    return false;
+}
+
+// detect a single "+" at input_pos; on a match *token_size is set to 1
+static bool token_is_plus(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '+')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+// detect a single "-" at input_pos; on a match *token_size is set to 1
+static bool token_is_minus(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '-')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+// detect a single "*" at input_pos; on a match *token_size is set to 1 (lexerize tries token_is_power first, so "**" does not reach this check)
+static bool token_is_multiplication(const char *input, int input_pos, int *token_size)
+{
+    if (input[input_pos] == '*')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+// detect a single "/" at input_pos; on a match *token_size is set to 1
+static bool token_is_division(const char *input, int input_pos, int *token_size)
+{
+    // this used to test '+', which token_is_plus already consumes, so "/" was never recognised
+    if (input[input_pos] == '/')
+    {
+        *token_size = 1;
+        return true;
+    }
+    return false;
+}
+
+/**
+ * LEXER
+ *
+ * Splits input into tokens, written in order to tokens[] and followed by one
+ * TOKEN_END entry.
+ *
+ * input  : NUL-terminated expression (the caller removes whitespace first)
+ * tokens : output array; at most one token is produced per input character,
+ *          so it needs room for strlen(input) + 1 entries
+ *
+ * A number that is too big stops the program through stop_errors().
+ */
+void lexerize(const char *input, token *tokens)
+{
+    int token_count;
+    int input_pos;
+    int token_size;
+
+    token_count = 0;
+    input_pos = 0;
+    while (input[input_pos])
+    {
+        token_size = 0;
+
+        if (token_is_variable(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_VARIABLE;
+            // keep the letter that was actually typed, not a fixed 'x'
+            tokens[token_count].value_char = input[input_pos];
+        }
+        else if (token_is_number_int(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_NUMBER_INT;
+            tokens[token_count].value_int = ft_atoi(&input[input_pos]);
+        }
+        else if (token_is_number_double(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_NUMBER_DOUBLE;
+            tokens[token_count].value_double = ft_atof(&input[input_pos]);
+        }
+        else if (token_is_power(input, input_pos, &token_size))
+        {
+            // tested before multiplication so that "**" is not read as two "*"
+            tokens[token_count].type = TOKEN_POWER;
+            tokens[token_count].value_char = '^';
+        }
+        else if (token_is_plus(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_PLUS;
+            tokens[token_count].value_char = '+';
+        }
+        else if (token_is_minus(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_MINUS;
+            tokens[token_count].value_char = '-';
+        }
+        else if (token_is_multiplication(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_MULTIPLICATION;
+            tokens[token_count].value_char = '*';
+        }
+        else if (token_is_division(input, input_pos, &token_size))
+        {
+            tokens[token_count].type = TOKEN_DIVISION;
+            tokens[token_count].value_char = '/';
+        }
+        else
+        {
+            // tmp: an unknown character is skipped without emitting a token,
+            // so no uninitialized entry is left in tokens[]
+            // TODO: report it with stop_errors(ERROR_UNKNOWN_TOKEN) instead
+            input_pos++;
+            continue;
+        }
+
+        // a matcher that succeeds without consuming anything would loop forever
+        if (token_size == 0)
+        {
+            stop_errors(ERROR_UNKNOWN_TOKEN);
+        }
+        token_count++;
+        input_pos += token_size;
+    }
+
+    tokens[token_count].type = TOKEN_END;
+    tokens[token_count].value_char = '\0';
+}
Author	SHA1	Message	Date
hugogogo	26fa8025ef	lexer complete with luke memory solution	2026-04-29 12:41:19 +02:00
hugogogo	9ced220c00	add atof	2026-04-29 00:58:33 +02:00
hugogogo	52c76767bd	all lexer except atoi	2026-04-29 00:27:46 +02:00
hugogogo	5108571deb	lexer adding is_number	2026-04-28 23:42:25 +02:00
hugogogo	fb81f200d9	split files	2026-04-28 22:35:46 +02:00
hugogogo	48221894c0	wip lexer	2026-04-28 22:04:23 +02:00